# Imports

In [2]:
import csv
import numpy
import typing

# Data

In [3]:
T = typing.TypeVar("T")


def read_data(
    path: str,
    converter: typing.Callable[[str], T],
) -> numpy.ndarray:
    """Read a whitespace-separated text file into a NumPy array.

    Every non-blank line becomes one row; every whitespace-separated token
    on the line is passed through ``converter``.

    Args:
        path: Path of the text file to read.
        converter: Callable that parses a single token. It is also passed to
            NumPy as the array ``dtype``, so it has to be something NumPy
            accepts as a dtype (the notebook uses ``float`` and ``int``).

    Returns:
        An array with one row per non-blank line of the file.
    """
    with open(path) as f:
        rows = [
            [converter(value) for value in line.split()]
            for line in f
            # Blank lines (e.g. a trailing newline at the end of the file)
            # would produce an empty row and make the array ragged.
            if line.strip()
        ]

    return numpy.array(rows, dtype=converter)
        
# Files named "probabilities-*" are parsed with float, in the order
# binary, multi-class, multi-target.
(
    binary_predicted,
    multi_class_predicted,
    multi_target_predicted,
) = (
    read_data(probabilities_path, float)
    for probabilities_path in (
        "./data/git/probabilities-binary.txt",
        "./data/git/probabilities-multi-class.txt",
        "./data/git/probabilities-multi-target.txt",
    )
)

# Files named "true-*" are parsed with int, in the same order.
(
    binary_true,
    multi_class_true,
    multi_target_true,
) = (
    read_data(labels_path, int)
    for labels_path in (
        "./data/git/true-binary.txt",
        "./data/git/true-multi-class.txt",
        "./data/git/true-multi-target.txt",
    )
)

# Integer code assigned to each flower name found in the CSV.
flower_to_int = {
    "Daisy": 0,
    "Tulip": 1,
    "Rose": 2,
}

# The csv module expects files opened with newline="" so that it can handle
# line endings (and newlines inside quoted fields) itself.
with open("./data/lab/flowers.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # the first row is discarded (presumably a header)
    # csv.reader yields an empty list for a blank line; such rows cannot be
    # unpacked into (true, predicted) below, so they are dropped here.
    flowers = [row for row in reader if row]

# Each remaining row is a (true label, predicted label) pair of flower names.
flowers_true = numpy.array(
    [flower_to_int[true_name] for true_name, _ in flowers]
)
flowers_predicted = numpy.array(
    [flower_to_int[predicted_name] for _, predicted_name in flowers]
)

# The csv module expects files opened with newline="" so that it can handle
# line endings (and newlines inside quoted fields) itself.
with open("./data/lab/sport.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # the first row is discarded (presumably a header)
    # csv.reader yields an empty list for a blank line; indexing such a row
    # would raise IndexError, so blank records are dropped here.
    sport = [row for row in reader if row]

# Columns 0-2 hold the true weight / waist / pulse values and columns 3-5 the
# corresponding predictions; each column becomes its own integer array.
(
    sport_weight_true,
    sport_waist_true,
    sport_pulse_true,
    sport_weight_predicted,
    sport_waist_predicted,
    sport_pulse_predicted,
) = (
    numpy.array([int(row[column]) for row in sport], dtype=int)
    for column in range(6)
)

# Solution

## Determinarea erorii de predicție - regresie multi-target

In [4]:
def mean_absolute_error(predicted: numpy.ndarray, true: numpy.ndarray):
    """Return the mean absolute error between predictions and ground truth.

    Args:
        predicted: Predicted values (any array-like NumPy can convert to float).
        true: Ground-truth values, broadcastable against ``predicted``.

    Returns:
        The average of ``|predicted - true|`` over all elements.
    """
    # Convert to float before subtracting: with unsigned or narrow integer
    # dtypes the difference would wrap around instead of going negative, and
    # plain Python lists do not support element-wise subtraction at all.
    predicted = numpy.asarray(predicted, dtype=float)
    true = numpy.asarray(true, dtype=float)
    return numpy.average(numpy.abs(predicted - true))

def root_mean_square_error(predicted: numpy.ndarray, true: numpy.ndarray):
    """Return the root mean square error between predictions and ground truth.

    Args:
        predicted: Predicted values (any array-like NumPy can convert to float).
        true: Ground-truth values, broadcastable against ``predicted``.

    Returns:
        The square root of the average of ``(predicted - true) ** 2``.
    """
    # Convert to float before subtracting/squaring: integer dtypes can wrap
    # around on the difference or overflow on the square, and plain Python
    # lists do not support element-wise arithmetic at all.
    predicted = numpy.asarray(predicted, dtype=float)
    true = numpy.asarray(true, dtype=float)
    return numpy.sqrt(numpy.average((predicted - true) ** 2))

In [5]:
# RMSE of each sport target, printed one per line in the order
# weight, waist, pulse.
print(
    *(
        root_mean_square_error(predicted_values, true_values)
        for predicted_values, true_values in (
            (sport_weight_predicted, sport_weight_true),
            (sport_waist_predicted, sport_waist_true),
            (sport_pulse_predicted, sport_pulse_true),
        )
    ),
    sep="\n",
)

5.953990258641679
1.5652475842498528
2.3345235059857505


## Determinarea acurateții, preciziei, rapelului - clasificare multi-class

In [6]:
class ConfusionMatrix:
    """Counts of a binary confusion matrix and the metrics derived from them.

    Attributes:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives.
        fn: Number of false negatives.

    A metric whose denominator is zero is undefined and is reported as NaN,
    regardless of whether the counts are Python ints or NumPy integers.
    """

    def __init__(self, tp: int, fp: int, tn: int, fn: int):
        self.tp = tp
        self.fp = fp
        self.tn = tn
        self.fn = fn

    @staticmethod
    def _ratio(numerator, denominator) -> float:
        """Return ``numerator / denominator``, or NaN for a zero denominator."""
        # Python ints would raise ZeroDivisionError here while NumPy integers
        # would return NaN with a RuntimeWarning; make both cases agree.
        if denominator == 0:
            return float("nan")
        return numerator / denominator

    @property
    def accuracy(self) -> float:
        """Fraction of all samples that were classified correctly."""
        return self._ratio(
            self.tp + self.tn,
            self.tp + self.tn + self.fp + self.fn,
        )

    @property
    def precision(self) -> float:
        """Fraction of predicted positives that are actually positive."""
        return self._ratio(self.tp, self.tp + self.fp)

    @property
    def recall(self) -> float:
        """Fraction of actual positives that were predicted positive."""
        return self._ratio(self.tp, self.tp + self.fn)

def make_confusion_matrix(
    predicted: numpy.ndarray,
    true: numpy.ndarray,
    is_positive: typing.Callable[[numpy.ndarray], numpy.ndarray],
) -> ConfusionMatrix:
    """Build a confusion matrix for the class selected by ``is_positive``.

    Args:
        predicted: Predicted labels.
        true: Ground-truth labels, aligned element-wise with ``predicted``.
        is_positive: Maps an array of labels to a mask that is truthy where
            the label belongs to the positive class.

    Returns:
        A ``ConfusionMatrix`` holding the tp / fp / tn / fn counts.
    """
    predicted = numpy.asarray(predicted)
    true = numpy.asarray(true)

    # Evaluate the predicate once per subset instead of once per count, and
    # coerce to bool so that ``~`` is a logical negation and the mask is used
    # for boolean (not integer) indexing even if the predicate returns 0/1.
    predicted_positive = numpy.asarray(is_positive(predicted), dtype=bool)
    true_among_positive = numpy.asarray(
        is_positive(true[predicted_positive]), dtype=bool
    )
    true_among_negative = numpy.asarray(
        is_positive(true[~predicted_positive]), dtype=bool
    )

    tp = numpy.sum(true_among_positive)
    fp = numpy.sum(~true_among_positive)
    tn = numpy.sum(~true_among_negative)
    fn = numpy.sum(true_among_negative)

    return ConfusionMatrix(tp, fp, tn, fn)

In [7]:
# One-vs-rest metrics for each flower class, in label order 0, 1, 2
# (Daisy, Tulip, Rose according to flower_to_int).
for positive_label in (0, 1, 2):
    confusion_matrix = make_confusion_matrix(
        flowers_predicted,
        flowers_true,
        # Bind the current label as a default so the predicate does not
        # depend on the loop variable after this iteration.
        lambda a, label=positive_label: a == label,
    )

    print("acuratețe = ", confusion_matrix.accuracy)
    print("precizie = ", confusion_matrix.precision)
    print("rapel = ", confusion_matrix.recall)

acuratețe =  0.46153846153846156
precizie =  0.36363636363636365
rapel =  0.36363636363636365
acuratețe =  0.6153846153846154
precizie =  0.2857142857142857
rapel =  0.2857142857142857
acuratețe =  0.5384615384615384
precizie =  0.25
rapel =  0.25


## Determinarea loss-ului

### Regresie

$$
MSE=\frac{1}{n}\sum_{i=1}^n (\text{predicted}_i-\text{true}_i)^2
$$

In [None]:
def loss_mean_squared_error(
    predicted: numpy.ndarray,
    true: numpy.ndarray,
) -> float:
    """Return the mean squared error between predictions and ground truth.

    Args:
        predicted: Predicted values (any array-like NumPy can convert to float).
        true: Ground-truth values, broadcastable against ``predicted``.

    Returns:
        The average of ``(predicted - true) ** 2`` over all elements.
    """
    # Convert to float before subtracting/squaring: integer dtypes can wrap
    # around on the difference or overflow on the square, and plain Python
    # lists do not support element-wise arithmetic at all.
    predicted = numpy.asarray(predicted, dtype=float)
    true = numpy.asarray(true, dtype=float)
    return numpy.average((predicted - true) ** 2)

# TODO: call with regression data, e.g.
# print(loss_mean_squared_error(sport_weight_predicted, sport_weight_true))

### Clasificare binară

$$
BCE=-\frac{1}{n}\sum_{i=1}^n \left[\text{true}_{1,i}\cdot\log\left(\text{predicted}_{1,i}\right)+\text{true}_{2,i}\cdot\log\left(\text{predicted}_{2,i}\right)\right]
$$

In [77]:
def loss_binary_cross_entropy(
    predicted: numpy.ndarray,
    true: numpy.ndarray,
) -> float:
    """Return the binary cross-entropy averaged over the samples.

    Only the first two columns of both arrays are used; each row contributes
    ``true[0] * log(predicted[0]) + true[1] * log(predicted[1])``.

    Args:
        predicted: 2-D array of predicted probabilities with at least two columns.
        true: 2-D array of target values with at least two columns.

    Returns:
        The negated average of the per-row sums. A term whose target is zero
        contributes exactly zero, even when the predicted probability is zero.
    """
    predicted = numpy.asarray(predicted, dtype=float)
    true = numpy.asarray(true)

    # Index the two columns explicitly so that an input with fewer than two
    # columns still raises IndexError.
    probabilities = predicted[:, [0, 1]]
    targets = true[:, [0, 1]]

    # Take the logarithm only where the target is non-zero: otherwise
    # 0 * log(0) evaluates to 0 * -inf = NaN and poisons the whole average.
    log_probabilities = numpy.zeros_like(probabilities)
    numpy.log(probabilities, out=log_probabilities, where=targets != 0)

    terms = targets * log_probabilities
    return -numpy.average(terms[:, 0] + terms[:, 1])

# Binary cross-entropy of the binary dataset loaded in the data section.
print(
    loss_binary_cross_entropy(
        predicted=binary_predicted,
        true=binary_true,
    )
)

0.6966520452952675


### Clasificare multi-class și multi-label

$$
CCE=-\frac{1}{n}\sum_{i=1}^n\sum_k \text{true}_{k,i}\cdot\log\left(\text{predicted}_{k,i}\right)
$$

In [82]:
def loss_categorical_cross_entropy(
    predicted: numpy.ndarray,
    true: numpy.ndarray,
) -> float:
    """Return the categorical cross-entropy averaged over the samples.

    Each row contributes ``sum_k true[k] * log(predicted[k])`` over all columns.

    Args:
        predicted: 2-D array of predicted probabilities, one row per sample.
        true: 2-D array of target values with the same shape as ``predicted``.

    Returns:
        The negated average of the per-row sums. A term whose target is zero
        contributes exactly zero, even when the predicted probability is zero.
    """
    predicted = numpy.asarray(predicted, dtype=float)
    true = numpy.asarray(true)

    # Take the logarithm only where the target is non-zero: otherwise
    # 0 * log(0) evaluates to 0 * -inf = NaN and poisons the whole average.
    log_predicted = numpy.zeros_like(predicted)
    numpy.log(predicted, out=log_predicted, where=true != 0)

    # Sum over the class axis in one vectorised step instead of looping
    # over the columns in Python.
    per_sample = numpy.sum(true * log_predicted, axis=1)

    return -numpy.average(per_sample)

# Categorical cross-entropy of the multi-class dataset, then of the
# multi-target dataset, one value per line.
print(
    *(
        loss_categorical_cross_entropy(predicted_values, true_values)
        for predicted_values, true_values in (
            (multi_class_predicted, multi_class_true),
            (multi_target_predicted, multi_target_true),
        )
    ),
    sep="\n",
)

0.8983100982579407
1.3178916581432447
