# Clasificación

In [None]:
import numpy as np

In [None]:
# Simulated ground truth: 100 binary labels, each equal to 1 with probability 0.9.
y = np.random.binomial(n=1, p=0.9, size=100)

# Accuracy

In [None]:
# Accuracy of the classifier that always predicts the opposite label.
# For integer 0/1 labels, ~y + 2 is the same as 1 - y.
np.mean(y == (1 - y))

In [None]:
# Accuracy of the constant classifier that always predicts 1.
always_one = np.ones(100)
np.mean(y == always_one)

In [None]:
# Accuracy of the constant classifier that always predicts 0.
always_zero = np.zeros(100)
np.mean(y == always_zero)

In [None]:
# A more imbalanced sample: 1000 binary labels, each equal to 1 with probability 0.99.
y = np.random.binomial(n=1, p=0.99, size=1000)

In [None]:
# Accuracy of the "always predict 1" classifier on the 1000-label sample.
always_one = np.ones(1000)
np.mean(y == always_one)

# Recall

In [None]:
# Ground truth for the recall example: 100 binary labels, each 1 with probability 0.6.
y = np.random.binomial(n=1, p=0.6, size=100)

In [None]:
# Predictions start as an exact copy of the labels; positions 0 and 3 are then
# overwritten with 0 and 1 respectively.
yh = np.array(y)
yh[[0, 3]] = [0, 1]

In [None]:
# Boolean mask selecting the samples whose true label is 1.
m = np.equal(y, 1)

In [None]:
# Recall: fraction of the masked (truly positive) samples that yh predicts as 1.
np.mean(yh[m] == 1)

In [None]:
# Mask of the samples whose true label is 1.
m = np.equal(y, 1)

In [None]:
# Fraction of the masked samples that yh predicts as 0.
np.mean(yh[m] == 0)

# Precision

In [None]:
# Predictions for the precision example: start with every sample predicted as 0.
hy = np.zeros(100, dtype=float)

In [None]:
# Mark positions 0 and 3 as predicted positives.
hy[[0, 3]] = 1

In [None]:
# Boolean mask selecting the samples predicted as 1.
m = np.equal(hy, 1)

In [None]:
# Precision: fraction of the masked (predicted positive) samples whose true label is 1.
np.mean(y[m] == 1)

# F1

In [None]:
from sklearn.metrics import f1_score, recall_score, precision_score

In [None]:
# F1 of the positive class for predictions hy against labels y.
f1_score(y_true=y, y_pred=hy)

In [None]:
# Recall of hy: fraction of the truly positive samples predicted as 1.
# The mask is built from y here; reusing the earlier `m = hy == 1` mask would
# compare hy with itself and always yield 1.0.
(hy[y == 1] == 1).mean()

In [None]:
# F1 as the harmonic mean of precision and recall, computed from the current
# y / hy instead of constants copied from one particular random draw.
precision = precision_score(y, hy)
recall = recall_score(y, hy)
# When both are 0 the harmonic mean is 0/0; report 0.0 in that case.
2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0

In [None]:
# Per-class (average=None) F1, recall and precision, returned as a 3-tuple in that order.
tuple(metric(y, hy, average=None) for metric in (f1_score, recall_score, precision_score))

# Macro - *

In [None]:
# Macro-averaged F1 for predictions hy against labels y.
f1_score(y_true=y, y_pred=hy, average='macro')

In [None]:
# Macro-averaged recall for predictions hy against labels y.
recall_score(y_true=y, y_pred=hy, average='macro')

In [None]:
# Imbalanced labels (each 1 with probability 0.9) scored against a constant
# classifier that predicts 1 for every sample.
y = np.random.binomial(n=1, p=0.9, size=100)
hy = np.ones(100, dtype=float)

In [None]:
# Macro-averaged F1 of the all-ones predictions.
f1_score(y_true=y, y_pred=hy, average='macro')

In [None]:
# Macro-averaged recall of the all-ones predictions.
recall_score(y_true=y, y_pred=hy, average='macro')

# Comparación

In [None]:
from sklearn.datasets import load_wine

In [None]:
# Load the wine dataset as a (features, target) pair and unpack it.
features_and_target = load_wine(return_X_y=True)
X, y = features_and_target

In [None]:
# Random boolean mask with one entry per row of X, each True with probability
# 0.5; it is used to split the data into train and test parts.
# The builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (where it raises AttributeError).
m = np.random.binomial(1, 0.5, X.shape[0]).astype(bool)

In [None]:
# Rows where the mask is True form the training set; the remaining rows form the test set.
held_out = ~m
Xtrain, ytrain = X[m], y[m]
Xtest, ytest = X[held_out], y[held_out]

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Build both classifiers with default settings, then fit each one on the training split.
nb = GaussianNB()
tree = DecisionTreeClassifier()
for clf in (nb, tree):
    clf.fit(Xtrain, ytrain)

In [None]:
# Per-class F1 on the training split, as a (naive Bayes, decision tree) pair.
tuple(f1_score(ytrain, clf.predict(Xtrain), average=None) for clf in (nb, tree))

In [None]:
# Per-class F1 on the test split, as a (naive Bayes, decision tree) pair.
tuple(f1_score(ytest, clf.predict(Xtest), average=None) for clf in (nb, tree))

In [None]:
# Macro-averaged F1 on the test split, as a (naive Bayes, decision tree) pair.
tuple(f1_score(ytest, clf.predict(Xtest), average='macro') for clf in (nb, tree))

# Videos

* [Video 1](https://youtu.be/i7LiU_1O-Dk)
* [Video 2](https://youtu.be/atsoVRf7xOg)

# K-Fold Cross-validation

$(\mathcal X_1, \mathcal V_1), \ldots, (\mathcal X_K, \mathcal V_K)$

$\mathcal X = \mathcal V_1 \cup \ldots \cup \mathcal V_K$

$\mathcal V_i \cap \mathcal V_j = \emptyset \quad \forall\, i \neq j$

In [None]:
from sklearn.model_selection import KFold

In [None]:
# 10-fold splitter; shuffling with a fixed random_state keeps the folds reproducible.
kf = KFold(10, shuffle=True, random_state=0)

In [None]:
# For every fold, fit both classifiers on the training indices and record
# their macro-F1 on the held-out indices as a (naive Bayes, tree) pair.
comparar = []
for tr, ts in kf.split(X):
    nb = GaussianNB()
    nb.fit(X[tr], y[tr])
    score = f1_score(y[ts], nb.predict(X[ts]), average='macro')
    tree = DecisionTreeClassifier()
    tree.fit(X[tr], y[tr])
    tree_score = f1_score(y[ts], tree.predict(X[ts]), average='macro')
    comparar.append((score, tree_score))

In [None]:
# Column-wise mean over the folds: average macro-F1 of each classifier.
np.asarray(comparar).mean(axis=0)

# Paired test

In [None]:
# Per-fold difference between the first and second score of each pair.
p = [first - second for first, second in comparar]

$\frac{\sqrt K \cdot m}{S} \sim t_{K-1}$

$(-t_{\frac{\alpha}{2}, K-1}, t_{\frac{\alpha}{2}, K-1})$

$t_{0.025, 9} = 2.26$

In [None]:
# Mean and sample standard deviation of the per-fold differences.
# ddof=1 selects the (n - 1) denominator, which is the estimator the paired
# t statistic assumes; NumPy's default (ddof=0) yields a smaller S and
# therefore an inflated statistic.
m = np.mean(p)
s = np.std(p, ddof=1)

In [None]:
# Paired t statistic sqrt(K) * m / S. K is taken from the number of per-fold
# differences rather than a hard-coded 10, so it stays correct if the number
# of folds changes.
(np.sqrt(len(p)) * m) / s

In [None]:
from sklearn.model_selection import StratifiedKFold

# Videos
* [Video 3](https://youtu.be/f5cfAHY4Fg4)
* [Video 4](https://youtu.be/kO5VYOCFj1g)