# Day 09. Exercise 02
# Metrics

## 0. Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import joblib
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import label_binarize

## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [21]:
data = pd.read_csv('../data/day-of-week-not-scaled.csv')
data

Unnamed: 0,numTrials,hour,uid_user_0,uid_user_1,uid_user_10,uid_user_11,uid_user_12,uid_user_13,uid_user_14,uid_user_15,...,labname_lab02,labname_lab03,labname_lab03s,labname_lab05s,labname_laba04,labname_laba04s,labname_laba05,labname_laba06,labname_laba06s,labname_project1
0,1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,3,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,9,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1682,6,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1683,7,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1684,8,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [22]:
weeks = pd.read_csv('../data/dayofweek.csv')
weeks = weeks['dayofweek']
weeks

0       4
1       4
2       4
3       4
4       4
       ..
1681    3
1682    3
1683    3
1684    3
1685    3
Name: dayofweek, Length: 1686, dtype: int64

In [23]:
X = data
y = weeks

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=y)

## 2. SVM

1. Use the best parameters from the previous exercise and train the model of SVM.
2. You need to calculate `accuracy`, `precision`, `recall`, `ROC AUC`.

 - `precision` and `recall` should be calculated for each class (use `average='weighted'`)
 - `ROC AUC` should be calculated for each class against any other class (all possible pairwise combinations) and then weighted average should be applied for the final metric
 - the code in the cell should display the result as below:

```
accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878
```

In [9]:
svc = SVC(C=10, class_weight=None, gamma='auto', kernel='rbf', random_state=21, probability=True)
svc.fit(X_train, y_train)

In [10]:
y_pred = svc.predict(X_test)
y_scores = svc.predict_proba(X_test)

In [11]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)


y_test_bin = label_binarize(y_test, classes=np.unique(y))
y_scores_bin = label_binarize(np.argmax(y_scores, axis=1), classes=np.unique(y))

roc_auc = []
for i in range(y_test_bin.shape[1]):
    if np.sum(y_test_bin[:, i]) > 0: 
        roc_auc.append(roc_auc_score(y_test_bin[:, i], y_scores_bin[:, i]))

if roc_auc: 
    weighted_roc_auc = np.average(roc_auc, weights=[np.sum(y_test_bin[:, i]) for i in range(y_test_bin.shape[1]) if np.sum(y_test_bin[:, i]) > 0])
else:
    weighted_roc_auc = 0.0  

print(f'accuracy is {accuracy:.5f}')
print(f'precision is {precision:.5f}')
print(f'recall is {recall:.5f}')
print(f'roc_auc is {weighted_roc_auc:.5f}')

accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.93822


## 3. Decision tree

1. The same task for decision tree

In [12]:
tree = DecisionTreeClassifier(class_weight='balanced', criterion='entropy', max_depth=25, random_state=21)
tree.fit(X_train, y_train)

In [13]:
y_pred = tree.predict(X_test)
y_scores = tree.predict_proba(X_test)

In [14]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)


y_test_bin = label_binarize(y_test, classes=np.unique(y))
y_scores_bin = label_binarize(np.argmax(y_scores, axis=1), classes=np.unique(y))

roc_auc = roc_auc_score(y_test_bin, y_scores_bin)

print(f'accuracy is {accuracy:.5f}')
print(f'precision is {precision:.5f}')
print(f'recall is {recall:.5f}')
print(f'roc_auc is {roc_auc:.5f}')

accuracy is 0.90533
precision is 0.90667
recall is 0.90533
roc_auc is 0.93986


## 4. Random forest

1. The same task for random forest.

In [24]:
forest = RandomForestClassifier(class_weight='balanced', criterion='gini', max_depth=22, n_estimators=100, random_state=21)
forest.fit(X_train, y_train)

In [25]:
y_pred = forest.predict(X_test)
y_scores = forest.predict_proba(X_test)

In [26]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)


y_test_bin = label_binarize(y_test, classes=np.unique(y))
y_scores_bin = label_binarize(np.argmax(y_scores, axis=1), classes=np.unique(y))

roc_auc = roc_auc_score(y_test_bin, y_scores_bin)

print(f'accuracy is {accuracy:.5f}')
print(f'precision is {precision:.5f}')
print(f'recall is {recall:.5f}')
print(f'roc_auc is {roc_auc:.5f}')

accuracy is 0.93491
precision is 0.93565
recall is 0.93491
roc_auc is 0.94926


## 5. Predictions

1. Choose the best model.
2. Analyze: for which `weekday` your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which `labname` and for which `users`.
3. Save the model.

In [27]:
error_analysis = {
    'weekday_errors': {},
    'labname_errors': {},
    'user_errors': {}
}

data['predictions'] = forest.predict(X)
data['target'] = weeks
data['errors'] = data['predictions'] != data['target']

data

Unnamed: 0,numTrials,hour,uid_user_0,uid_user_1,uid_user_10,uid_user_11,uid_user_12,uid_user_13,uid_user_14,uid_user_15,...,labname_lab05s,labname_laba04,labname_laba04s,labname_laba05,labname_laba06,labname_laba06s,labname_project1,predictions,target,errors
0,1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4,4,False
1,2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4,4,False
2,3,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4,4,False
3,4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4,4,False
4,5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4,4,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,9,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3,3,False
1682,6,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3,3,False
1683,7,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3,3,False
1684,8,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3,3,False


In [33]:
for weekday in data['target'].unique():
    total_samples = data[data['target'] == weekday].shape[0]
    error_samples = data[(data['target'] == weekday) & (data['errors'])].shape[0]
    error_rate = (error_samples / total_samples) * 100 if total_samples > 0 else 0
    error_analysis['weekday_errors'][weekday] = error_rate


In [34]:
error_analysis['weekday_errors']

{np.int64(4): 2.8846153846153846,
 np.int64(5): 1.4760147601476015,
 np.int64(6): 0.2808988764044944,
 np.int64(0): 5.147058823529411,
 np.int64(1): 0.7299270072992701,
 np.int64(2): 1.342281879194631,
 np.int64(3): 0.7575757575757576}

In [28]:
for labname in data.columns[data.columns.str.startswith('labname_')]:
    total_samples = data[data[labname] == 1].shape[0]
    error_samples = data[(data[labname] == 1) & (data['errors'])].shape[0]
    error_rate = (error_samples / total_samples) * 100 if total_samples > 0 else 0
    error_analysis['labname_errors'][labname] = error_rate

In [30]:
error_analysis['labname_errors']

{'labname_code_rvw': 1.2195121951219512,
 'labname_lab02': 0.0,
 'labname_lab03': 100.0,
 'labname_lab03s': 100.0,
 'labname_lab05s': 2.7777777777777777,
 'labname_laba04': 2.8089887640449436,
 'labname_laba04s': 0.9615384615384616,
 'labname_laba05': 0.45045045045045046,
 'labname_laba06': 2.083333333333333,
 'labname_laba06s': 1.639344262295082,
 'labname_project1': 0.9463722397476341}

In [31]:
for user_col in data.columns[data.columns.str.startswith('uid_user_')]:
    total_samples = data[data[user_col] == 1].shape[0]
    error_samples = data[(data[user_col] == 1) & (data['errors'])].shape[0]
    error_rate = (error_samples / total_samples) * 100 if total_samples > 0 else 0
    error_analysis['user_errors'][user_col] = error_rate

In [32]:
error_analysis['user_errors']

{'uid_user_0': 0.0,
 'uid_user_1': 0.0,
 'uid_user_10': 1.4084507042253522,
 'uid_user_11': 0.0,
 'uid_user_12': 0.0,
 'uid_user_13': 1.6666666666666667,
 'uid_user_14': 0.7575757575757576,
 'uid_user_15': 0.0,
 'uid_user_16': 6.25,
 'uid_user_17': 0.0,
 'uid_user_18': 2.857142857142857,
 'uid_user_19': 2.197802197802198,
 'uid_user_2': 0.8264462809917356,
 'uid_user_20': 0.0,
 'uid_user_21': 0.0,
 'uid_user_22': 0.0,
 'uid_user_23': 0.0,
 'uid_user_24': 1.7857142857142856,
 'uid_user_25': 1.6666666666666667,
 'uid_user_26': 0.0,
 'uid_user_27': 4.3478260869565215,
 'uid_user_28': 0.0,
 'uid_user_29': 1.5625,
 'uid_user_3': 2.8169014084507045,
 'uid_user_30': 2.564102564102564,
 'uid_user_31': 2.666666666666667,
 'uid_user_4': 1.0638297872340425,
 'uid_user_6': 8.333333333333332,
 'uid_user_7': 0.0,
 'uid_user_8': 0.0}

In [36]:
joblib.dump(forest, '../data/forest_ex02.pkl')

['../data/forest_ex02.pkl']

## 6. Function

1. Write a function that takes a list of different models and a corresponding list of parameters (dicts) and returns a dict that contains all the 4 metrics for each model.

In [76]:
def eval_metrics(models, params, X_train, y_train, X_test, y_test, y):
    results = {}
    
    for model, param in zip(models, params):

        model_instance = model(**param)

        model_instance.fit(X_train, y_train)

        y_pred = model_instance.predict(X_test)
        y_scores = tree.predict_proba(X_test)

        y_test_bin = label_binarize(y_test, classes=np.unique(y))
        y_scores_bin = label_binarize(np.argmax(y_scores, axis=1), classes=np.unique(y))
        
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test_bin, y_scores_bin)

        results[model.__name__] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'auc_score': roc_auc
        }
    
    return results

In [80]:
models = [DecisionTreeClassifier, RandomForestClassifier]
params = [{'criterion': 'gini', 'max_depth': 36}, {'n_estimators': 100}]
results = eval_metrics(models, params, X_train, y_train, X_test, y_test, y)
print(results)

{'DecisionTreeClassifier': {'accuracy': 0.8698224852071006, 'precision': 0.8743032943311944, 'recall': 0.8698224852071006, 'auc_score': np.float64(0.9157852752779444)}, 'RandomForestClassifier': {'accuracy': 0.9319526627218935, 'precision': 0.9332776349027482, 'recall': 0.9319526627218935, 'auc_score': np.float64(0.9157852752779444)}}
