# Day 09. Exercise 02
# Metrics

## 0. Imports

In [1]:
import pandas as pd     
import numpy as np 

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC 
from sklearn.tree import DecisionTreeClassifier   
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix

import joblib    

## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with the parameters `test_size=0.2` and `random_state=21`, get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [2]:
# Load the feature table from the "not scaled" CSV (path is relative to the notebook).
df = pd.read_csv(filepath_or_buffer='../data/day-of-week-not-scaled.csv')

In [3]:
# The target column is read from its own CSV and attached to the feature frame;
# pandas aligns the assigned Series with `df` on the row index.
df_day = pd.read_csv('../data/dayofweek.csv')
df['dayofweek'] = df_day.loc[:, 'dayofweek']

In [4]:
# Separate the target (`dayofweek`) from every other column, which become the features.
y = df['dayofweek']
X = df.drop(columns='dayofweek')

In [5]:
# Hold out 20% of the rows for evaluation. `stratify=y` asks for the same class
# proportions in both parts; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=21,
)

## 2. SVM

1. Use the best parameters from the previous exercise and train the model of SVM.
2. You need to calculate `accuracy`, `precision`, `recall`, `ROC AUC`.

 - `precision` and `recall` should be calculated for each class (use `average='weighted'`)
 - `ROC AUC` should be calculated for each class against any other class (all possible pairwise combinations) and then weighted average should be applied for the final metric
 - the code in the cell should display the result as below:

```
accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878
```

In [6]:
# SVM configured with fixed hyper-parameters; `probability=True` is required so that
# `predict_proba` is available for the ROC AUC computation below.
svm = SVC(
    kernel='rbf',
    C=10,
    gamma='auto',
    class_weight=None,
    probability=True,
    random_state=21,
)
# `fit` returns the estimator itself, so the prediction can be chained.
y_pred = svm.fit(X_train, y_train).predict(X_test)
y_proba = svm.predict_proba(X_test)

# Class-weighted precision/recall; ROC AUC is one-vs-one over the class
# probabilities, combined with a weighted average.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted')
rec = recall_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(
    y_test,
    y_proba,
    multi_class='ovo',
    average='weighted',
)

print(
    f"accuracy is {acc:.5f}",
    f"precision is {prec:.5f}",
    f"recall is {rec:.5f}",
    f"roc_auc is {roc_auc:.5f}",
    sep="\n",
)

accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878


## 3. Decision tree

1. The same task for the decision tree.

In [7]:
# Decision tree with fixed hyper-parameters, trained on the training split.
tree = DecisionTreeClassifier(
    max_depth=21,
    criterion='gini',
    class_weight='balanced',
    random_state=21,
)
# `fit` returns the estimator itself, so the prediction can be chained.
y_pred = tree.fit(X_train, y_train).predict(X_test)
y_proba = tree.predict_proba(X_test)

# Same four metrics as for the SVM: weighted precision/recall and
# one-vs-one, weighted ROC AUC.
acc_tree = accuracy_score(y_test, y_pred)
prec_tree = precision_score(y_test, y_pred, average='weighted')
rec_tree = recall_score(y_test, y_pred, average='weighted')
roc_auc_tree = roc_auc_score(
    y_test,
    y_proba,
    multi_class='ovo',
    average='weighted',
)

print(
    f"accuracy is {acc_tree:.5f}",
    f"precision is {prec_tree:.5f}",
    f"recall is {rec_tree:.5f}",
    f"roc_auc is {roc_auc_tree:.5f}",
    sep="\n",
)

accuracy is 0.88462
precision is 0.88765
recall is 0.88462
roc_auc is 0.93528


## 4. Random forest

1. The same task for random forest.

In [8]:
# Random forest with fixed hyper-parameters, trained on the training split.
rfc = RandomForestClassifier(
    n_estimators=100,
    max_depth=24,
    criterion='entropy',
    class_weight='balanced',
    random_state=21,
)
# `fit` returns the estimator itself, so the prediction can be chained.
y_pred = rfc.fit(X_train, y_train).predict(X_test)
y_proba = rfc.predict_proba(X_test)

# Same four metrics as for the other models: weighted precision/recall and
# one-vs-one, weighted ROC AUC.
acc_rfc = accuracy_score(y_test, y_pred)
prec_rfc = precision_score(y_test, y_pred, average='weighted')
rec_rfc = recall_score(y_test, y_pred, average='weighted')
roc_auc_rfc = roc_auc_score(
    y_test,
    y_proba,
    multi_class='ovo',
    average='weighted',
)

print(
    f"accuracy is {acc_rfc:.5f}",
    f"precision is {prec_rfc:.5f}",
    f"recall is {rec_rfc:.5f}",
    f"roc_auc is {roc_auc_rfc:.5f}",
    sep="\n",
)

accuracy is 0.92604
precision is 0.92754
recall is 0.92604
roc_auc is 0.98939


## 5. Predictions

1. Choose the best model.
2. Analyze: for which `weekday` your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which `labname` and for which `users`.
3. Save the model.

In [9]:
# One record per candidate: its four test-set metrics plus the fitted estimator.
metrics = [
    dict(name='svm', accuracy=acc, precision=prec, recall=rec,
         roc_auc=roc_auc, model=svm),
    dict(name='tree', accuracy=acc_tree, precision=prec_tree, recall=rec_tree,
         roc_auc=roc_auc_tree, model=tree),
    dict(name='rfc', accuracy=acc_rfc, precision=prec_rfc, recall=rec_rfc,
         roc_auc=roc_auc_rfc, model=rfc),
]
# Rank by accuracy, with precision as the tie-breaker. `max` returns the first of
# equally ranked records, the same element a stable descending sort puts at index 0.
best_model = max(metrics, key=lambda record: (record['accuracy'], record['precision']))
print(f"Best model: {best_model['name']}")

Best model: rfc


In [10]:
# Refit the selected estimator on the training split and predict the test split.
best_model['model'].fit(X_train, y_train)
y_pred_best = best_model['model'].predict(X_test)

# Rows of the confusion matrix are true classes, so a row sum is the number of
# test samples of that class and the diagonal holds the correct predictions.
weekdays = np.unique(y_test)
cm = confusion_matrix(y_test, y_pred_best, labels=weekdays)
samples_per_day = cm.sum(axis=1)
errors = (samples_per_day - np.diag(cm)) / samples_per_day

# Share of misclassified test samples for each weekday.
for i, day in enumerate(weekdays):
    print(f'Model makes the most errors {day}: {errors[i]:.2%}')

Model makes the most errors 0: 22.22%
Model makes the most errors 1: 7.27%
Model makes the most errors 2: 6.67%
Model makes the most errors 3: 3.75%
Model makes the most errors 4: 14.29%
Model makes the most errors 5: 9.26%
Model makes the most errors 6: 2.82%


In [11]:
# Error rate on the test split for each lab indicator column (`labname_*`).
lab_cols = [name for name in X_test.columns if name.startswith('labname_')]
for lab in lab_cols:
    idx = X_test[lab] == 1          # test rows flagged for this lab
    n_rows = idx.sum()
    if n_rows == 0:
        # No test rows for this lab: nothing to report (and avoids dividing by zero).
        continue
    n_wrong = (y_test[idx] != y_pred_best[idx]).sum()
    err = n_wrong / n_rows
    print(f'Model makes the most errors {lab}: {err:.2%}')

Model makes the most errors labname_code_rvw: 7.69%
Model makes the most errors labname_lab03: 100.00%
Model makes the most errors labname_lab03s: 0.00%
Model makes the most errors labname_lab05s: 16.67%
Model makes the most errors labname_laba04: 17.14%
Model makes the most errors labname_laba04s: 8.00%
Model makes the most errors labname_laba05: 2.13%
Model makes the most errors labname_laba06: 11.11%
Model makes the most errors labname_laba06s: 13.33%
Model makes the most errors labname_project1: 5.38%


In [12]:
# Error rate on the test split for each user indicator column (`uid_user_*`).
user_cols = [name for name in X_test.columns if name.startswith('uid_user_')]
for user in user_cols:
    idx = X_test[user] == 1         # test rows flagged for this user
    n_rows = idx.sum()
    if n_rows == 0:
        # No test rows for this user: nothing to report (and avoids dividing by zero).
        continue
    n_wrong = (y_test[idx] != y_pred_best[idx]).sum()
    err = n_wrong / n_rows
    print(f'Model makes the most errors {user}: {err:.2%}')

Model makes the most errors uid_user_1: 0.00%
Model makes the most errors uid_user_10: 8.33%
Model makes the most errors uid_user_12: 0.00%
Model makes the most errors uid_user_13: 5.88%
Model makes the most errors uid_user_14: 3.23%
Model makes the most errors uid_user_15: 0.00%
Model makes the most errors uid_user_16: 20.00%
Model makes the most errors uid_user_17: 0.00%
Model makes the most errors uid_user_18: 16.67%
Model makes the most errors uid_user_19: 10.53%
Model makes the most errors uid_user_2: 10.71%
Model makes the most errors uid_user_20: 0.00%
Model makes the most errors uid_user_21: 0.00%
Model makes the most errors uid_user_22: 100.00%
Model makes the most errors uid_user_23: 0.00%
Model makes the most errors uid_user_24: 9.09%
Model makes the most errors uid_user_25: 9.09%
Model makes the most errors uid_user_26: 0.00%
Model makes the most errors uid_user_27: 16.67%
Model makes the most errors uid_user_28: 0.00%
Model makes the most errors uid_user_29: 9.09%
Model ma

In [13]:
# Persist the selected estimator; the call's return value is the cell output.
joblib.dump(value=best_model['model'], filename='../data/best_model.joblib')

['../data/best_model.joblib']

## 6. Function

1. Write a function that takes a list of different models and a corresponding list of parameters (dicts) and returns a dict that contains all the 4 metrics for each model.

In [14]:
def written(models_with_params, X_test, y_test, X_train=None, y_train=None):
    """Evaluate several classifiers and collect four metrics for each of them.

    Parameters
    ----------
    models_with_params : iterable of (name, model, params)
        ``name`` is the key used in the returned dict. ``model`` must expose
        ``predict`` (plus ``set_params`` when ``params`` is non-empty, ``fit``
        when training data is passed, and optionally ``predict_proba``).
        ``params`` is a dict of hyper-parameters, or a falsy value to leave the
        model as it is.
    X_test, y_test
        Evaluation features and true labels.
    X_train, y_train : optional
        Training data. When both are given, every model is fitted on them
        *after* its parameters are set, so the reported metrics really
        correspond to ``params``. When both are omitted the models are used as
        already fitted by the caller: ``params`` are still set on the estimator
        but cannot influence the predictions until it is fitted again.

    Returns
    -------
    dict
        ``{name: {'accuracy', 'precision', 'recall', 'roc_auc', 'params'}}``.
        ``roc_auc`` is ``None`` when the model has no ``predict_proba`` or when
        the ROC AUC computation raises ``ValueError``.

    Raises
    ------
    ValueError
        If only one of ``X_train`` / ``y_train`` is supplied.

    Notes
    -----
    The models are modified in place (``set_params`` and, optionally, ``fit``).
    """
    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together")
    refit = X_train is not None

    results = {}
    for name, model, params in models_with_params:
        if params:
            model.set_params(**params)
        if refit:
            # Without this the new parameters would have no effect on the metrics:
            # the estimator would keep predicting with its previous fit.
            model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test) if hasattr(model, "predict_proba") else None

        acc_written = accuracy_score(y_test, y_pred)
        # zero_division=0: a class that is never predicted contributes 0 instead of a warning.
        prec_written = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec_written = recall_score(y_test, y_pred, average='weighted', zero_division=0)

        roc_auc_written = None
        if y_proba is not None:
            try:
                roc_auc_written = roc_auc_score(y_test, y_proba, average='weighted', multi_class='ovo')
            except ValueError:
                # Best effort: keep the other metrics when ROC AUC cannot be computed.
                roc_auc_written = None

        results[name] = {
            'accuracy': acc_written,
            'precision': prec_written,
            'recall': rec_written,
            'roc_auc': roc_auc_written,
            'params': params
        }
    return results
        

In [15]:
# (result key, estimator, hyper-parameters) triples handed to `written`.
models_with_params = [
    ('svm', svm, {'kernel': 'rbf', 'C': 10, 'gamma': 'auto'}),
    ('tree', tree, {'max_depth': 21, 'criterion': 'gini'}),
    ('rf', rfc, {'n_estimators': 100, 'max_depth': 24, 'criterion': 'entropy'})
]

metrics_dict = written(models_with_params, X_test, y_test)
# The loop variable is not called `metrics` on purpose: that name already holds the
# list of model records built in the model-selection cell, and reusing it here would
# silently rebind it to a single dict.
for name, model_metrics in metrics_dict.items():
    print(f"{name}: {model_metrics}")

svm: {'accuracy': 0.8875739644970414, 'precision': 0.8926729169690374, 'recall': 0.8875739644970414, 'roc_auc': 0.9787793228216216, 'params': {'kernel': 'rbf', 'C': 10, 'gamma': 'auto'}}
tree: {'accuracy': 0.8846153846153846, 'precision': 0.8876518218623483, 'recall': 0.8846153846153846, 'roc_auc': 0.935280206669359, 'params': {'max_depth': 21, 'criterion': 'gini'}}
rf: {'accuracy': 0.9260355029585798, 'precision': 0.9275374670957044, 'recall': 0.9260355029585798, 'roc_auc': 0.9893851880258296, 'params': {'n_estimators': 100, 'max_depth': 24, 'criterion': 'entropy'}}
