In [1]:
import json

import joblib
import numpy
import pandas
import seaborn
import sklearn
import sklearn.metrics

%matplotlib inline

# Instructions

- Read **the test data** from the CSV file and properly set the index
- Load the trained model

In [2]:
# Load the test data and use the `id` column as the row index.
data_test = pandas.read_csv('./data/features.test.csv', index_col='id')
data_test

Unnamed: 0_level_0,feature_2,feature_3,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
645,9.303652,-6.539224,1
402,-1.501131,-2.053955,0
758,3.684480,4.312892,1
330,1.817236,1.061347,1
110,8.828041,-2.564622,0
...,...,...,...
387,1.565732,-0.354053,0
557,11.231470,-1.216562,0
778,-7.441229,0.065030,0
611,10.733720,1.966244,1


In [3]:
# Restore the trained model from disk.
# NOTE(review): joblib files are pickle-based, so loading one executes code
# embedded in the file - only load model files from a trusted source.
model = joblib.load(filename='model/model.joblib')
model

# Instructions
- Calculate and add the `prediction` column to the dataframe of test data
- Write the dataframe to `./results/predictions.test.csv`

In [4]:
# Predict from the feature columns only.
# `label` must exist (a missing label column still raises KeyError), whereas
# `prediction` is dropped only if present. Without that second drop, re-running
# this cell would pass the previous predictions to the model as an extra feature.
features_test = (
    data_test
    .drop(columns='label')
    .drop(columns='prediction', errors='ignore')
)
data_test['prediction'] = model.predict(features_test)
data_test

Unnamed: 0_level_0,feature_2,feature_3,label,prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
645,9.303652,-6.539224,1,1
402,-1.501131,-2.053955,0,0
758,3.684480,4.312892,1,1
330,1.817236,1.061347,1,1
110,8.828041,-2.564622,0,1
...,...,...,...,...
387,1.565732,-0.354053,0,1
557,11.231470,-1.216562,0,1
778,-7.441229,0.065030,0,0
611,10.733720,1.966244,1,1


In [5]:
# Persist the test data together with the new `prediction` column; the index
# is written as the first column of the CSV.
data_test.to_csv(path_or_buf='./results/predictions.test.csv')

# Instructions
- Use `sklearn.metrics.confusion_matrix` to calculate the confusion matrix

In [6]:
# Confusion matrix of true labels against model predictions.
# scikit-learn puts the true class on the rows and the predicted class on the
# columns, so a binary problem reads [[TN, FP], [FN, TP]].
labels_true = data_test['label']
labels_predicted = data_test['prediction']
confusion_matrix_from_sklearn = sklearn.metrics.confusion_matrix(labels_true, labels_predicted)
confusion_matrix_from_sklearn

array([[71, 12],
       [11, 94]])

# Instructions

- Use `sklearn.metrics.classification_report` to print the classification report
- Now, **manually fill** in the following metrics for class `y=1` and put them in a Python dictionary
- The expected output should look like:

```
scores = {
     'true_positive': 109,
     'true_negative': 103,
     'false_negative': 19,
     'false_positive': 19,
     'accuracy': 0.848,
     'precision': 0.851562,
     'recall': 0.851562,
     'support': 128,
     'f1-score': 0.851562
}
```
- Use `json.dump` to save the output to `./results/scores.test.json`


In [7]:
# Per-class precision / recall / F1 / support, shown as a table.
# `digits` is deliberately not passed: scikit-learn ignores it when
# `output_dict=True` and returns the values unrounded.
pandas.DataFrame(sklearn.metrics.classification_report(
    y_true = data_test['label'],
    y_pred = data_test['prediction'],
    output_dict = True,
))


Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.865854,0.886792,0.87766,0.876323,0.877548
recall,0.855422,0.895238,0.87766,0.87533,0.87766
f1-score,0.860606,0.890995,0.87766,0.875801,0.877579
support,83.0,105.0,0.87766,188.0,188.0


In [8]:
# Scores for the positive class (y=1), derived from the confusion matrix
# computed earlier in this notebook instead of being typed in by hand, so the
# saved numbers cannot drift from the actual predictions.
# A binary scikit-learn confusion matrix is laid out as [[TN, FP], [FN, TP]];
# `int(...)` converts the numpy integers to plain Python ints, which
# `json.dump` can serialise.
true_negative, false_positive, false_negative, true_positive = (
    int(count) for count in confusion_matrix_from_sklearn.ravel()
)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Support is the number of samples whose true label is 1.
support = true_positive + false_negative
sample_count = support + true_negative + false_positive
precision = _safe_ratio(true_positive, true_positive + false_positive)
recall = _safe_ratio(true_positive, support)

scores = {
    'true_positive': true_positive,
    'true_negative': true_negative,
    'false_negative': false_negative,
    'false_positive': false_positive,
    'accuracy': round(_safe_ratio(true_positive + true_negative, sample_count), 6),
    'precision': round(precision, 6),
    'recall': round(recall, 6),
    'support': support,
    'f1-score': round(_safe_ratio(2 * precision * recall, precision + recall), 6),
}
scores

In [9]:
# Save the scores dictionary as indented (human-readable) JSON.
with open('./results/scores.test.json', 'w') as scores_file:
    json.dump(scores, scores_file, indent=4)