In [599]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import (confusion_matrix, classification_report, precision_recall_fscore_support,
                             roc_auc_score, roc_curve, log_loss, auc)
from sklearn.multiclass import OneVsRestClassifier

##plotting
from plotly.offline import init_notebook_mode, iplot, download_plotlyjs
import plotly.offline as pyo
import cufflinks as cf
import matplotlib.pyplot as plt
init_notebook_mode(connected=True)
cf.go_offline()

In [97]:
# Load the Iris dataset; the cells below read its .data, .target,
# .feature_names and .target_names attributes.
iris = load_iris()

In [98]:
# One row per flower: the four measurements followed by the numeric class code.
# Column labels are the feature names minus their last five characters
# (the unit suffix), so they read e.g. 'petal width'.
df = pd.DataFrame(
    np.hstack((iris.data.reshape(-1, 4), iris.target.reshape(-1, 1))),
    columns=[name[:-5] for name in iris.feature_names] + ['target'],
)

In [99]:
# Replace the numeric class code with the matching species name.
df['target'] = df['target'].map(lambda code: iris.target_names[int(code)])

In [100]:
# Scatter of petal length against petal width, one marker trace per species.
species_traces = []
for label in iris.target_names:
    species_rows = df.loc[df['target'] == label]
    species_traces.append({
        'x': species_rows['petal width'],
        'y': species_rows['petal length'],
        'name': label,
        'mode': 'markers',
    })

petal_layout = {
    'xaxis': {'title': 'petal width'},
    'yaxis': {'title': "petal length"},
}
pyo.iplot({'data': species_traces, 'layout': petal_layout})

In [101]:
# One-hot encode the species names: `target` is replaced by the indicator
# columns target_setosa / target_versicolor / target_virginica.
df = pd.get_dummies(df)

In [104]:
# Split the column names into one-hot label columns and feature columns.
# Both lists are built with comprehensions so they follow the DataFrame's
# column order. A set difference would yield an arbitrary order that can
# change between kernel sessions (str hashing is randomised), and the feature
# order is part of what the models are trained on.
target = [col for col in df.columns if col.startswith('target')]
variables = [col for col in df.columns if not col.startswith('target')]

In [385]:
# Hold out 20% of the rows, then train three classifiers on the one-hot labels.
X_train, X_test, y_train, y_test = train_test_split(
    df[variables], df[target], test_size=0.2, random_state=42
)

# Model definitions: the forest takes the indicator matrix directly, while
# naive Bayes and the SVM are wrapped one-vs-rest.
rf = RandomForestClassifier(n_estimators=10, max_depth=4, min_samples_leaf=3, random_state=42)
nb = OneVsRestClassifier(GaussianNB())
svm = OneVsRestClassifier(SVC(kernel='linear', gamma='auto', probability=True, random_state=42))

rf.fit(X_train, y_train)
nb.fit(X_train, y_train)
svm.fit(X_train, y_train)

OneVsRestClassifier(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=42, shrinking=True,
  tol=0.001, verbose=False),
          n_jobs=None)

In [386]:
def multi_label_confusion_matrix(X, y, clf):
    """Build a class-by-class confusion matrix from one-hot labels.

    Parameters
    ----------
    X : features, passed unchanged to ``clf.predict``.
    y : DataFrame of indicator columns, one per class (read through
        ``y.values`` and ``y.shape``).
    clf : fitted estimator whose ``predict(X)`` returns an indicator matrix
        laid out like ``y``.

    Returns
    -------
    numpy.ndarray of float with shape ``(y.shape[1], y.shape[1])``. Entry
    ``[i, j]`` counts the samples whose first true label is column ``i`` and
    whose first predicted label is column ``j``.

    Notes
    -----
    Samples for which the classifier predicts no label at all are left out,
    so the matrix total can be smaller than the number of rows in ``y``.
    """
    n_classes = y.shape[1]
    counts = np.zeros([n_classes, n_classes])
    for actual_row, predicted_row in zip(y.values, clf.predict(X)):
        predicted_hits = np.flatnonzero(predicted_row == 1)
        if predicted_hits.size == 0:
            # No class was predicted for this sample; it is not counted.
            continue
        actual_hits = np.flatnonzero(actual_row == 1)
        counts[actual_hits[0], predicted_hits[0]] += 1
    return counts

Metrics for classification:
<br>
<br>
<font size='4'>
<center>
$Accuracy = \frac{TP+TN}{TP+TN+FP+FN}$    |    $Precision = \frac{TP}{TP+FP}$    |    $Recall = \frac{TP}{TP+FN}$    |    $F1 = \frac{2}{\frac{1}{Precision}+\frac{1}{Recall}}$
</center>
</font>

In [505]:
# Show the order of the one-hot label columns; the confusion matrices below
# are indexed by column position.
y_train.columns

Index(['target_setosa', 'target_versicolor', 'target_virginica'], dtype='object')

In [388]:
# Training-set confusion matrix and per-class metrics for the random forest.
print(
    multi_label_confusion_matrix(X_train, y_train, rf),
    classification_report(y_train, rf.predict(X_train)),
    sep='\n',
)

[[40.  0.  0.]
 [ 0. 38.  3.]
 [ 0.  2. 37.]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.95      0.93      0.94        41
           2       0.93      0.95      0.94        39

   micro avg       0.96      0.96      0.96       120
   macro avg       0.96      0.96      0.96       120
weighted avg       0.96      0.96      0.96       120
 samples avg       0.96      0.96      0.96       120



### Cohen's Kappa statistic
<br>
<br>
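Formula for the kappa statistic, where the observed accuracy is the share of samples on the diagonal of the confusion matrix and the expected accuracy is the agreement expected by chance (the sum over classes of the true-class marginal times the predicted-class marginal):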
<br>
<center>
<font size="4">
    $\kappa = \frac{(observed\space accuracy - expected\space accuracy)}{(1 - expected\space accuracy)}$ 
</font> 
</center>

In [566]:
# Training-set confusion matrix of the random forest (rows: true class,
# columns: predicted class); the kappa computation below starts from it.
rf_conf_matrix = multi_label_confusion_matrix(X_train, y_train, rf)
rf_conf_matrix

array([[40.,  0.,  0.],
       [ 0., 38.,  3.],
       [ 0.,  2., 37.]])

In [567]:
# Confusion matrix extended with marginal totals: the extra last column holds
# the row sums, the extra last row holds the column sums and the bottom-right
# corner holds the grand total.
n_classes = rf_conf_matrix.shape[0]
with_marginal = np.zeros((n_classes + 1, n_classes + 1))
with_marginal[:n_classes, :n_classes] = rf_conf_matrix
with_marginal[n_classes, :n_classes] = rf_conf_matrix.sum(axis=0)
with_marginal[:n_classes, n_classes] = rf_conf_matrix.sum(axis=1)
with_marginal[n_classes, n_classes] = rf_conf_matrix.sum()
# Normalise by the number of samples actually counted in the matrix rather
# than a fixed sample count, so the proportions always sum to 1.
with_marginal_prob = with_marginal / with_marginal[n_classes, n_classes]
print(with_marginal,'\n\n',with_marginal_prob)

[[ 40.   0.   0.  40.]
 [  0.  38.   3.  41.]
 [  0.   2.  37.  39.]
 [ 40.  40.  40. 120.]] 

 [[0.33333333 0.         0.         0.33333333]
 [0.         0.31666667 0.025      0.34166667]
 [0.         0.01666667 0.30833333 0.325     ]
 [0.33333333 0.33333333 0.33333333 1.        ]]


In [577]:
# Share of the counted samples that lie on the diagonal (true class == predicted class).
observed_accuracy = np.trace(rf_conf_matrix) / rf_conf_matrix.sum()
observed_accuracy

0.9583333333333334

In [576]:
# Chance agreement: for every class, multiply the share of samples that truly
# belong to it (last column) by the share predicted as it (last row), then
# sum over classes. The class count comes from the matrix size, not a constant.
expected_accuracy = 0
for i in range(with_marginal_prob.shape[0] - 1):
    expected_accuracy += with_marginal_prob[i, -1] * with_marginal_prob[-1, i]
expected_accuracy

0.3333333333333333

In [579]:
# Cohen's kappa: how far the observed accuracy exceeds chance agreement,
# relative to the largest possible improvement over chance.
kappa = (observed_accuracy - expected_accuracy)/(1 - expected_accuracy)
# Trailing expression so the cell actually displays the value.
kappa

0.9374999999999999

In [613]:
# Training-set confusion matrix and per-class metrics for one-vs-rest naive Bayes.
print(
    multi_label_confusion_matrix(X_train, y_train, nb),
    classification_report(y_train, nb.predict(X_train)),
    sep='\n',
)

[[40.  0.  0.]
 [ 0. 36.  2.]
 [ 0.  7. 32.]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.84      0.88      0.86        41
           2       0.81      0.97      0.88        39

   micro avg       0.88      0.95      0.91       120
   macro avg       0.88      0.95      0.91       120
weighted avg       0.88      0.95      0.91       120
 samples avg       0.90      0.95      0.91       120



In [614]:
# Training-set confusion matrix and per-class metrics for the one-vs-rest SVM.
print(
    multi_label_confusion_matrix(X_train, y_train, svm),
    classification_report(y_train, svm.predict(X_train)),
    sep='\n',
)

[[40.  0.  0.]
 [ 0. 17.  0.]
 [ 0.  6. 32.]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.71      0.41      0.52        41
           2       0.95      0.97      0.96        39

   micro avg       0.91      0.79      0.85       120
   macro avg       0.89      0.80      0.83       120
weighted avg       0.88      0.79      0.82       120
 samples avg       0.75      0.79      0.77       120



# Log loss

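Formula for binary classification, where N is the number of observations, $y_{i}$ is the true label (0 or 1) of observation $i$ and $p_{i}$ is the predicted probability that $y_{i} = 1$: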
<br>
<br>
<font size="4.5">
<center>
$logloss = -\frac{1}{N} \sum_{i=1}^{N} (y_{i} \log p_{i} + (1 - y_{i}) \log (1 - p_{i}))$ 
</center>
</font>
<br>
<br>
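Formula for multiclass classification, where N is the number of observations, M is the number of classes, $y_{i,c}$ is 1 if observation $i$ belongs to class $c$ (0 otherwise) and $p_{i,c}$ is the predicted probability that observation $i$ belongs to class $c$: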
<br>
<br>
<font size="4.5">
<center>
$logloss = -\frac{1}{N} \sum_{i=1}^{N}\sum_{c=1}^{M}  y_{i,c} \log p_{i,c}$ 
</center>
</font>

In [615]:
# Multiclass log loss of the random forest on the training set.
# The forest's predict_proba yields one (n_samples, 2) array per one-hot
# target column; column 1 of each is the probability that the indicator is 1.
# The matrix is assembled here so the cell does not rely on a helper that is
# only defined further down the notebook.
rf_train_proba = np.column_stack([proba[:, 1] for proba in rf.predict_proba(X_train)])
# log(0) * 0 evaluates to NaN; nan_to_num turns those terms into 0.
# The result is stored as `rf_log_loss` so the `log_loss` function imported
# from sklearn.metrics is not overwritten.
rf_log_loss = -np.nan_to_num(np.log(rf_train_proba) * y_train.values).sum() / y_train.shape[0]
print("Random forest log loss: %0.4f" % rf_log_loss)

Random forest log loss: 0.0933


In [616]:
# Multiclass log loss of the one-vs-rest naive Bayes model on the training set.
# log(0) * 0 evaluates to NaN; nan_to_num turns those terms into 0.
# The result is stored as `nb_log_loss` so the `log_loss` function imported
# from sklearn.metrics is not overwritten.
nb_log_loss = -np.nan_to_num(np.log(nb.predict_proba(X_train)) * y_train.values).sum() / y_train.shape[0]
print("Naive Bayes log loss: %0.4f" % nb_log_loss)

Naive Bayes log loss: 0.0992


In [617]:
# Multiclass log loss of the one-vs-rest SVM on the training set.
# log(0) * 0 evaluates to NaN; nan_to_num turns those terms into 0.
# The result is stored as `svm_log_loss` so the `log_loss` function imported
# from sklearn.metrics is not overwritten.
svm_log_loss = -np.nan_to_num(np.log(svm.predict_proba(X_train)) * y_train.values).sum() / y_train.shape[0]
print("SVM log loss: %0.4f" % svm_log_loss)

SVM log loss: 0.3547


In [580]:
def pred_proba_rf(rf, X, y):
    """Gather the positive-class probability of every output into one matrix.

    ``rf.predict_proba(X)`` is iterated as one 2-D array per output; column 1
    of each array is kept and the columns are placed side by side.

    Parameters
    ----------
    rf : fitted estimator exposing ``predict_proba``.
    X : features, passed unchanged to ``rf.predict_proba``.
    y : object whose ``shape`` gives the shape of the returned array.

    Returns
    -------
    numpy.ndarray reshaped to ``y.shape``, one column per output.
    """
    positive_columns = [per_output[:, 1] for per_output in rf.predict_proba(X)]
    return np.column_stack(positive_columns).reshape(y.shape)

In [641]:
# ROC inputs for the random forest on the training set: one one-vs-rest curve
# per species plus a micro-average over every (sample, class) pair.
rf_proba = pred_proba_rf(rf, X_train, y_train)
true_species = y_train.idxmax(axis=1)
fpr_0, tpr_0, thr_0 = roc_curve(1*(true_species=='target_setosa'), rf_proba[:,0])
fpr_1, tpr_1, thr_1 = roc_curve(1*(true_species=='target_versicolor'), rf_proba[:,1])
fpr_2, tpr_2, thr_2 = roc_curve(1*(true_species=='target_virginica'), rf_proba[:,2])
fpr_micro, tpr_micro, thr_micro = roc_curve(y_train.values.ravel(), rf_proba.ravel())

In [642]:
# Evaluate every ROC curve on one shared, sorted grid of false-positive rates
# (the union of the per-class and micro-average grids), so the curves share an
# index and can be averaged point by point.
ind_series = pd.Series(np.unique(np.concatenate((fpr_0, fpr_1, fpr_2, fpr_micro))), name='False Positive Rate')
fpr_grid = ind_series.values
# np.interp interpolates each curve linearly in FPR. Joining the curves with
# merges on the FPR value would multiply rows wherever a curve repeats an FPR
# value, and pandas' default interpolate() fills gaps by row position,
# ignoring how far apart the FPR values are.
roc_c = pd.DataFrame(
    {
        'Recall_setosa': np.interp(fpr_grid, fpr_0, tpr_0),
        'Recall_versicolor': np.interp(fpr_grid, fpr_1, tpr_1),
        'Recall_virginica': np.interp(fpr_grid, fpr_2, tpr_2),
        'Recall_micro': np.interp(fpr_grid, fpr_micro, tpr_micro),
    },
    index=pd.Index(fpr_grid, name='False Positive Rate'),
    columns=['Recall_setosa', 'Recall_versicolor', 'Recall_virginica', 'Recall_micro'],
)
# Reference diagonal: recall equal to the false-positive rate.
roc_c['Recall_random'] = roc_c.index
# Macro average: unweighted mean of the three per-class curves.
roc_c['Recall_macro'] = (roc_c['Recall_setosa']+roc_c['Recall_virginica']+roc_c['Recall_versicolor'])/3.0
roc_c.iplot(yTitle='Recall',xTitle='False Positive Rate',title='ROC curve for Random Forest classifier')

In [637]:
# ROC inputs for one-vs-rest naive Bayes on the training set: one curve per
# species plus a micro-average over every (sample, class) pair.
nb_proba = nb.predict_proba(X_train)
true_species = y_train.idxmax(axis=1)
fpr_0, tpr_0, thr_0 = roc_curve(1*(true_species=='target_setosa'), nb_proba[:,0])
fpr_1, tpr_1, thr_1 = roc_curve(1*(true_species=='target_versicolor'), nb_proba[:,1])
fpr_2, tpr_2, thr_2 = roc_curve(1*(true_species=='target_virginica'), nb_proba[:,2])
fpr_micro, tpr_micro, thr_micro = roc_curve(y_train.values.ravel(), nb_proba.ravel())

In [638]:
# Evaluate every ROC curve on one shared, sorted grid of false-positive rates
# (the union of the per-class and micro-average grids), so the curves share an
# index and can be averaged point by point.
ind_series = pd.Series(np.unique(np.concatenate((fpr_0, fpr_1, fpr_2, fpr_micro))), name='False Positive Rate')
fpr_grid = ind_series.values
# np.interp interpolates each curve linearly in FPR. Joining the curves with
# merges on the FPR value would multiply rows wherever a curve repeats an FPR
# value, and pandas' default interpolate() fills gaps by row position,
# ignoring how far apart the FPR values are.
roc_c = pd.DataFrame(
    {
        'Recall_setosa': np.interp(fpr_grid, fpr_0, tpr_0),
        'Recall_versicolor': np.interp(fpr_grid, fpr_1, tpr_1),
        'Recall_virginica': np.interp(fpr_grid, fpr_2, tpr_2),
        'Recall_micro': np.interp(fpr_grid, fpr_micro, tpr_micro),
    },
    index=pd.Index(fpr_grid, name='False Positive Rate'),
    columns=['Recall_setosa', 'Recall_versicolor', 'Recall_virginica', 'Recall_micro'],
)
# Reference diagonal: recall equal to the false-positive rate.
roc_c['Recall_random'] = roc_c.index
# Macro average: unweighted mean of the three per-class curves.
roc_c['Recall_macro'] = (roc_c['Recall_setosa']+roc_c['Recall_virginica']+roc_c['Recall_versicolor'])/3.0
roc_c.iplot(yTitle='Recall',xTitle='False Positive Rate',title='ROC curve for OvR Naive Bayes classifier')

In [639]:
# ROC inputs for the one-vs-rest SVM on the training set: one curve per
# species plus a micro-average over every (sample, class) pair.
svm_proba = svm.predict_proba(X_train)
true_species = y_train.idxmax(axis=1)
fpr_0, tpr_0, thr_0 = roc_curve(1*(true_species=='target_setosa'), svm_proba[:,0])
fpr_1, tpr_1, thr_1 = roc_curve(1*(true_species=='target_versicolor'), svm_proba[:,1])
fpr_2, tpr_2, thr_2 = roc_curve(1*(true_species=='target_virginica'), svm_proba[:,2])
fpr_micro, tpr_micro, thr_micro = roc_curve(y_train.values.ravel(), svm_proba.ravel())

In [640]:
# Evaluate every ROC curve on one shared, sorted grid of false-positive rates
# (the union of the per-class and micro-average grids), so the curves share an
# index and can be averaged point by point.
ind_series = pd.Series(np.unique(np.concatenate((fpr_0, fpr_1, fpr_2, fpr_micro))), name='False Positive Rate')
fpr_grid = ind_series.values
# np.interp interpolates each curve linearly in FPR. Joining the curves with
# merges on the FPR value would multiply rows wherever a curve repeats an FPR
# value, and pandas' default interpolate() fills gaps by row position,
# ignoring how far apart the FPR values are.
roc_c = pd.DataFrame(
    {
        'Recall_setosa': np.interp(fpr_grid, fpr_0, tpr_0),
        'Recall_versicolor': np.interp(fpr_grid, fpr_1, tpr_1),
        'Recall_virginica': np.interp(fpr_grid, fpr_2, tpr_2),
        'Recall_micro': np.interp(fpr_grid, fpr_micro, tpr_micro),
    },
    index=pd.Index(fpr_grid, name='False Positive Rate'),
    columns=['Recall_setosa', 'Recall_versicolor', 'Recall_virginica', 'Recall_micro'],
)
# Reference diagonal: recall equal to the false-positive rate.
roc_c['Recall_random'] = roc_c.index
# Macro average: unweighted mean of the three per-class curves.
roc_c['Recall_macro'] = (roc_c['Recall_setosa']+roc_c['Recall_virginica']+roc_c['Recall_versicolor'])/3.0
roc_c.iplot(yTitle='Recall',xTitle='False Positive Rate',title='ROC curve for OvR SVM classifier')

## Other datasets

In [507]:
from sklearn.datasets import make_classification

In [545]:
# Synthetic 4-class problem: 10,000 samples with 12 features, of which 7 are
# informative and 5 are redundant; the generator is seeded with 42.
# NOTE: this rebinds `target`, which earlier in the notebook held the list of
# one-hot label column names.
dataset, target = make_classification(n_samples=10000,
                              n_features=12,
                              n_informative=7,
                              n_redundant=5,
                              n_classes=4,
                              random_state=42)

In [549]:
# Combine the synthetic features and their class label in one frame, then
# display the pairwise correlations.
df2 = pd.DataFrame(dataset).assign(target=target)
df2.corr()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,target
0,1.0,-0.063466,-0.219969,-0.265537,0.124539,0.588696,-0.138705,-0.282483,-0.029489,-0.033697,-0.249294,-0.126437,0.064732
1,-0.063466,1.0,0.730113,-0.08887,0.300213,-0.158684,-0.30998,0.618606,0.380947,0.176545,-0.110717,-0.010717,0.08613
2,-0.219969,0.730113,1.0,0.484975,0.026864,-0.194444,0.097955,0.272312,0.274862,-0.237831,0.074473,-0.047581,0.11104
3,-0.265537,-0.08887,0.484975,1.0,-0.420529,-0.067088,0.21968,0.040124,-0.201632,-0.647978,-0.010502,0.351583,-0.011216
4,0.124539,0.300213,0.026864,-0.420529,1.0,-0.071938,-0.375645,-0.075131,-0.134489,0.112429,-0.587874,-0.159212,-0.314187
5,0.588696,-0.158684,-0.194444,-0.067088,-0.071938,1.0,-0.566534,-0.002766,0.427296,-0.617484,-0.470639,-0.043538,-0.108222
6,-0.138705,-0.30998,0.097955,0.21968,-0.375645,-0.566534,1.0,-0.508787,-0.430559,0.350791,0.829768,-0.360148,0.294507
7,-0.282483,0.618606,0.272312,0.040124,-0.075131,-0.002766,-0.508787,1.0,0.156249,0.021087,-0.123271,0.125148,0.004186
8,-0.029489,0.380947,0.274862,-0.201632,-0.134489,0.427296,-0.430559,0.156249,1.0,-0.195723,-0.11801,0.150869,0.067041
9,-0.033697,0.176545,-0.237831,-0.647978,0.112429,-0.617484,0.350791,0.021087,-0.195723,1.0,0.506784,-0.210982,0.227764
