# Process Results

## Imports

In [1]:
import pandas as pd
import cufflinks as cf

cf.go_offline()

In [2]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import recall_score

In [3]:
colors=['red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue','red', 'blue']

In [4]:
def compute_metrics(predictions, real):
    metrics = dict()
    bin_preds = predictions
    metrics['mse'] = mean_squared_error(bin_preds, real)
    metrics['recall'] = recall_score(bin_preds, real)
    metrics['f1'] = f1_score(bin_preds, real)
    metrics['acc'] = accuracy_score(bin_preds, real)
    return metrics

## Machine Learning

In [5]:
results_base_line = pd.read_pickle('machine_learning/tweeter/base_line/2-clases/results.pkl').to_dict()
results_grid = pd.read_pickle('machine_learning/tweeter/grid_search/2-clases/results.pkl').to_dict()
results_test = pd.read_pickle('machine_learning/tweeter/grid_search/2-clases/test_results.pkl').to_dict()

In [6]:
def get_results_df(res, nrange):
    keys = res.keys()
    results = []
    for k in keys:
        for i in range(nrange):
            results.append(compute_metrics(res[k]['predicted'][i], res[k]['real'][i]))
    results_df = pd.DataFrame(results).transpose()
    results_df.columns = pd.MultiIndex.from_product([keys, range(nrange)])
    results_df = results_df.transpose().reset_index().groupby(['level_0']).mean()
    results_df = results_df.drop(columns=['level_1'])
    return results_df

base line results over test set

In [86]:
get_results_df(results_base_line, 10).style.highlight_max()

Unnamed: 0_level_0,acc,f1,mse,recall
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
lr,0.771733,0.810739,0.228267,0.775426
ls,0.772948,0.810216,0.227052,0.780145
mb,0.799696,0.823197,0.200304,0.837787
rf,0.73921,0.785951,0.26079,0.745388


In [96]:
get_results_df(results_base_line, 10).to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &        f1 &       mse &    recall \\\\\nlevel\\_0 &           &           &           &           \\\\\n\\midrule\nlr      &  0.771733 &  0.810739 &  0.228267 &  0.775426 \\\\\nls      &  0.772948 &  0.810216 &  0.227052 &  0.780145 \\\\\nmb      &  0.799696 &  0.823197 &  0.200304 &  0.837787 \\\\\nrf      &  0.739210 &  0.785951 &  0.260790 &  0.745388 \\\\\n\\bottomrule\n\\end{tabular}\n'

means of grid search results over k-fold

In [85]:
get_results_df(results_grid, 10).style.highlight_max()

Unnamed: 0_level_0,acc,f1,mse,recall
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
lr,0.73921,0.782815,0.26079,0.752238
ls,0.723708,0.775764,0.276292,0.728284
mb,0.745289,0.778118,0.254711,0.781461
rf,0.703647,0.733663,0.296353,0.759517


In [95]:
get_results_df(results_grid, 10).to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &        f1 &       mse &    recall \\\\\nlevel\\_0 &           &           &           &           \\\\\n\\midrule\nlr      &  0.739210 &  0.782815 &  0.260790 &  0.752238 \\\\\nls      &  0.723708 &  0.775764 &  0.276292 &  0.728284 \\\\\nmb      &  0.745289 &  0.778118 &  0.254711 &  0.781461 \\\\\nrf      &  0.703647 &  0.733663 &  0.296353 &  0.759517 \\\\\n\\bottomrule\n\\end{tabular}\n'

Grid search results over test set

In [84]:
get_results_df(results_test, 1).style.highlight_max()

Unnamed: 0_level_0,acc,f1,mse,recall
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
lr,0.733333,0.776722,0.266667,0.755196
ls,0.729787,0.780656,0.270213,0.737758
mb,0.756738,0.788141,0.243262,0.796504
rf,0.70922,0.73852,0.29078,0.772


In [94]:
get_results_df(results_test, 1).to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &        f1 &       mse &    recall \\\\\nlevel\\_0 &           &           &           &           \\\\\n\\midrule\nlr      &  0.733333 &  0.776722 &  0.266667 &  0.755196 \\\\\nls      &  0.729787 &  0.780656 &  0.270213 &  0.737758 \\\\\nmb      &  0.756738 &  0.788141 &  0.243262 &  0.796504 \\\\\nrf      &  0.709220 &  0.738520 &  0.290780 &  0.772000 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [10]:
pd.DataFrame(results_test['lr']['real'][0]).iplot(kind='histogram')

In [11]:
pd.DataFrame([results_test['lr']['predicted'][0], results_test['ls']['predicted'][0],\
             results_test['mb']['predicted'][0], results_test['rf']['predicted'][0]], index=['lr', 'ls' , 'mb', 'rf'])\
.transpose().iplot(kind='histogram')

We could highlight Naive bayes model over the others because it seems to have better generalization over test cases

## Deep Learning

### Get Results

In [32]:
lstm_base = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_val_lstm.pkl')
lstm_base_evas = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_val_lstm_evas.pkl')
lstm_base_pred = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_val_preds.pkl')

In [35]:
lstm_simpler = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_simple_lstm.pkl')
lstm_simpler_evas = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_simple_lstm_evas.pkl')
lstm_simpler_preds = pd.read_pickle('deep_learning/tweeter/2-clases/lstm_simple_preds.pkl')

In [36]:
lstm_dropout = pd.read_pickle('deep_learning/tweeter/2-clases/dropout_lstm_lstm.pkl')
lstm_dropout_evas = pd.read_pickle('deep_learning/tweeter/2-clases/dropout_lstm_lstm_evas.pkl')
lstm_dropout_preds = pd.read_pickle('deep_learning/tweeter/2-clases/dropout_lstm_preds.pkl')

In [37]:
lstm_dropout2 = pd.read_pickle('deep_learning/tweeter/2-clases/dropout2_lstm_lstm.pkl')
lstm_dropout2_evas = pd.read_pickle('deep_learning/tweeter/2-clases/dropout2_lstm_lstm_evas.pkl')
lstm_dropout2_preds = pd.read_pickle('deep_learning/tweeter/2-clases/dropout2_lstm_preds.pkl')

In [41]:
lstm_bn = pd.read_pickle('deep_learning/tweeter/2-clases/bn_lstm_lstm.pkl')
lstm_bn_evas = pd.read_pickle('deep_learning/tweeter/2-clases/bn_lstm_lstm_evas.pkl')
lstm_bn_preds = pd.read_pickle('deep_learning/tweeter/2-clases/bn_lstm_preds.pkl')

In [42]:
lstm_glorot = pd.read_pickle('deep_learning/tweeter/2-clases/glorot_lstm_lstm.pkl')
lstm_glorot_evas = pd.read_pickle('deep_learning/tweeter/2-clases/glorot_lstm_lstm_evas.pkl')
lstm_glorot_preds = pd.read_pickle('deep_learning/tweeter/2-clases/glorot_lstm_preds.pkl')

In [43]:
lstm_glorot_wo_bn = pd.read_pickle('deep_learning/tweeter/2-clases/glorot__wobn_lstm_lstm.pkl')
lstm_glorot_wo_bn_evas = pd.read_pickle('deep_learning/tweeter/2-clases/glorot__wobn_lstm_lstm_evas.pkl')
lstm_glorot_wo_bn_preds = pd.read_pickle('deep_learning/tweeter/2-clases/glorot__wobn_lstm_preds.pkl')

In [44]:
lstm_double = pd.read_pickle('deep_learning/tweeter/2-clases/double_lstm_lstm.pkl')
lstm_double_evas = pd.read_pickle('deep_learning/tweeter/2-clases/double_lstm_lstm_evas.pkl')
lstm_double_preds = pd.read_pickle('deep_learning/tweeter/2-clases/double_lstm_preds.pkl')

In [46]:
lstm_conv = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional_lstm.pkl')
lstm_conv_evas = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional_lstm_evas.pkl')
lstm_conv_preds = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional_preds.pkl')

In [47]:
lstm_conv1d = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional1d_lstm.pkl')
lstm_conv1d_evas = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional1d_lstm_evas.pkl')
lstm_conv1d_preds = pd.read_pickle('deep_learning/tweeter/2-clases/convolutional1d_preds.pkl')

In [48]:
lstm_bidirectional = pd.read_pickle('deep_learning/tweeter/2-clases/bidirectional_lstm.pkl')
lstm_bidirectional_evas = pd.read_pickle('deep_learning/tweeter/2-clases/bidirectional_lstm_evas.pkl')
lstm_bidirectional_preds = pd.read_pickle('deep_learning/tweeter/2-clases/bidirectional_preds.pkl')

### Plot results

In [23]:
print('base')
lstm_base.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('simplification')
lstm_simpler.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('dropout')
lstm_dropout.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('dropout 0.2')
lstm_dropout2.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('batch normalization')
lstm_bn.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('glorot initialization')
lstm_glorot.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('glorot initialization without batch normalization')
lstm_glorot_wo_bn.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('double lstm')
lstm_double.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('convolutional lstm')
lstm_conv.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('convolutional 2d lstm')
lstm_conv1d.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)
print('bidirectional lstm')
lstm_bidirectional.loc[:, pd.IndexSlice[:, ['loss', 'val_loss']]].iplot(colors=colors)

base


simplification


dropout


dropout 0.2


batch normalization


glorot initialization


glorot initialization without batch normalization


double lstm


convolutional lstm


convolutional 2d lstm


bidirectional lstm


In [24]:
print('base')
lstm_base_evas.iplot()
print('simplification')
lstm_simpler_evas.iplot()
print('dropout')
lstm_dropout_evas.iplot()
print('dropout 0.2')
lstm_dropout2_evas.iplot()
print('batch normalization')
lstm_bn_evas.iplot()
print('glorot initialization')
lstm_glorot_evas.iplot()
print('glorot initialization without batch normalization')
lstm_glorot_wo_bn_evas.iplot()
print('double lstm')
lstm_double_evas.iplot()
print('convolutional lstm')
lstm_conv_evas.iplot()
print('convolutional 2d lstm')
lstm_conv1d_evas.iplot()
print('bidirectional lstm')
lstm_bidirectional_evas.iplot()

base


simplification


dropout


dropout 0.2


batch normalization


glorot initialization


glorot initialization without batch normalization


double lstm


convolutional lstm


convolutional 2d lstm


bidirectional lstm


### Table results

#### Train results

In [25]:
means = pd.concat([\
           lstm_base.stack(level=0).mean(),\
           lstm_simpler.stack(level=0).mean(),\
           lstm_dropout.stack(level=0).mean(),\
           lstm_dropout2.stack(level=0).mean(),\
           lstm_bn.stack(level=0).mean(),\
           lstm_glorot.stack(level=0).mean(),\
           lstm_glorot_wo_bn.stack(level=0).mean(),\
           lstm_double.stack(level=0).mean(),\
           lstm_conv.stack(level=0).mean(),\
           lstm_conv1d.stack(level=0).mean(),\
           lstm_bidirectional.stack(level=0).mean()
          ], axis=1)

In [26]:
means.columns = ['base', 'simpler', 'dropout', 'dropout 0.2', 'batch norm', 'glorot', 'glorot_wo_bn', 'double', 'conv', 'conv1d', 'bidirectional']

In [83]:
means.transpose().style.highlight_min(subset=pd.IndexSlice[:, ['val_loss']])

Unnamed: 0,acc,loss,val_acc,val_loss
base,0.795908,0.408313,0.765856,1.3411
simpler,0.739269,0.488603,0.727224,0.53658
dropout,0.65263,0.598253,0.692827,0.588355
dropout 0.2,0.742694,0.498521,0.734458,0.580971
batch norm,0.824369,0.410353,0.777518,0.522213
glorot,0.830993,0.376538,0.754975,0.531392
glorot_wo_bn,0.723824,0.515169,0.718045,0.577295
double,0.744781,0.480423,0.7292,0.536301
conv,0.969988,0.0782381,0.808744,0.682893
conv1d,0.970137,0.0796165,0.815268,0.664958


In [93]:
means.transpose().to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &      loss &   val\\_acc &  val\\_loss \\\\\n\\midrule\nbase          &  0.795908 &  0.408313 &  0.765856 &  1.341105 \\\\\nsimpler       &  0.739269 &  0.488603 &  0.727224 &  0.536580 \\\\\ndropout       &  0.652630 &  0.598253 &  0.692827 &  0.588355 \\\\\ndropout 0.2   &  0.742694 &  0.498521 &  0.734458 &  0.580971 \\\\\nbatch norm    &  0.824369 &  0.410353 &  0.777518 &  0.522213 \\\\\nglorot        &  0.830993 &  0.376538 &  0.754975 &  0.531392 \\\\\nglorot\\_wo\\_bn  &  0.723824 &  0.515169 &  0.718045 &  0.577295 \\\\\ndouble        &  0.744781 &  0.480423 &  0.729200 &  0.536301 \\\\\nconv          &  0.969988 &  0.078238 &  0.808744 &  0.682893 \\\\\nconv1d        &  0.970137 &  0.079617 &  0.815268 &  0.664958 \\\\\nbidirectional &  0.928475 &  0.189918 &  0.822442 &  0.516847 \\\\\n\\bottomrule\n\\end{tabular}\n'

The better results in terms of val_loss (more robust model) are the ones achieved by batch norm model, so we can assume this model will generalize better on predictions. The resuts we get on accuracy could depend highly on how the data is distributed, so if a model predicts a high rate os positive texts it could be getting higher accuracy due to there are a lot more positive texts in corpus

#### validation results

In [29]:
means_test = pd.concat([\
           lstm_base_evas.mean(),\
           lstm_simpler_evas.mean(),\
           lstm_dropout_evas.mean(),\
           lstm_dropout2_evas.mean(),\
           lstm_bn_evas.mean(),\
           lstm_glorot_evas.mean(),\
           lstm_glorot_wo_bn_evas.mean(),\
           lstm_double_evas.mean(),\
           lstm_conv_evas.mean(),\
           lstm_conv1d_evas.mean(),\
           lstm_bidirectional_evas.mean()
          ], axis=1)

In [30]:
means_test.columns = ['base', 'simpler', 'dropout', 'dropout 0.2', 'batch norm', 'glorot', 'glorot_wo_bn', 'double', 'conv', 'conv1d', 'bidirectional']

In [77]:
means_test.transpose().style.highlight_max()

Unnamed: 0,acc,f1,mse,recall
base,0.80617,0.817238,0.19383,0.901163
simpler,0.798227,0.821239,0.201773,0.845267
dropout,0.785887,0.824628,0.214113,0.810059
dropout 0.2,0.81,0.835971,0.19,0.839154
batch norm,0.81227,0.838702,0.18773,0.83863
glorot,0.810355,0.834371,0.189645,0.846984
glorot_wo_bn,0.797305,0.817123,0.202695,0.864239
double,0.780993,0.809816,0.219007,0.858894
conv,0.82617,0.849522,0.17383,0.85608
conv1d,0.819362,0.839427,0.180638,0.867145


In [92]:
means_test.transpose().to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &        f1 &       mse &    recall \\\\\n\\midrule\nbase          &  0.806170 &  0.817238 &  0.193830 &  0.901163 \\\\\nsimpler       &  0.798227 &  0.821239 &  0.201773 &  0.845267 \\\\\ndropout       &  0.785887 &  0.824628 &  0.214113 &  0.810059 \\\\\ndropout 0.2   &  0.810000 &  0.835971 &  0.190000 &  0.839154 \\\\\nbatch norm    &  0.812270 &  0.838702 &  0.187730 &  0.838630 \\\\\nglorot        &  0.810355 &  0.834371 &  0.189645 &  0.846984 \\\\\nglorot\\_wo\\_bn  &  0.797305 &  0.817123 &  0.202695 &  0.864239 \\\\\ndouble        &  0.780993 &  0.809816 &  0.219007 &  0.858894 \\\\\nconv          &  0.826170 &  0.849522 &  0.173830 &  0.856080 \\\\\nconv1d        &  0.819362 &  0.839427 &  0.180638 &  0.867145 \\\\\nbidirectional &  0.816596 &  0.843058 &  0.183404 &  0.839131 \\\\\n\\bottomrule\n\\end{tabular}\n'

#### Test results

In [51]:
preds = pd.concat([\
                   lstm_base_pred,\
                   lstm_simpler_preds,\
                   lstm_dropout_preds,\
                   lstm_dropout2_preds,\
                   lstm_bn_preds,\
                   lstm_glorot_preds,\
                   lstm_glorot_wo_bn_preds,\
                   lstm_double_preds,\
                   lstm_conv_preds,\
                   lstm_conv1d_preds,\
                   lstm_bidirectional_preds\
                  ], axis=1)

In [55]:
preds = preds.applymap(lambda x: 1 if x >= 0.5 else 0)

In [56]:
preds.columns = ['base', 'simpler', 'dropout', 'dropout 0.2', 'batch norm', 'glorot', 'glorot_wo_bn', 'double', 'conv', 'conv1d', 'bidirectional']

In [59]:
preds.shape

(1410, 11)

In [63]:
metrics = []
for p in preds.columns:
    metrics.append(compute_metrics(preds[p], results_test['ls']['real'][0]))

In [90]:
pd.DataFrame(metrics, index = preds.columns).style.highlight_max(axis=0)

Unnamed: 0,acc,f1,mse,recall
base,0.802128,0.803383,0.197872,0.948419
simpler,0.814184,0.832051,0.185816,0.874663
dropout,0.82766,0.845714,0.17234,0.879789
dropout 0.2,0.81844,0.842365,0.18156,0.848635
batch norm,0.822695,0.85119,0.177305,0.829466
glorot,0.815603,0.849711,0.184397,0.805921
glorot_wo_bn,0.839716,0.85804,0.160284,0.882429
double,0.817021,0.827309,0.182979,0.914201
conv,0.789362,0.790106,0.210638,0.936348
conv1d,0.814184,0.849252,0.185816,0.802174


In [91]:
pd.DataFrame(metrics, index = preds.columns).to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &       acc &        f1 &       mse &    recall \\\\\n\\midrule\nbase          &  0.802128 &  0.803383 &  0.197872 &  0.948419 \\\\\nsimpler       &  0.814184 &  0.832051 &  0.185816 &  0.874663 \\\\\ndropout       &  0.827660 &  0.845714 &  0.172340 &  0.879789 \\\\\ndropout 0.2   &  0.818440 &  0.842365 &  0.181560 &  0.848635 \\\\\nbatch norm    &  0.822695 &  0.851190 &  0.177305 &  0.829466 \\\\\nglorot        &  0.815603 &  0.849711 &  0.184397 &  0.805921 \\\\\nglorot\\_wo\\_bn  &  0.839716 &  0.858040 &  0.160284 &  0.882429 \\\\\ndouble        &  0.817021 &  0.827309 &  0.182979 &  0.914201 \\\\\nconv          &  0.789362 &  0.790106 &  0.210638 &  0.936348 \\\\\nconv1d        &  0.814184 &  0.849252 &  0.185816 &  0.802174 \\\\\nbidirectional &  0.810638 &  0.824918 &  0.189362 &  0.889675 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [88]:
pd.concat([pd.DataFrame(results_test['lr']['real'][0]), preds ]).iplot(kind='histogram')