In [None]:
# Fine-tune (--do_train) and evaluate (--do_eval) with run_classifier.py on the `cola` task.
# Inputs are read from ./data/, weights are initialised from ./model/bert_model.ckpt, a
# checkpoint is saved every 4000 steps, and the run writes to ./output/.
# --do_lower_case=False is passed here, so any later run against this model should use the same setting.
!python run_classifier.py --task_name=cola --do_train=true --do_eval=true --data_dir=./data/ --vocab_file=./model/vocab.txt --bert_config_file=./model/bert_config.json --init_checkpoint=./model/bert_model.ckpt --max_seq_length=256 --train_batch_size=8 --learning_rate=2e-5 --num_train_epochs=3.0 --save_checkpoints_steps=4000 --output_dir=./output/ --do_lower_case=False


In [None]:
!python run_classifier.py --task_name=cola --do_predict=true --data_dir=./data/ --vocab_file=./model/vocab.txt --bert_config_file=./model/bert_config.json --init_checkpoint=./output/model.ckpt-12000 --max_seq_length=256 --output_dir=./output/


In [None]:
import pandas as pd
# Read the prediction output as a tab-separated table without a header row,
# then preview the first rows.
df_result = pd.read_csv(
    'output/test_results.tsv',
    header=None,
    sep='\t',
)
df_result.head()

In [None]:
# Read the tab-separated test file; later cells use its `id`, `text_clean`
# and `labels` columns.
df_test_with_label = pd.read_csv(
    'data/test_2.tsv',
    sep='\t',
)

In [None]:
# Assemble one row per test example: its id, the predicted label and the cleaned text.
# The predicted label is the column label holding the largest value in each row of df_result.
#
# pandas aligns the three columns on their index, so if the prediction output and the
# test file had different row counts the result would be silently padded with NaN.
# Fail loudly instead of writing and scoring misaligned rows.
if len(df_result) != len(df_test_with_label):
    raise ValueError(
        'Prediction rows (%d) do not match test rows (%d)'
        % (len(df_result), len(df_test_with_label))
    )

df_predict = pd.DataFrame({'id': df_test_with_label['id'],
                           'labels': df_result.idxmax(axis=1),
                           'text': df_test_with_label['text_clean']})
df_predict.head()

In [None]:
# Persist the assembled predictions as CSV, leaving out the DataFrame index.
df_predict.to_csv(path_or_buf='data/df_predict.csv', index=False)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score

In [None]:
# Reference labels come from the test file, predicted labels from the assembled frame.
y_labels, y_predict = df_test_with_label['labels'], df_predict['labels']

In [None]:
def _show(template, value):
    """Print `value` formatted with `template` and return it unchanged."""
    print(template % value)
    return value


# Each metric is computed and reported in turn; precision, recall and F1 are
# macro-averaged across classes.
accuracy = _show('Accuracy: %f', accuracy_score(y_labels, y_predict))
precision = _show('Precision: %f', precision_score(y_labels, y_predict, average='macro'))
recall = _show('Recall: %f', recall_score(y_labels, y_predict, average='macro'))
f1 = _show('F1 score: %f', f1_score(y_labels, y_predict, average='macro'))

In [None]:
# Confusion matrix of the reference labels against the predicted labels,
# printed as a plain array.
cm = confusion_matrix(y_true=y_labels, y_pred=y_predict)
print(cm)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline
import numpy as np

# Draw the raw-count confusion matrix as an annotated heatmap.
# NOTE(review): the tick names assume the matrix rows/columns are ordered
# pos, neg, neu — confirm against the label encoding used in the data.
class_names = ['pos', 'neg', 'neu']

ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', cmap='Greens', ax=ax)  # annot=True writes the value in each cell

# Axis captions, title and tick names.
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names);

In [None]:
# Row-normalized confusion matrix: every row is divided by its own total, so each
# cell is the fraction of that row's samples that fell into each column.
row_totals = cm.sum(axis=1, keepdims=True)
# A row whose total is zero (a class that occurs only among the predictions) would
# turn into NaN under plain division and raise a RuntimeWarning; keep such rows at 0.
cmn = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)
cmn = np.around(cmn, decimals=2)  # two decimals keep the cell annotations readable
ax = plt.subplot()
sns.heatmap(cmn, annot=True, ax = ax, fmt='g', cmap='Blues'); #annot=True to annotate cells
# labels, title and ticks
# NOTE(review): the tick names assume the matrix rows/columns are ordered
# pos, neg, neu — confirm against the label encoding used in the data.
ax.set_xlabel('Predicted labels'); ax.set_ylabel('True labels'); 
ax.set_title('Normalized Confusion Matrix'); 
ax.xaxis.set_ticklabels(['pos', 'neg', 'neu']); 
ax.yaxis.set_ticklabels(['pos', 'neg', 'neu']);