# Evaluation

In [None]:
import os
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (confusion_matrix, accuracy_score, 
                            f1_score, recall_score, balanced_accuracy_score, 
                            precision_score, precision_recall_fscore_support)

In [None]:
# Load the evaluation data (the test split, going by the file name) into a DataFrame.
data = pd.read_parquet(path='data/test.parquet')
# Columns of `data` used as model inputs. The order is significant: the
# columns are selected in exactly this order before being encoded.
feature_questions = [
    'dq823', 'dq789', 'dq109', 'dq821', 'dq8', 'dq305', 'dq373', 'dq420',
    'dq10', 'dq141', 'dq9', 'dq0', 'dq268', 'dq198', 'dq331', 'dq500',
    'dq296', 'dq299', 'dq824', 'dq95', 'dq475', 'dq581', 'dq294', 'dq118',
    'dq40', 'dq725', 'dq34', 'dq234', 'dq136', 'dq173', 'dq64', 'dq117',
    'dq516', 'dq337', 'dq579', 'dq237', 'dq426', 'dq657', 'dq820', 'dq391',
    'dq348', 'dq359', 'dq327', 'dq379', 'dq354', 'dq759', 'dq470', 'dq11',
    'dq175', 'dq60', 'dq650', 'dq23', 'dq586', 'dq679', 'dq663', 'dq158',
    'dq251', 'dq257', 'dq307', 'dq256', 'dq681', 'dq270',
]

In [None]:
# Keep only the rows that have a value for every feature question; rows with
# any missing feature are dropped before encoding.
X_test = data[feature_questions].dropna()
# Pick the targets for exactly the surviving rows with a single
# .loc[rows, column] lookup instead of chained indexing.
y_test = data.loc[X_test.index, 'political_belief']

# Load the fitted encoders inside context managers so the file handles are
# closed deterministically.
# NOTE: unpickling executes arbitrary code; only load encoder files that come
# from a trusted source.
with open('models/feature_encoder.pkl', 'rb') as encoder_file:
    feature_encoder = pickle.load(encoder_file)
with open('models/target_encoder.pkl', 'rb') as encoder_file:
    target_encoder = pickle.load(encoder_file)

# Apply the encoders to features and targets.
X_test = feature_encoder.transform(X_test)
y_test = target_encoder.transform(y_test.values.ravel())
print('X_test shape: ', X_test.shape)
print('y_test shape: ', y_test.shape)


In [None]:
# Load the persisted classifier. Passing the path lets joblib open and close
# the file itself, instead of leaking a handle from a bare open() call.
# NOTE(review): the file name suggests a random forest fitted on a downsampled
# training set -- confirm against the training notebook.
# NOTE: joblib files are pickle-based; only load models from a trusted source.
model = joblib.load('models/rf_downsampled_trn_set.joblib')

In [None]:
# Predict encoded class labels for the test features.
y_pred = model.predict(X_test)

# Confusion matrix over the four encoded classes; normalize='true' divides
# each row by the number of samples of that true class.
conf_matrix = confusion_matrix(y_test, y_pred, normalize='true', labels=range(4))

# Aggregate scores; the 'weighted' average weights each class by its support.
accuracy = accuracy_score(y_test, y_pred)
balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')

# Per-class precision / recall / f1 / support. The label set is pinned to the
# same four classes as the confusion matrix so the result always has four
# entries per metric, even if a class is absent from y_test and y_pred.
prfs = precision_recall_fscore_support(y_test, y_pred, labels=range(4))

print('accuracy', accuracy)
print('balanced accuracy', balanced_accuracy)
print('f1 ', f1)
print('recall ', recall)
print('precision ', precision)
# The fourth array returned above is the per-class support (sample count).
print('among classes: precision, recall, f1, support', prfs)
print(conf_matrix)


In [None]:
# Tabulate the per-class metrics: one row per metric, one column per class.
# NOTE(review): the column names assume the encoded labels 0..3 correspond to
# these classes in this order -- confirm against the fitted target encoder.
prfs = pd.DataFrame(
    data=prfs,
    index=['precision', 'recall', 'f1', 'size'],
    columns=['centrist', 'conservative', 'liberal', 'other'],
)

In [None]:
# Display names for the four classes, used for both heatmap axes.
labels = ['Centrist', 'Conservative', 'Liberal', 'Other']

plt.figure(figsize=(8, 6), dpi=100)
# Scale up the size of all text
sns.set(font_scale=1.1)

# Draw the confusion matrix with each cell annotated by its value.
ax = sns.heatmap(conf_matrix, annot=True)

# set x-axis label and ticks.
ax.set_xlabel("Predicted Political Orientation", fontsize=14, labelpad=20)
ax.xaxis.set_ticklabels(labels)

# set y-axis label and ticks
ax.set_ylabel("Actual Political Orientation", fontsize=14, labelpad=20)
ax.yaxis.set_ticklabels(labels)

# Make sure the output directory exists so savefig does not fail on a fresh
# checkout, then write the figure to disk before showing it.
os.makedirs('data/images', exist_ok=True)
plt.savefig('data/images/ConfusionMatrix.png', bbox_inches='tight')

plt.show()