In [None]:
import pandas as pd
import numpy as np

# Load the LIWC feature tables for the train and test splits (kept as DataFrames).
liwc_train, liwc_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/data_final/train_data_liwc.csv",
        "data/data_final/test_data_liwc.csv",
    )
)

# Load the matching target files and convert them to NumPy arrays.
# Downstream cells flatten these with .ravel() before handing them to scikit-learn.
y_train, y_test = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in (
        "data/data_final/train_y.csv",
        "data/data_final/test_y.csv",
    )
)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Exhaustive hyperparameter search for an SVM classifier on the LIWC features.
#
# NOTE(review): the `rf_` prefixes are kept only so that any other cell referring
# to these names keeps working; the estimator here is an SVC.

# Base estimator handed to the search. Per the scikit-learn docs, SVC only consults
# random_state when probability=True, so the seed does not affect these fits; it is
# kept so the configuration stays explicit.
rf_base = SVC(random_state = 1)

# One dict per kernel so that each kernel is only combined with the parameters it
# actually uses. `degree` applies to the 'poly' kernel only (SVC ignores it for all
# other kernels), so it is not swept for 'rbf' -- doing so would fit every rbf
# candidate twice with identical results.
param_grid = [
      {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
      {'C': [1, 10, 100, 1000], 'degree': [1, 2], 'gamma': [0.1, 0.01, 0.001, 0.0001], 'kernel': ['poly']},
      {'C': [1, 10, 100, 1000], 'gamma': [0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']},
      {'C': [1, 10, 100, 1000], 'gamma': [0.1, 0.01, 0.001, 0.0001], 'kernel': ['sigmoid']}
     ]

# 3-fold cross-validated grid search scored by F1, using all available cores.
# GridSearchCV clones the estimator for every candidate, so rf_base itself is
# never fitted or modified.
rf_random = GridSearchCV(
        rf_base, param_grid, scoring= 'f1', cv = 3, verbose = 10, n_jobs = -1
        )

# y_train is loaded as a 2-D array from CSV; ravel() flattens it to the 1-D target
# vector that fit() expects.
rf_random.fit(liwc_train, y_train.ravel())

print(rf_random.best_params_)

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Fit the final SVM on the full training split and report per-class metrics on the
# held-out test split.
#
# NOTE(review): the hyperparameters are hard-coded -- presumably the best_params_
# reported by the grid search; confirm they still match after re-running it.
# `degree` is deliberately not passed: SVC only uses it for the 'poly' kernel and
# ignores it for 'rbf', so setting it here had no effect.
test_model = SVC(kernel = 'rbf', C = 1000, gamma = 0.0001)
test_model.fit(liwc_train, y_train.ravel())

# Flattened ground truth and predictions; both names are reused by the
# confusion-matrix cell below.
y_true = y_test.ravel()
y_pred = test_model.predict(liwc_test)
print(classification_report(y_true, y_pred))

In [None]:
import matplotlib.pyplot as plt
import itertools   
from sklearn.metrics import confusion_matrix
# Plot the confusion matrix of the test-set predictions as an annotated heat map.

# Label set = every class seen in either the ground truth or the predictions, in
# sorted order. This is the same set confusion_matrix infers by default; computing
# it explicitly keeps the tick labels aligned with the matrix rows/columns even if
# the model never predicts one of the classes (the previous code sized the ticks
# from the predictions alone, which fails in plt.xticks in that case).
cf_labels = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
cf = confusion_matrix(y_true, y_pred, labels=cf_labels)

plt.imshow(cf, cmap=plt.cm.Blues, interpolation='nearest')
plt.title('Confusion matrix')
plt.colorbar()
plt.xlabel('Predicted')
plt.ylabel('Actual')

# One tick per matrix row/column, labelled with the class value it represents.
tick_marks = np.arange(cf.shape[0])
class_labels = [str(label) for label in cf_labels]
plt.xticks(tick_marks, class_labels)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; use white text on dark cells (count above half
# of the maximum) and black text on light ones so the numbers stay readable.
thresh = cf.max() / 2.
for i, j in itertools.product(range(cf.shape[0]), range(cf.shape[1])):
    plt.text(j, i, format(cf[i, j], 'd'), horizontalalignment='center',
             color='white' if cf[i, j] > thresh else 'black')
plt.show()