# Appendix C: METHODOLOGY I – Models without Sentiment Scores

In [None]:
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the variable-selected financial dataset that Appendix B stored as a
# pickle file and keep it as the working DataFrame for this appendix.

dataset_pickle_path = "Variable selected financial dataset"
num_df_features = pd.read_pickle(dataset_pickle_path)

# Leave the DataFrame as the last expression so the notebook displays it.
num_df_features

In [None]:
# Drop the columns that are not needed for modelling.
# The DataFrame is modified in place, so no re-assignment is required.

columns_to_drop = ['Index', 'Instrument']
num_df_features.drop(columns=columns_to_drop, inplace=True)

In [None]:
# Class balance: number of rows for each value of the label column.

target_labels = num_df_features['Target/Non-Target']
target_labels.value_counts()

In [None]:
# Separate the predictors from the label, then split into train and test sets.
#
# The label column must not stay among the predictors: if it does, every
# model can read the answer directly from its input (target leakage) and the
# reported accuracy, precision and recall no longer measure anything.

X = num_df_features.drop(columns=['Target/Non-Target'])
y = num_df_features['Target/Non-Target']

# Shuffled 70/30 split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.3, random_state=11)

# BASELINE MODEL: Logistic Regression

In [None]:
# Baseline model: a logistic regression classifier with scikit-learn's
# default settings, fitted on the training split.
# (fit() returns the estimator itself, so construction and fitting are chained.)

lr = LogisticRegression().fit(X_train, y_train)

# Predicted labels for the test split; shown as the cell output.

labels_lr = lr.predict(X_test)
labels_lr

In [None]:
# Accuracy on the test split and on the training split; comparing the two
# gives a first indication of over- or under-fitting.

print('Logistic Regression Test Accuracy:', accuracy_score(y_test, labels_lr))

train_labels_lr = lr.predict(X_train)
print('Logistic Regression Train Accuracy:', accuracy_score(y_train, train_labels_lr))

# Precision, recall, f1-score and support per class (test first, then train):

print(classification_report(y_test, labels_lr))
print(classification_report(y_train, train_labels_lr))

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
# NOTE(review): the tick labels assume confusion_matrix orders the classes as
# (Non-Target, Target) - confirm against the label encoding.

class_names = ['Non-Target', 'Target']
mat_lr = confusion_matrix(y_test, labels_lr)
sns.heatmap(mat_lr, square=True, annot=True, fmt="d", cbar=False,
            xticklabels=class_names, yticklabels=class_names)

plt.xlabel('Predicted Label')
plt.ylabel('Label')


In [None]:
# Per-class precision, recall and F1 for the logistic regression on the test
# split. average=None yields one score per class instead of a single average.

precision = precision_score(y_test, labels_lr, average=None)
recall = recall_score(y_test, labels_lr, average=None)
f_score = f1_score(y_test, labels_lr, average=None)

# Printed in the order: precision, recall, F-score.
for per_class_scores in (precision, recall, f_score):
    print(per_class_scores)

In [None]:
# 10-fold cross-validation of the logistic regression on the full dataset,
# scored by accuracy.
# NOTE: this is a single cross-validation loop (no inner tuning loop), i.e.
# ordinary k-fold cross-validation rather than nested cross-validation.

scores = cross_val_score(lr, X, y, scoring='accuracy', cv=10)
print(scores)

mean_accuracy, accuracy_std = scores.mean(), scores.std()
print("%0.4f accuracy with a standard deviation of %0.4f" % (mean_accuracy, accuracy_std))

# THE OTHER CLASSIFICATION MODELS

In [None]:
# Random Forest model

# The target is a class label, so a classifier is required here. A
# RandomForestRegressor returns continuous scores, which the classification
# metrics used in the following cells (accuracy_score, classification_report,
# confusion_matrix) are not designed to evaluate.
# The variable keeps its original name `regressor` (and the predictions the
# name `labels_regressor`) because the evaluation cells refer to them.

regressor = RandomForestClassifier(n_estimators=100, random_state=0)

# Applying the model to the training data:

regressor.fit(X_train, y_train)

# Predict the class labels of the test set:

labels_regressor = regressor.predict(X_test)
labels_regressor


In [None]:
# Accuracy of the Random Forest model on the test and training splits:

print('Random Forest Test Accuracy:', accuracy_score(y_test, labels_regressor))

train_labels_regressor = regressor.predict(X_train)
print('Random Forest Train Accuracy:', accuracy_score(y_train, train_labels_regressor))

# Precision, recall, f1-score and support per class (test first, then train):

print(classification_report(y_test, labels_regressor))
print(classification_report(y_train, train_labels_regressor))

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
# NOTE(review): the tick labels assume confusion_matrix orders the classes as
# (Non-Target, Target) - confirm against the label encoding.

class_names = ['Non-Target', 'Target']
mat_regressor = confusion_matrix(y_test, labels_regressor)
sns.heatmap(mat_regressor, square=True, annot=True, fmt="d", cbar=False,
            xticklabels=class_names, yticklabels=class_names)

plt.xlabel('Predicted Label')
plt.ylabel('Label')

In [None]:
# Per-class precision, recall and F1 for the Random Forest on the test split.
# average=None yields one score per class instead of a single average.

precision = precision_score(y_test, labels_regressor, average=None)
recall = recall_score(y_test, labels_regressor, average=None)
f_score = f1_score(y_test, labels_regressor, average=None)

# Printed in the order: precision, recall, F-score.
for per_class_scores in (precision, recall, f_score):
    print(per_class_scores)

In [None]:
# Neural Network (NN)

# Building the classifier: a multi-layer perceptron with three hidden layers
# of 13 units each and at most 500 training iterations.
# random_state is fixed so that repeated runs give the same results, in line
# with the seeded train/test split and the other seeded models in this
# notebook.

mlp = MLPClassifier(hidden_layer_sizes=(13, 13, 13), max_iter=500, random_state=0)

# Applying the model to the training data:

mlp.fit(X_train, y_train)

# Predict the class labels of the test set:

labels_mlp = mlp.predict(X_test)
labels_mlp

In [None]:
# Accuracy of the neural network on the test and training splits:

print('NN Test Accuracy:', accuracy_score(y_test, labels_mlp))

train_labels_mlp = mlp.predict(X_train)
print('NN Train Accuracy:', accuracy_score(y_train, train_labels_mlp))

# Precision, recall, f1-score and support per class (test first, then train):

print(classification_report(y_test, labels_mlp))
print(classification_report(y_train, train_labels_mlp))

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
# NOTE(review): the tick labels assume confusion_matrix orders the classes as
# (Non-Target, Target) - confirm against the label encoding.

class_names = ['Non-Target', 'Target']
mat_mlp = confusion_matrix(y_test, labels_mlp)
sns.heatmap(mat_mlp, square=True, annot=True, fmt="d", cbar=False,
            xticklabels=class_names, yticklabels=class_names)

plt.xlabel('Predicted Label')
plt.ylabel('Label')



In [None]:
# Per-class precision, recall and F1 for the neural network on the test split.
# average=None yields one score per class instead of a single average.

precision = precision_score(y_test, labels_mlp, average=None)
recall = recall_score(y_test, labels_mlp, average=None)
f_score = f1_score(y_test, labels_mlp, average=None)

# Printed in the order: precision, recall, F-score.
for per_class_scores in (precision, recall, f_score):
    print(per_class_scores)

In [None]:
# 10-fold cross-validation of the neural network on the full dataset, scored
# by accuracy.
# NOTE: this is a single cross-validation loop (no inner tuning loop), i.e.
# ordinary k-fold cross-validation rather than nested cross-validation.
scores_mlp = cross_val_score(mlp, X, y, scoring='accuracy', cv=10)
print(scores_mlp)

mean_accuracy_mlp, accuracy_std_mlp = scores_mlp.mean(), scores_mlp.std()
print("%0.4f accuracy with a standard deviation of %0.4f" % (mean_accuracy_mlp, accuracy_std_mlp))

In [None]:
# Decision Tree

# Building the model: splits are chosen by the entropy criterion.
# random_state is fixed so that repeated runs grow the same tree, in line
# with the seeded train/test split and the other seeded models in this
# notebook.

dt = DecisionTreeClassifier(criterion='entropy', random_state=0)

# Applying the model to the training data:

dt.fit(X_train, y_train)

# Predict the class labels of the test set:

labels_dt = dt.predict(X_test)
labels_dt

In [None]:
# Accuracy of the decision tree on the test and training splits:

print('Decision Tree Test Accuracy:', accuracy_score(y_test, labels_dt))

train_labels_dt = dt.predict(X_train)
print('Decision Tree Train Accuracy:', accuracy_score(y_train, train_labels_dt))

# Precision, recall, f1-score and support per class (test first, then train):

print(classification_report(y_test, labels_dt))
print(classification_report(y_train, train_labels_dt))

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
# NOTE(review): the tick labels assume confusion_matrix orders the classes as
# (Non-Target, Target) - confirm against the label encoding.

class_names = ['Non-Target', 'Target']
mat_dt = confusion_matrix(y_test, labels_dt)
sns.heatmap(mat_dt, square=True, annot=True, fmt="d", cbar=False,
            xticklabels=class_names, yticklabels=class_names)

plt.xlabel('Predicted Label')
plt.ylabel('Label')



In [None]:
# Per-class precision, recall and F1 for the decision tree on the test split.
# average=None yields one score per class instead of a single average.

precision = precision_score(y_test, labels_dt, average=None)
recall = recall_score(y_test, labels_dt, average=None)
f_score = f1_score(y_test, labels_dt, average=None)

# Printed in the order: precision, recall, F-score.
for per_class_scores in (precision, recall, f_score):
    print(per_class_scores)

In [None]:
# Support Vector Machine (SVM)

# Linear support vector classifier with an L1 penalty, dual=False, a
# tolerance of 1e-5 and a fixed random seed.

svm_settings = {
    'dual': False,
    'random_state': 0,
    'penalty': 'l1',
    'tol': 1e-5,
}
Svm = LinearSVC(**svm_settings)

# Fit on the training split:

Svm.fit(X_train, y_train)

# Predicted labels for the test split; shown as the cell output.

labels_svm = Svm.predict(X_test)
labels_svm

In [None]:
# Let's evaluate the results with accuracy:

print('SVM Test Accuracy:', accuracy_score(y_test, labels_svm))
print('SVM Train Accuracy:', accuracy_score(y_train, Svm.predict(X_train)))

# Recall - but also precision, f1-score and support:

print(classification_report(y_test, labels_svm))
print(classification_report(y_train, Svm.predict(X_train)))

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
# The tick labels are spelt 'Non-Target' / 'Target' so this plot matches the
# confusion matrices of the other models.
# NOTE(review): the tick labels assume confusion_matrix orders the classes as
# (Non-Target, Target) - confirm against the label encoding.

mat_svm = confusion_matrix(y_test,labels_svm)
sns.heatmap(mat_svm, square=True, annot=True, fmt="d", cbar=False,
           xticklabels=['Non-Target', 'Target'], yticklabels=['Non-Target', 'Target'])

plt.xlabel('Predicted Label')
plt.ylabel('Label')


In [None]:
# Per-class precision, recall and F1 for the linear SVM on the test split.
# average=None yields one score per class instead of a single average.

precision = precision_score(y_test, labels_svm, average=None)
recall = recall_score(y_test, labels_svm, average=None)
f_score = f1_score(y_test, labels_svm, average=None)

# Printed in the order: precision, recall, F-score.
for per_class_scores in (precision, recall, f_score):
    print(per_class_scores)