# Results Analysis 

In [None]:
# Utilities
from joblib import Parallel, delayed
from pandarallel import pandarallel
# Initialise pandarallel once for the session, with its progress bar disabled.
pandarallel.initialize(progress_bar=False)
from multiprocessing import Pool, cpu_count
import warnings 
# Suppress every warning for the rest of the session. This also hides
# deprecation notices, so remove it temporarily when debugging.
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np
import tqdm
# NOTE: the next import rebinds the name `tqdm` from the module (imported on
# the line above) to the `tqdm` class, so `tqdm.pandas` below is a call on the class.
from tqdm import tqdm
# Register tqdm with pandas, using "progress bar" as the bar description.
tqdm.pandas(desc="progress bar")
import gc
# Environment flag presumably read by the Hugging Face `tokenizers` library;
# nothing in this cell uses it directly -- TODO confirm it is still required.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

# Import time packages 
import time
import datetime

# Packages needed for pre-processing:
from sklearn.preprocessing import MinMaxScaler

# Deep Learning Models 
import torch
import tensorflow as tf

# Visualization Tools
import matplotlib.pyplot as plt
import seaborn as sns 
import matplotlib.pyplot as plt 

# Apply settings to the visualization tools.
#
# The seaborn style must be set FIRST: sns.set_style() rewrites every
# style-level rcParam, and 'font.family' is one of them. Calling it after
# assigning plt.rcParams['font.family'] silently resets the font back to
# seaborn's sans-serif default, so the Times New Roman setting never applied.
sns.set_style("darkgrid")

# Font family and text sizes, applied on top of the seaborn style.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 13,
    'axes.titlesize': 20,
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

# Default colour cycle: four blue shades, dark to light.
# (An earlier palette that was immediately overwritten by this one has been dropped.)
sns.set_palette(["#002855", "#3E7EAA", "#82B5D8", "#B3D9F2"])

# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
from utils import search_folder

# The project root is derived from the working directory by stripping the
# "thesis_code" component from the path string.
current_dir = os.getcwd()
thesis_folder_path = current_dir.replace("thesis_code", "")

# Locate each data/output folder under the project root, in this fixed order.
stock_data_path, modelling_data_path, google_trending_path, results_folder = [
    search_folder(thesis_folder_path, folder_name)
    for folder_name in (
        "stock_prices",
        "modelling_data",
        "google_search",
        "results_output",
    )
]

--- 

## Moderna Results 

In [171]:
# Load the Moderna results summary (first CSV column becomes the index)
# and rank the trials by accuracy, best first.
moderna_results_df = (
    pd.read_csv(results_folder + "/moderna_results_summary.csv", index_col=[0])
    .sort_values(by="accuracy", ascending=False)
)
# Preview the top-ranked trials.
moderna_results_df.head()

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_700,moderna,close,all,rolled_vader_senti_tw_13,xgboost,permutations,0.530067,0.538462,0.528409,0.534231,4.852,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.853098, 13.170958, 13.747823, 14.411941, 1..."
trial_723,moderna,close,all,rolled_finbert_senti_tw_19,xgboost,permutations,0.528261,0.549774,0.527273,0.538803,4.838,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.865635, 13.2151575, 13.94238, 13.944755, 1..."
trial_706,moderna,close,all,rolled_vader_senti_tw_19,xgboost,permutations,0.529279,0.531674,0.527273,0.530474,4.826,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[13.121658, 13.320258, 13.868723, 14.098442, 1..."
trial_28,moderna,close,all,rolled_textblob_tw_sw_271,xgboost,permutations,0.527594,0.540724,0.526136,0.534078,4.849,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.753787, 13.407628, 13.691781, 14.28641, 14..."
trial_458,moderna,close,all,rolled_textblob_eik_twi_senti_11,xgboost,permutations,0.527716,0.538462,0.526136,0.533035,4.843,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.889562, 13.381269, 13.85841, 13.979132, 14..."


In [172]:
# Show the Moderna baseline run(s) for comparison with the ranked trials above.
moderna_results_df.loc[moderna_results_df["comments"].eq("baseline score")]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,moderna,close,all,,xgboost,baseline score,0.512141,0.524887,0.510227,0.518436,4.875,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.894571, 13.140184, 13.619936, 14.064367, 1..."


--- 

## Apple Results 

In [183]:
# Load the Apple results summary (first CSV column becomes the index)
# and rank the trials by accuracy, best first.
apple_results_df = (
    pd.read_csv(results_folder + "/apple_results_summary.csv", index_col=[0])
    .sort_values(by="accuracy", ascending=False)
)
# Preview the top-ranked trials.
apple_results_df.head()

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_276,apple,close,all,rolled_finbert_tw_sw_351,xgboost,permutations,0.537827,0.541551,0.511355,0.539683,1.972,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.21833, 39.923134, 39.87051, 39.172104, 39...."
trial_25,apple,close,all,rolled_eik_finbert_senti_sw_131,xgboost regression,permutations,0.537396,0.537396,0.510623,0.537396,1.941,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.237324, 39.953026, 39.847683, 39.237587, 3..."
trial_163,apple,close,all,rolled_eik_vader_senti_sw_281,xgboost,permutations,0.535311,0.524931,0.507692,0.53007,1.965,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.02322, 39.982407, 39.855785, 39.962322, 39..."
trial_358,apple,close,all,rolled_finbert_senti_sw_31,xgboost,permutations,0.53453,0.536011,0.507692,0.53527,1.991,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.22936, 39.92876, 39.84852, 39.21196, 39.90..."
trial_489,apple,close,all,rolled_finbert_senti_tw_201,xgboost,permutations,0.533981,0.533241,0.50696,0.533611,1.961,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.111496, 39.9362, 40.233242, 39.460262, 39...."


In [182]:
# Show the Apple xgboost baseline run.
# NOTE(review): `.iloc[1:]` discards the first matching row in the frame's
# current order; the reason is not evident from this cell -- confirm that
# skipping it is intended rather than filtering on an explicit column.
apple_results_df.loc[
    apple_results_df["comments"].eq("baseline score")
    & apple_results_df["model"].eq("xgboost")
].iloc[1:]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,apple,close,all,,xgboost,baseline score,0.503516,0.495845,0.474725,0.499651,1.98,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.23484, 39.92794, 39.847237, 39.259945, 39...."


--- 

## Tesla Results 

In [161]:
# Load the Tesla results summary (first CSV column becomes the index)
# and rank the trials by F1 score, best first.
# NOTE(review): the Moderna and Apple cells rank by "accuracy" while this one
# ranks by "f1_score" -- confirm the different ranking metric is intentional.
tesla_results_df = (
    pd.read_csv(results_folder + "/tesla_results_summary.csv", index_col=[0])
    .sort_values(by="f1_score", ascending=False)
)
# Preview the top-ranked trials.
tesla_results_df.head()

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_523,tesla,close,all,rolled_finbert_eik_twi_senti_281,xgboost,permutations,0.54418,0.546479,0.526007,0.545327,3.698,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."
trial_433,tesla,close,all,rolled_eik_vader_senti_sw_141,xgboost,permutations,0.541029,0.547887,0.523077,0.544437,3.706,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.708046, 23.927109, 24.366182, 25.039324, 2..."
trial_37,tesla,close,all,rolled_textblob_tw_sw_361,xgboost,permutations,0.541259,0.54507,0.523077,0.543158,3.676,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."
trial_409,tesla,close,all,rolled_eik_finbert_senti_sw_281,xgboost,permutations,0.535616,0.550704,0.517949,0.543056,3.702,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.68765, 23.945473, 24.370571, 25.052164, 25..."
trial_413,tesla,close,all,rolled_eik_finbert_senti_sw_321,xgboost,permutations,0.538036,0.547887,0.520147,0.542917,3.661,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.68765, 23.945473, 24.370571, 25.052164, 25..."


In [175]:
# Show the Tesla baseline run(s); this cell filters on "close baseline score".
tesla_results_df.loc[tesla_results_df["comments"].eq("close baseline score")]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,tesla,close,all,,xgboost,close baseline score,0.519553,0.523944,0.500366,0.521739,3.733,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.67142, 23.955599, 24.378054, 25.058353, 25..."
