In [14]:
import pandas as pd
import numpy as np
import os
from scipy.stats import wilcoxon, mannwhitneyu
import json

In [31]:
# Folder that the cells below scan for '.json' files: the current working directory
# at the moment this cell is executed.
directory = os.getcwd()

In [32]:
# Filled by the next cell: file name without the '.json' suffix -> parsed JSON content,
# for files whose name contains 'apple' and does not contain 'test'.
apple_scores = {}

In [33]:
# Read every '.json' file in `directory` whose name contains 'apple' but not 'test'.
for entry in os.listdir(directory):
    if not entry.endswith('.json'):
        continue
    if 'apple' not in entry or 'test' in entry:
        continue
    with open(os.path.join(directory, entry)) as handle:
        # Key: the file name with its 5-character '.json' suffix removed.
        apple_scores[entry[:-5]] = json.load(handle)

In [34]:
# Three independent, initially empty tables (one per metric); rows and columns are
# added on assignment.
rmse_scores, mae_scores, accuracy_scores = (pd.DataFrame() for _ in range(3))

In [36]:
# Fill the three metric tables: one row per model, one column per inner key of the
# loaded score files. Each cell holds the mean of the values found at positions
# 1 (RMSE), 2 (MAE) and 3 (accuracy) of the entry, formatted with two decimals.
# NOTE(review): the cells are *strings* produced by format(); convert before doing arithmetic.
for run_name, run_scores in apple_scores.items():
    # Row label is derived from the file name; anything that is neither an
    # 'lstm' nor a 'gru' file is filed under 'AdaBoost'.
    if 'lstm' in run_name:
        model_label = 'LSTM'
    elif 'gru' in run_name:
        model_label = 'GRU'
    else:
        model_label = 'AdaBoost'

    for column, entry in run_scores.items():
        for table, position in ((rmse_scores, 1), (mae_scores, 2), (accuracy_scores, 3)):
            table.at[model_label, column] = format(np.mean(entry[position]), ".2f")

In [132]:
# Display the RMSE table (rows: 'LSTM' / 'GRU' / 'AdaBoost'; columns: the inner keys of the score files).
rmse_scores

Unnamed: 0,Prices,Prices + Tweets,Prices + News,Prices + Political News,Prices + Tweets + News,Prices + Tweets + Political News,Prices + News + Political News,Prices + Tweets + News + Political News
LSTM,4.87,4.5,4.41,5.12,4.43,5.11,5.05,5.02
AdaBoost,5.3,5.28,5.25,5.32,5.26,5.3,5.26,5.28
GRU,2.87,2.23,2.27,3.47,2.12,3.01,2.31,3.12


In [133]:
# Display the MAE table (rows: 'LSTM' / 'GRU' / 'AdaBoost'; columns: the inner keys of the score files).
mae_scores

Unnamed: 0,Prices,Prices + Tweets,Prices + News,Prices + Political News,Prices + Tweets + News,Prices + Tweets + Political News,Prices + News + Political News,Prices + Tweets + News + Political News
LSTM,3.67,3.06,3.19,3.39,3.04,3.31,3.38,3.61
AdaBoost,2.9,2.86,2.85,2.87,2.85,2.88,2.87,2.88
GRU,2.3,1.75,1.68,2.34,1.88,2.29,2.61,2.36


In [134]:
# Display the accuracy table (rows: 'LSTM' / 'GRU' / 'AdaBoost'; columns: the inner keys of the score files).
accuracy_scores

Unnamed: 0,Prices,Prices + Tweets,Prices + News,Prices + Political News,Prices + Tweets + News,Prices + Tweets + Political News,Prices + News + Political News,Prices + Tweets + News + Political News
LSTM,0.51,0.55,0.54,0.51,0.55,0.49,0.52,0.5
AdaBoost,0.49,0.5,0.47,0.49,0.48,0.49,0.48,0.48
GRU,0.53,0.56,0.55,0.53,0.56,0.53,0.54,0.51


In [40]:
#Wilcoxon Signed Rank Test

def wilcoxon_signed_rank_test(arr1, arr2, metric, alpha=0.05):
    """Run a one-sided Wilcoxon signed-rank test on two paired samples.

    The alternative hypothesis depends on ``metric``: for ``'accuracy'`` the
    test uses ``alternative='less'``; for every other metric it uses
    ``alternative='greater'``.

    Parameters
    ----------
    arr1, arr2 : array-like
        Paired samples, passed to ``scipy.stats.wilcoxon`` as ``x`` and ``y``.
    metric : str
        ``'accuracy'`` selects the ``'less'`` alternative, anything else
        selects ``'greater'``.
    alpha : float, optional
        Significance level the p-value is compared against (default 0.05).

    Returns
    -------
    bool
        ``False`` when the p-value is below ``alpha`` (there is a difference),
        ``True`` otherwise (no difference).
    """
    first = np.asarray(arr1, dtype=float)
    second = np.asarray(arr2, dtype=float)
    # When every paired difference is zero there is nothing to rank, and some
    # SciPy versions raise ValueError for such input. Identical samples show no
    # difference, so answer directly instead of crashing the whole run.
    if first.shape == second.shape and first.size > 0 and not np.any(first - second):
        return True

    alternative = 'less' if metric == 'accuracy' else 'greater'
    _, p_value = wilcoxon(arr1, arr2, alternative=alternative)
    # `not (p < alpha)` keeps an undefined (NaN) p-value on the "no difference" side.
    return not (p_value < alpha)

In [41]:
#Mann-Whitney U test

def mannwhitneyu_test(arr1, arr2, metric, alpha=0.05):
    """Run a one-sided Mann-Whitney U test on two samples.

    The alternative hypothesis depends on ``metric``: for ``'accuracy'`` the
    test uses ``alternative='less'``; for every other metric it uses
    ``alternative='greater'``.

    Parameters
    ----------
    arr1, arr2 : array-like
        Samples, passed to ``scipy.stats.mannwhitneyu`` as ``x`` and ``y``.
    metric : str
        ``'accuracy'`` selects the ``'less'`` alternative, anything else
        selects ``'greater'``.
    alpha : float, optional
        Significance level the p-value is compared against (default 0.05).

    Returns
    -------
    bool
        ``False`` when the p-value is below ``alpha`` (there is a difference),
        ``True`` otherwise (no difference).
    """
    alternative = 'less' if metric == 'accuracy' else 'greater'
    _, p_value = mannwhitneyu(arr1, arr2, alternative=alternative)
    # `not (p < alpha)` keeps an undefined (NaN) p-value on the "no difference" side.
    return not (p_value < alpha)

In [42]:
# Filled by the next cell: file name without the '.json' suffix -> parsed JSON content,
# for every '.json' file whose name does not contain 'test'.
all_scores = {}

In [43]:
# Read every '.json' file in `directory` whose name does not contain 'test'.
for entry in os.listdir(directory):
    is_score_file = entry.endswith('.json') and 'test' not in entry
    if is_score_file:
        with open(os.path.join(directory, entry)) as handle:
            # Key: the file name with its 5-character '.json' suffix removed.
            all_scores[entry[:-5]] = json.load(handle)

In [136]:
# For each loaded score file, compare every non-'Prices' entry against the 'Prices'
# baseline with both tests on RMSE, MAE and accuracy, then save the outcomes next to
# the score files as '<name> test.json'.
#
# Each spec is (label used in the result keys, metric name handed to the test
# functions, position of that metric's values inside an entry).
metric_specs = (('RMSE', 'rmse', 1), ('MAE', 'mae', 2), ('Accuracy', 'accuracy', 3))

for company_model, model_results in all_scores.items():
    # Keys are created Wilcoxon-first, then Mann-Whitney, each in RMSE/MAE/Accuracy
    # order, so the layout of the saved JSON stays the same as before.
    test_results = {
        f'{label} {test_name}': []
        for test_name in ('Wilcoxon', 'Mann-Whitney')
        for label, _, _ in metric_specs
    }

    for label, metric, position in metric_specs:
        baseline = model_results['Prices'][position]
        for key, item in model_results.items():
            if key == 'Prices':
                continue  # the baseline is never compared against itself
            candidate = item[position]
            wilcoxon_result = wilcoxon_signed_rank_test(baseline, candidate, metric)
            mannwhitneyu_result = mannwhitneyu_test(baseline, candidate, metric)
            # Each record is [baseline name, compared entry, True if no difference].
            test_results[f'{label} Wilcoxon'].append(['Prices', key, wilcoxon_result])
            test_results[f'{label} Mann-Whitney'].append(['Prices', key, mannwhitneyu_result])

    # Write into `directory` (the folder the loader cells scan) rather than whatever
    # the process working directory happens to be when this cell runs.
    with open(os.path.join(directory, f'{company_model} test.json'), "w") as file:
        json.dump(test_results, file)

In [137]:
# Filled by the next cell: file name without the '.json' suffix -> parsed JSON content,
# for every '.json' file whose name contains 'test'.
test_scores = {}

In [138]:
# Read every '.json' file in `directory` whose name contains 'test'.
for entry in os.listdir(directory):
    if not (entry.endswith('.json') and 'test' in entry):
        continue
    source_path = os.path.join(directory, entry)
    with open(source_path) as handle:
        parsed = json.load(handle)
    # Key: the file name with its 5-character '.json' suffix removed.
    test_scores[entry[:-5]] = parsed

In [139]:
# Zero-initialised counters, one table per test: rows are the metrics, columns are
# taken from the accuracy table.
metric_rows = ['RMSE', 'MAE', 'Accuracy']
feature_columns = list(accuracy_scores.columns)
wilcoxon_results = pd.DataFrame(0, index=metric_rows, columns=feature_columns)
mann_whitney_results = pd.DataFrame(0, index=metric_rows, columns=feature_columns)

In [140]:
# Tally, per metric and per compared column, how many loaded result files recorded a
# Wilcoxon outcome of False. Re-running this cell without re-creating the table adds
# the counts again.
for per_file_results in test_scores.values():
    for result_key, records in per_file_results.items():
        for record in records:
            # Result keys look like '<metric> <test name>'.
            key_parts = result_key.split(' ')
            if key_parts[1] == 'Wilcoxon' and record[2] == False:
                wilcoxon_results.at[key_parts[0], record[1]] += 1

In [141]:
# Tally, per metric and per compared column, how many loaded result files recorded a
# Mann-Whitney outcome of False. Re-running this cell without re-creating the table
# adds the counts again.
for per_file_results in test_scores.values():
    for result_key, records in per_file_results.items():
        for record in records:
            # Result keys look like '<metric> <test name>'.
            key_parts = result_key.split(' ')
            if key_parts[1] == 'Mann-Whitney' and record[2] == False:
                mann_whitney_results.at[key_parts[0], record[1]] += 1

In [122]:
# Display the Wilcoxon counts without the 'Prices' column (returns a new frame; the
# stored table is not modified).
wilcoxon_results.drop(columns='Prices')

Unnamed: 0,Prices + Tweets,Prices + News,Prices + Political News,Prices + Tweets + News,Prices + Tweets + Political News,Prices + News + Political News,Prices + Tweets + News + Political News
RMSE,13,9,2,10,5,5,5
MAE,14,9,1,8,6,4,4
Accuracy,13,8,3,8,5,4,4


In [124]:
# Display the Mann-Whitney counts without the 'Prices' column (returns a new frame;
# the stored table is not modified).
mann_whitney_results.drop(columns='Prices')

Unnamed: 0,Prices + Tweets,Prices + News,Prices + Political News,Prices + Tweets + News,Prices + Tweets + Political News,Prices + News + Political News,Prices + Tweets + News + Political News
RMSE,15,8,2,10,4,4,5
MAE,14,8,2,10,5,5,5
Accuracy,13,8,3,8,6,4,4
