## Naive Bayes Model for Sentiment Analysis Predictions

In [1]:
# Reset: remove any previously cloned stonks/ directory so the notebook can be
# re-run from the top without errors after making a change.
%rm -rf stonks/

In [2]:
# import required packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB # The Naive Bayes algo

In [3]:
## Retrieving Master CSV data from github for all News Outlet Averages
%%capture
!git clone https://github.com/IS737StockPicker/stonks.git

In [4]:
# Load the master data set from the cloned repository.
# The path is relative to the working directory, matching the relative `stonks/`
# location used by the reset (%rm) and `git clone` cells, instead of a
# hard-coded absolute Colab path.
master_data_CSV = pd.read_csv('stonks/master_data.csv')
master_data_CSV

Unnamed: 0,Date,XLE,XLF,XLU,XLI,XLK,XLV,XLY,IYR,AAPL,...,Huff_headline_min,Huff_body_min,NYT_headline_mean,NYT_headline_median,NYT_body_mean,NYT_body_median,NYT_headline_max,NYT_headline_min,NYT_body_max,NYT_body_min
0,2022-03-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.80,-0.67,-0.05,0.0,-0.03,0.00,0.88,-0.73,0.91,-0.96
1,2022-03-15,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,...,-0.76,-0.85,-0.09,0.0,0.02,0.00,0.84,-0.86,0.88,-0.91
2,2022-03-16,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.25,-0.56,-0.06,0.0,-0.00,0.00,0.86,-0.80,0.88,-0.90
3,2022-03-17,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.90,-0.49,-0.02,0.0,-0.05,0.00,0.61,-0.84,0.82,-0.89
4,2022-03-18,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.60,-0.76,-0.06,0.0,-0.05,-0.08,0.83,-0.82,0.86,-0.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,2022-09-07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.78,-0.57,0.09,0.0,-0.07,0.00,0.65,-0.86,0.84,-0.93
121,2022-09-08,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,...,-0.77,-0.83,-0.04,0.0,-0.02,0.00,0.75,-0.90,0.92,-0.93
122,2022-09-09,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.60,0.00,-0.01,0.0,0.14,0.14,0.81,-0.80,0.85,-0.91
123,2022-09-12,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.79,0.00,-0.10,0.0,-0.06,-0.13,0.75,-0.90,0.89,-0.89


In [5]:
# Separate the ticker columns from the CSV data set:
# keep the first 15 columns, then discard 'Date'.
tickers = master_data_CSV.iloc[:, :15].drop(columns=['Date'])
tickers

Unnamed: 0,XLE,XLF,XLU,XLI,XLK,XLV,XLY,IYR,AAPL,XLB,XLP,SPY,^DJI,NDX
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
121,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
122,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
123,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0


In [6]:
# Getting the average across news outlets for each measure (avg, median, min, max)
# to get model accuracy for each ticker.

#1 Headline Median Avg for common indexes
# Features: the headline-median sentiment column of each of the three outlets.
feature_cols = ['guardian_headline_median', 'Huff_headline_median', 'NYT_headline_median']
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average); it is used
# to compare one feature set against another.
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.46
XLF                                  0.49
XLU                                  0.46
XLI                                   0.6
XLK                                  0.54
XLV                                   0.6
XLY                                  0.49
IYR                                   0.4
AAPL                                 0.49
XLB                                  0.37
XLP                                  0.51
SPY                                   0.6
^DJI                                 0.54
NDX                                  0.51
Total Accuracy Score                 7.06


In [7]:
#2 Headline Mean Avg for indexes
# Features: the headline-mean sentiment column of each of the three outlets.
feature_cols = ['guardian_headline_mean', 'Huff_headline_mean', 'NYT_headline_mean']
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.26
XLF                                   0.6
XLU                                  0.51
XLI                                   0.6
XLK                                  0.51
XLV                                   0.6
XLY                                  0.49
IYR                                   0.4
AAPL                                 0.51
XLB                                  0.51
XLP                                  0.51
SPY                                  0.57
^DJI                                 0.57
NDX                                  0.49
Total Accuracy Score                 7.13


In [8]:
#3 Body Median Avg for indexes
# Features: the body-median sentiment column of each of the three outlets.
feature_cols = ['guardian_body_median', 'Huff_body_median', 'NYT_body_median']
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.46
XLF                                  0.31
XLU                                  0.49
XLI                                  0.51
XLK                                  0.54
XLV                                   0.4
XLY                                  0.54
IYR                                  0.54
AAPL                                 0.46
XLB                                  0.31
XLP                                  0.51
SPY                                  0.51
^DJI                                 0.46
NDX                                  0.51
Total Accuracy Score                 6.55


In [9]:
#4 Body Mean Avg for indexes
# Features: the body-mean sentiment column of each of the three outlets.
feature_cols = ['guardian_body_mean', 'Huff_body_mean', 'NYT_body_mean']
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                   0.4
XLF                                   0.4
XLU                                  0.57
XLI                                  0.51
XLK                                  0.54
XLV                                  0.63
XLY                                  0.54
IYR                                  0.31
AAPL                                 0.46
XLB                                  0.26
XLP                                  0.43
SPY                                  0.46
^DJI                                 0.46
NDX                                  0.46
Total Accuracy Score                 6.43


In [10]:
#5 Min/Max headline avg
# Features: the headline min and max sentiment columns of each of the three outlets.
feature_cols = [
    'guardian_headline_min', 'guardian_headline_max',
    'Huff_headline_min', 'Huff_headline_max',
    'NYT_headline_min', 'NYT_headline_max',
]
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.49
XLF                                  0.51
XLU                                  0.57
XLI                                   0.6
XLK                                  0.43
XLV                                  0.57
XLY                                  0.37
IYR                                  0.37
AAPL                                 0.49
XLB                                   0.6
XLP                                  0.54
SPY                                   0.6
^DJI                                 0.54
NDX                                  0.46
Total Accuracy Score                 7.14


In [11]:
#6 Min/Max body avg
# Features: the body min and max sentiment columns of each of the three outlets.
feature_cols = [
    'guardian_body_min', 'guardian_body_max',
    'Huff_body_min', 'Huff_body_max',
    'NYT_body_min', 'NYT_body_max',
]
X = master_data_CSV[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
for ticker in tickers.columns:
    y = master_data_CSV[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.49
XLF                                  0.54
XLU                                  0.46
XLI                                   0.6
XLK                                  0.51
XLV                                  0.51
XLY                                  0.54
IYR                                   0.6
AAPL                                 0.46
XLB                                   0.4
XLP                                  0.49
SPY                                  0.57
^DJI                                 0.51
NDX                                  0.57
Total Accuracy Score                 7.25


## Using Naive Bayes Model to predict future stock prices

In [12]:
# Load the "future" data set from the cloned repository.
# The path is relative to the working directory, matching the relative `stonks/`
# location used by the reset (%rm) and `git clone` cells, instead of a
# hard-coded absolute Colab path.
master_data_future = pd.read_csv('stonks/master_data_future.csv')
master_data_future

Unnamed: 0,Date,XLE,XLF,XLU,XLI,XLK,XLV,XLY,IYR,AAPL,...,Huff_headline_min,Huff_body_min,NYT_headline_mean,NYT_headline_median,NYT_body_mean,NYT_body_median,NYT_headline_max,NYT_headline_min,NYT_body_max,NYT_body_min
0,2022-03-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,...,-0.80,-0.67,-0.05,0.0,-0.03,0.00,0.88,-0.73,0.91,-0.96
1,2022-03-15,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.76,-0.85,-0.09,0.0,0.02,0.00,0.84,-0.86,0.88,-0.91
2,2022-03-16,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.25,-0.56,-0.06,0.0,-0.00,0.00,0.86,-0.80,0.88,-0.90
3,2022-03-17,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.90,-0.49,-0.02,0.0,-0.05,0.00,0.61,-0.84,0.82,-0.89
4,2022-03-18,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,...,-0.60,-0.76,-0.06,0.0,-0.05,-0.08,0.83,-0.82,0.86,-0.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,2022-09-07,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,...,-0.78,-0.57,0.09,0.0,-0.07,0.00,0.65,-0.86,0.84,-0.93
121,2022-09-08,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.77,-0.83,-0.04,0.0,-0.02,0.00,0.75,-0.90,0.92,-0.93
122,2022-09-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.60,0.00,-0.01,0.0,0.14,0.14,0.81,-0.80,0.85,-0.91
123,2022-09-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.79,0.00,-0.10,0.0,-0.06,-0.13,0.75,-0.90,0.89,-0.89


In [13]:
# Separate the ticker columns from the future CSV data set:
# keep the first 15 columns, then discard 'Date'.
future_tickers = master_data_future.iloc[:, :15].drop(columns=['Date'])
future_tickers

Unnamed: 0,XLE,XLF,XLU,XLI,XLK,XLV,XLY,IYR,AAPL,XLB,XLP,SPY,^DJI,NDX
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
121,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
122,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
123,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Getting the average across news outlets for each measure (avg, median, min, max)
# to get model accuracy for each ticker.

#7 Future Headline Median Avg
# Features: the headline-median sentiment column of each of the three outlets.
feature_cols = ['guardian_headline_median', 'Huff_headline_median', 'NYT_headline_median']
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average); it is used
# to compare one feature set against another.
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.51
XLF                                  0.54
XLU                                  0.51
XLI                                  0.57
XLK                                   0.6
XLV                                  0.54
XLY                                   0.4
IYR                                  0.46
AAPL                                  0.6
XLB                                  0.49
XLP                                   0.4
SPY                                  0.63
^DJI                                  0.6
NDX                                   0.4
Total Accuracy Score                 7.25


In [15]:
#8 Future Headline Mean Avg
# Features: the headline-mean sentiment column of each of the three outlets.
feature_cols = ['guardian_headline_mean', 'Huff_headline_mean', 'NYT_headline_mean']
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.37
XLF                                  0.57
XLU                                  0.51
XLI                                  0.57
XLK                                  0.51
XLV                                  0.66
XLY                                  0.43
IYR                                  0.54
AAPL                                 0.51
XLB                                   0.6
XLP                                  0.37
SPY                                  0.54
^DJI                                 0.57
NDX                                  0.63
Total Accuracy Score                 7.38


In [16]:
#9 Future Body Median Avg
# Features: the body-median sentiment column of each of the three outlets.
feature_cols = ['guardian_body_median', 'Huff_body_median', 'NYT_body_median']
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.37
XLF                                  0.54
XLU                                  0.57
XLI                                  0.57
XLK                                  0.54
XLV                                  0.29
XLY                                  0.49
IYR                                  0.54
AAPL                                 0.57
XLB                                  0.46
XLP                                  0.37
SPY                                  0.63
^DJI                                 0.46
NDX                                  0.43
Total Accuracy Score                 6.83


In [17]:
#10 Future Body Mean Avg
# Features: the body-mean sentiment column of each of the three outlets.
feature_cols = ['guardian_body_mean', 'Huff_body_mean', 'NYT_body_mean']
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.43
XLF                                   0.6
XLU                                  0.51
XLI                                  0.57
XLK                                  0.54
XLV                                  0.57
XLY                                  0.31
IYR                                  0.54
AAPL                                 0.43
XLB                                  0.49
XLP                                  0.54
SPY                                  0.63
^DJI                                  0.6
NDX                                  0.37
Total Accuracy Score                 7.13


In [18]:
#11 Future Min/Max headline avg

# Features: the headline min and max sentiment columns of each of the three outlets.
feature_cols = [
    'guardian_headline_min', 'guardian_headline_max',
    'Huff_headline_min', 'Huff_headline_max',
    'NYT_headline_min', 'NYT_headline_max',
]
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.57
XLF                                  0.49
XLU                                  0.51
XLI                                  0.57
XLK                                  0.37
XLV                                  0.66
XLY                                  0.37
IYR                                  0.46
AAPL                                  0.6
XLB                                  0.43
XLP                                  0.37
SPY                                  0.63
^DJI                                 0.37
NDX                                  0.37
Total Accuracy Score                 6.77


In [19]:
#12 Future Min/Max body avg
# Features: the body min and max sentiment columns of each of the three outlets.
feature_cols = [
    'guardian_body_min', 'guardian_body_max',
    'Huff_body_min', 'Huff_body_max',
    'NYT_body_min', 'NYT_body_max',
]
X = master_data_future[feature_cols]  # identical for every ticker, so built once

accuracy_scores = {}
# Iterate the ticker columns of the future data set itself (future_tickers) rather
# than those of the other data set's `tickers` frame.
for ticker in future_tickers.columns:
    y = master_data_future[ticker]
    # Fixed random_state: every ticker is evaluated on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.28, random_state=5)
    # NOTE(review): BernoulliNB binarizes inputs at its default threshold
    # (binarize=0.0), so each sentiment value is reduced to "> 0" vs. not - confirm intended.
    stock_nb = BernoulliNB(force_alpha=True)
    stock_nb.fit(X_train, y_train.to_numpy())  # to_numpy() replaces the deprecated Series.ravel()
    y_pred = stock_nb.predict(X_valid)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    accuracy_scores[ticker] = round(accuracy_score(y_valid, y_pred), 2)

# The "total" is the sum of the per-ticker accuracies (not an average).
accuracy_scores['Total Accuracy Score'] = round(sum(accuracy_scores.values()), 2)

print("Accuracy Scores:")
for ticker, score in accuracy_scores.items():
    print("{:<30} {:>10}".format(ticker, score))

Accuracy Scores:
XLE                                  0.63
XLF                                  0.51
XLU                                  0.57
XLI                                  0.57
XLK                                  0.43
XLV                                  0.57
XLY                                  0.43
IYR                                  0.31
AAPL                                 0.51
XLB                                  0.57
XLP                                  0.43
SPY                                  0.63
^DJI                                 0.51
NDX                                  0.43
Total Accuracy Score                  7.1
