# Import Libraries

In [1]:
import os,sys,re,glob,ipykernel,tweepy,stockmarket,nltk,collections,itertools,pandas as pd,numpy as np,\
        seaborn as sns, yfinance as yf, matplotlib.pyplot as plt, statsmodels.formula.api as smf,\
        statsmodels.api as sm, autoreload, importlib
from pathlib import Path
from string import punctuation 
from datetime import date
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
np.random.seed(0)
pd.set_option('display.max_columns', None)

## Set Working Directory: 
    
* /Sentiment_Analysis 
    
* __ file __ isn't available in jupyter notebooks

    

In [2]:
file = os.getcwd().split(os.sep)
while(file[-1] != 'Sentiment_Analysis'): # Check the working directory
    os.chdir('..')
    file = os.getcwd().split(os.sep)
    sys.path.append(os.path.abspath(os.getcwd()))
print(f"root directory: {os.getcwd()}", sep = '\n')

root directory: c:\Code\Public_Github\Sentiment_Analysis


## Load Custom Functions

In [3]:
from src import user_download_helper, user_download, merge_files, merge_all, \
                strip_all_words, sentence_word_probability, download_todays_test, \
                format_model,linear_model, naive_bayes, create_target, normalize_columns, normalize_columns_target

# Twitter API Credentials

In [4]:
# Read in keys from a csv file
autentication_path = os.path.abspath('../Sentiment_Analysis/Stock_Market/authentication/authentication_tokens.csv')
readin_authentication = pd.read_csv(autentication_path, header=0, sep=',')

consumer_key = readin_authentication['consumer_key'][0]
consumer_secret = readin_authentication['consumer_secret'][0]
access_token = readin_authentication['access_token'][0]
access_token_secret = readin_authentication['access_token_secret'][0]
bearer_token = readin_authentication['beaker_token'][0]

# connect to twitter application 
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
redirect_url = auth.get_authorization_url()
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit = True)

# Load Twitter Usernames

    Note:
    * Unvarified user's are not a problem, no one user can have the same ID
    
<div style="padding-left: 15px;">

| Removed User's | reason | 
| ------------ | ------------- |
|DayTradeWarrior|account removed |
|AswathDamodaran |2013-06-19 |
|cstewartcfa_twitter |2013-06-19|
|BobPisani_twitter |2015-11-04| 
|elonmusk|private|
</div>



In [5]:
with open(os.path.normpath(os.getcwd() + '/Stock_Market/user_list/user_list.xlsx'), 'rb') as f:
    user_df = pd.read_excel(f, sheet_name='user_names')
    user_df = user_df.where(pd.notnull(user_df), '')
    f.close()
groups = list(user_df.columns)
user_df

Unnamed: 0,short_term,long_term,controversial
0,DanZanger,jimcramer,JeffBezos
1,prrobbins,KennethLFisher,BillGates
2,markminervini,lei_zhang_lz,
3,bsc_daily,realwillmeade,
4,MITickWatcher,RayDalio,
5,OptionsProVol,GRDecter,
6,script_crypto,andrewrsorkin,
7,MarketMagnifier,EconguyRosie,
8,TwentyonTwenty_,,
9,WatcherGuru,,


## Download Tweets

### WARNING ~ 4 minutes
    If previously loaded SKIP to CHECKPOINT 
    * Download User tweets into csv spreadsheets 

- ( Tweepy limit of 3200 tweets per user )
    

In [6]:
for group in groups:
    print(f"\n{group}:\n")
    users = list(user_df[group][user_df[group]!= ''])
    user_download(api, users, group)
    print(f"")


short_term:

DanZanger prrobbins markminervini bsc_daily MITickWatcher OptionsProVol script_crypto MarketMagnifier TwentyonTwenty_ WatcherGuru DipFinding MacroCharts techbudsolution eWhispers HindenburgRes JehoshaphatRsch ResearchGrizzly biancoresearch muddywatersre 

long_term:

jimcramer KennethLFisher lei_zhang_lz realwillmeade RayDalio GRDecter andrewrsorkin EconguyRosie 

controversial:

JeffBezos BillGates 


## Merge Tweets

In [81]:
merge = []
for group in groups:
    merge.append(merge_files(group, display = 0))
df_all = merge_all('merge/merged_twitter_users', display = 0)

size of merged data sets of short_term: (46000, 7)
size of merged data sets of long_term: (22574, 7)
size of merged data sets of controversial: (3388, 7)
size of merged data sets of merged_twitter_users: (71962, 7)


In [82]:
df_all.head(2)

Unnamed: 0,id,created_at,user,favorite_count,retweet_count,url,text
0,1621286339588112385,2023-02-02 18:16:00-05:00,WatcherGuru_twitter,29,2,https://twitter.com/i/web/status/1621286339588...,
1,1621262627551821825,2023-02-02 16:41:46-05:00,WatcherGuru_twitter,832,120,https://twitter.com/i/web/status/1621262627551...,JUST IN Apple AAPL CEO Tim Cook says the compa...


In [83]:
display(df_all.info(verbose = True, null_counts = None, show_counts=None))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71962 entries, 0 to 3387
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              71962 non-null  int64 
 1   created_at      71962 non-null  object
 2   user            71962 non-null  object
 3   favorite_count  71962 non-null  int64 
 4   retweet_count   71962 non-null  int64 
 5   url             71962 non-null  object
 6   text            70084 non-null  object
dtypes: int64(3), object(4)
memory usage: 4.4+ MB


None

- Some users have infrequent tweets and span the 3600 limit over 10 years

In [84]:
df_all.groupby('user')['created_at'].min().sort_values(ascending= True).head(5)

user
BillGates_twitter          2013-07-03 13:04:11-04:00
JeffBezos_twitter          2015-11-24 06:14:26-05:00
DanZanger_twitter          2017-04-26 11:09:50-04:00
techbudsolution_twitter    2017-04-30 12:25:18-04:00
HindenburgRes_twitter      2017-07-31 13:54:11-04:00
Name: created_at, dtype: object

## Drop Old Tweets
- Keep 2017 - 2023

In [91]:
threshold = '2017-01-01'
df_all_upperbound = df_all[df_all.created_at > threshold]
df_all_upperbound.tail(5)

Unnamed: 0,id,created_at,user,favorite_count,retweet_count,url,text
2143,819411084817924096,2017-01-12 00:10:01-05:00,BillGates_twitter,5352,1724,https://twitter.com/i/web/status/8194110848179...,Here are five things that make me more optimis...
2144,818672177537028096,2017-01-09 23:13:51-05:00,BillGates_twitter,5983,2187,https://twitter.com/i/web/status/8186721775370...,Theres a lot to be optimistic about in 2017lif...
2145,818130649626382336,2017-01-08 11:22:01-05:00,BillGates_twitter,4953,1210,https://twitter.com/i/web/status/8181306496263...,I had a first in 2016I sniffed poop perfume Tr...
2146,817743090526121984,2017-01-07 09:42:00-05:00,BillGates_twitter,7296,2227,https://twitter.com/i/web/status/8177430905261...,I got to learn about this fascinating HIVpreve...
2147,817001964487774208,2017-01-05 08:37:02-05:00,BillGates_twitter,8285,2243,https://twitter.com/i/web/status/8170019644877...,When I was in my 20s and early 30s I was fanat...


In [92]:
# Adding nonessential twitter words to remove
stop = nltk.corpus.stopwords.words("english") 
twitter_nonessential_words = ['twitter', 'birds','lists','list', 'source','just','am','pm'\
                              'a','b','c','d','e','f','g','h','i','j','k','l','m','n',\
                              'n','o','p','q','r','s','t','u','v','w','x','y','z']
stop.extend(twitter_nonessential_words) # merge two lists together
stop = sorted(list( dict.fromkeys(stop) )) # remove duplicates

### Create dictionarys of words 
* Remove unnecessary words
* Generate frequency of words per sentence

In [93]:
df_all_words = strip_all_words(df_all_upperbound, stop)
df_all_words_count = df_all_words.explode().replace("", np.nan, regex=True).dropna() # drop NAN's and empty words
all_count = df_all_words_count.value_counts()

In [94]:
print(f"Tweets of Dictionaries: {len(df_all_words)}")
print(f"all words: {len(df_all_words_count)}")
print(f"Dictionary of all words: {len(all_count)}")

Tweets of Dictionaries: 70649
all words: 1007966
Dictionary of all words: 44610


* Nan are tweets w/ images
* ',' are words removed with special cases

In [105]:
print(f"All the words in each individual Sentence:\n{df_all_words[0:5]}")

All the words in each individual Sentence:
0                                                  NaN
1    [, apple, aapl, ceo, tim, cook, says, company,...
2    [, apple, aapl, reports, billion, revenue, exp...
3    [, berkshire, hathaways, charlie, munger, urge...
4    [, billionaire, investor, ray, dalio, says, cr...
Name: text, dtype: object


In [95]:
print(f"5 words from dictionary of all words:\n{all_count[0:5]}", end='\n\n')

5 words from dictionary of all words:
stocks    8781
stock     8692
today     6051
score     5853
top       5620
Name: text, dtype: int64



# Probability small example

p = count(particular word in sentence) / (total particular word in all sentences) * 100 / (total of all unique words)

d{  hat:1, sandwich:2, lemon:1, orange:1, snorkle:1 }

n = LEN(d.KEYS())  -> n = 5

Tweet1: hat sandwich lemon 

Tweet2: snorkle sandwich orange 

Tweet1:

-> 1/1 * 100 + 1/2 * 100 + 1/1 * 100   
-> 100 + 50 + 100 = 250
-> 250/5 = 50%

Tweet2:

-> 1/1 * 100 + 1/2 * 100 + 1/1 * 100 
-> 100 + 50 + 100 
-> 250/5 = 50%

## Probability of individual tweets

In [112]:
# Probabilities
sentence_list, total_probability, individual_probability = sentence_word_probability(all_count, df_all_words)
print(f'sum of probability column = {sum(total_probability)}')

sum of probability column = 99.99999999999787


In [113]:
df_all_prob = df_all_upperbound.reset_index()
df_all_prob['frequency'] = sentence_list
df_all_prob['probability'] = total_probability
df_all_prob = df_all_prob.dropna()
df_all_prob.insert(loc = 0, column = 'date', value = pd.to_datetime(df_all_prob['created_at']).apply(lambda x: x.strftime('%Y-%m-%d')))
df_all_prob.date = pd.to_datetime(df_all_prob['date'], format='%Y-%m-%d')
df_all_prob = df_all_prob.sort_values(by=['date'], ascending=False).drop(columns=['index'])

In [114]:
df_all_prob.head(2)

Unnamed: 0,date,id,created_at,user,favorite_count,retweet_count,url,text,frequency,probability
1,2023-02-02,1621262627551821825,2023-02-02 16:41:46-05:00,WatcherGuru_twitter,832,120,https://twitter.com/i/web/status/1621262627551...,JUST IN Apple AAPL CEO Tim Cook says the compa...,"[{'apple': 0.4132231404958678, 'aapl': 0.15408...",0.000328
60643,2023-02-02,1621263370333687811,2023-02-02 16:44:43-05:00,GRDecter_twitter,725,80,https://twitter.com/i/web/status/1621263370333...,BREAKING Apple reports first profit miss since...,"[{'breaking': 0.3703703703703704, 'apple': 0.4...",4e-05


In [115]:
df_wide1 = df_all_prob.pivot_table(index='date', values=['favorite_count','retweet_count'], aggfunc='sum',fill_value=0 ).sort_values(by='date',ascending=False)
df_wide2 = df_all_prob.pivot_table(index='date', columns=['user'], values=['probability'], aggfunc='sum',fill_value=0 ).sort_values(by='date',ascending=False).droplevel(0, axis=1) 
df_wide_merge = pd.merge(df_wide1, df_wide2, how='inner', on='date')

- Merging Sat/Sun Tweets to Monday and re-merging to data

In [116]:
# Drop Saturday-Monday And replace with Monday
week_end_mask = df_wide_merge.reset_index().date.dt.day_name().isin(['Saturday', 'Sunday', 'Monday'])
week_end = df_wide_merge.reset_index().loc[week_end_mask, :]
monday_group = week_end.groupby([pd.Grouper(key='date', freq='W-MON')])[df_wide_merge.columns].sum().reset_index('date')

df_wide_stripped = df_wide_merge.reset_index().loc[~ week_end_mask, :]
df_wide = pd.merge(df_wide_stripped, monday_group, how='outer').set_index('date')
df_wide.head(5)

Unnamed: 0_level_0,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-02-02,24482,3020,0.001374,0.0,0.071395,0.00759,0.009632,0.0,0.0,0.0,0.055398,0.229941,0.002507,0.0,0.016076,0.015236,0.0,0.019743,0.002178,0.0,0.000823,0.054218,0.018222,0.000637,0.0,0.002625,0.0,0.006658,0.028625,0.005715,0.0
2023-02-01,89591,13306,0.0,0.0,0.148092,0.013025,0.013038,0.0,0.0,0.0,0.03845,0.246047,0.0,0.0,0.017105,0.008334,0.0,0.026854,0.00503,0.002941,0.021307,0.015214,0.015967,0.017537,0.003775,0.013186,0.0,0.002287,0.017328,0.005355,0.002438
2023-01-31,115600,15716,0.001501,0.0,0.095795,0.0,0.049577,0.0,0.004822,0.0,0.016362,0.222957,0.0,0.0,0.015786,0.000313,0.0,0.033054,0.006011,0.0,0.003762,0.035899,0.013049,0.005679,0.000208,0.001397,0.0,0.000162,0.044618,0.004896,0.005568
2023-01-27,105166,18173,5.3e-05,0.0,0.101076,0.0,0.03733,0.0,0.0,0.0,0.036896,0.173331,0.0,0.0,0.016281,0.001669,0.0,0.034225,0.007609,0.0,0.00728,0.025116,0.012157,0.007237,0.008102,0.0,0.002274,0.00253,0.0,0.001996,0.006605
2023-01-26,152590,26798,0.0,0.0,0.20291,0.005628,0.028595,4.3e-05,0.0,0.0,0.012153,0.307354,0.0,0.0,0.019804,0.009191,0.0,0.020523,0.005227,0.0,0.004439,0.012817,0.01309,0.008954,0.0,0.006033,0.0,0.003426,0.0,0.004412,0.0


In [117]:
path_all_merged_twitter_analysts_pivot = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_twitter_users' # Create Folders
if not os.path.exists(path_all_merged_twitter_analysts_pivot):
    os.makedirs(path_all_merged_twitter_analysts_pivot)
df_wide.to_csv(path_all_merged_twitter_analysts_pivot +'/all_merged_twitter_users_pivot.csv', index=True) # Export to csv

df_wide.head(5)

Unnamed: 0_level_0,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-02-02,24482,3020,0.001374,0.0,0.071395,0.00759,0.009632,0.0,0.0,0.0,0.055398,0.229941,0.002507,0.0,0.016076,0.015236,0.0,0.019743,0.002178,0.0,0.000823,0.054218,0.018222,0.000637,0.0,0.002625,0.0,0.006658,0.028625,0.005715,0.0
2023-02-01,89591,13306,0.0,0.0,0.148092,0.013025,0.013038,0.0,0.0,0.0,0.03845,0.246047,0.0,0.0,0.017105,0.008334,0.0,0.026854,0.00503,0.002941,0.021307,0.015214,0.015967,0.017537,0.003775,0.013186,0.0,0.002287,0.017328,0.005355,0.002438
2023-01-31,115600,15716,0.001501,0.0,0.095795,0.0,0.049577,0.0,0.004822,0.0,0.016362,0.222957,0.0,0.0,0.015786,0.000313,0.0,0.033054,0.006011,0.0,0.003762,0.035899,0.013049,0.005679,0.000208,0.001397,0.0,0.000162,0.044618,0.004896,0.005568
2023-01-27,105166,18173,5.3e-05,0.0,0.101076,0.0,0.03733,0.0,0.0,0.0,0.036896,0.173331,0.0,0.0,0.016281,0.001669,0.0,0.034225,0.007609,0.0,0.00728,0.025116,0.012157,0.007237,0.008102,0.0,0.002274,0.00253,0.0,0.001996,0.006605
2023-01-26,152590,26798,0.0,0.0,0.20291,0.005628,0.028595,4.3e-05,0.0,0.0,0.012153,0.307354,0.0,0.0,0.019804,0.009191,0.0,0.020523,0.005227,0.0,0.004439,0.012817,0.01309,0.008954,0.0,0.006033,0.0,0.003426,0.0,0.004412,0.0


### CHECKPOINT    
    Load pivot data

In [118]:
path_all_merged_twitter_analysts_pivot = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_twitter_users'
df_wide = pd.read_csv(path_all_merged_twitter_analysts_pivot +'/all_merged_twitter_users_pivot.csv').astype({'date':'datetime64[ns]'}).set_index('date')
df_wide.head()

Unnamed: 0_level_0,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-02-02,24482,3020,0.001374,0.0,0.071395,0.00759,0.009632,0.0,0.0,0.0,0.055398,0.229941,0.002507,0.0,0.016076,0.015236,0.0,0.019743,0.002178,0.0,0.000823,0.054218,0.018222,0.000637,0.0,0.002625,0.0,0.006658,0.028625,0.005715,0.0
2023-02-01,89591,13306,0.0,0.0,0.148092,0.013025,0.013038,0.0,0.0,0.0,0.03845,0.246047,0.0,0.0,0.017105,0.008334,0.0,0.026854,0.00503,0.002941,0.021307,0.015214,0.015967,0.017537,0.003775,0.013186,0.0,0.002287,0.017328,0.005355,0.002438
2023-01-31,115600,15716,0.001501,0.0,0.095795,0.0,0.049577,0.0,0.004822,0.0,0.016362,0.222957,0.0,0.0,0.015786,0.000313,0.0,0.033054,0.006011,0.0,0.003762,0.035899,0.013049,0.005679,0.000208,0.001397,0.0,0.000162,0.044618,0.004896,0.005568
2023-01-27,105166,18173,5.3e-05,0.0,0.101076,0.0,0.03733,0.0,0.0,0.0,0.036896,0.173331,0.0,0.0,0.016281,0.001669,0.0,0.034225,0.007609,0.0,0.00728,0.025116,0.012157,0.007237,0.008102,0.0,0.002274,0.00253,0.0,0.001996,0.006605
2023-01-26,152590,26798,0.0,0.0,0.20291,0.005628,0.028595,4.3e-05,0.0,0.0,0.012153,0.307354,0.0,0.0,0.019804,0.009191,0.0,0.020523,0.005227,0.0,0.004439,0.012817,0.01309,0.008954,0.0,0.006033,0.0,0.003426,0.0,0.004412,0.0


In [121]:
with open(os.path.normpath(os.getcwd() + '/Stock_Market/ticker_list/ticker_list.xlsx'), 'rb') as f:
    ticker_df = pd.read_excel(f, sheet_name='ticker_sheet')
    ticker_df = ticker_df.where(pd.notnull(ticker_df), '')
    f.close()
ticker_df.head(10)

Unnamed: 0,ticker_name,ticker_label
0,^GSPC,SandP_500
1,^IXIC,NASDAQ
2,^RUT,RUSSEL
3,^DJI,DOW_JONES
4,AAPL,APPLE
5,ABBV,ABBVIE
6,ABNB,AIRBNB
7,ADBE,ADOBE
8,AMD,AMD
9,AMZN,AMAZON


In [122]:
# downloding index fund's or stock tickers  #.resample('D').ffill()
how_far_back = df_wide.index.min().date()
today = date.today()
column_names = dict(zip(ticker_df.ticker_name, ticker_df.ticker_label))
column_names['Date']='date'
stock_list = list(ticker_df.ticker_name)
stock_str = ' '.join( stock_list )

index_funds_df = yf.download(stock_str, how_far_back, today, interval = '1d', progress=False)['Close'].reset_index('Date').rename(columns=column_names)

convert_dict = dict(zip(ticker_df.ticker_label, ['float64']*len(ticker_df.ticker_label)))
convert_dict['date'] = 'datetime64[ns]'
index_funds_df = index_funds_df.astype(convert_dict)

print(f'{how_far_back} -> {today}')

2017-01-05 -> 2023-02-02


In [123]:
path_index_funds_merge = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_index_funds' # Create Folders
if not os.path.exists(path_index_funds_merge):
    os.makedirs(path_index_funds_merge)
index_funds_df.to_csv(path_index_funds_merge +'/all_merged_index_funds.csv', index=False) # Export to csv
index_funds_df.head(5)

Unnamed: 0,date,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL
0,2017-01-05,29.1525,63.77,,105.910004,11.24,39.022499,21.15,111.239998,174.279999,158.710007,94.370003,44.0,293.570007,93.0,162.910004,72.790001,,30.17,117.309998,80.010002,107.379997,49.009998,30.01,78.599998,12.4,189.275208,39.701,56.209999,111.908539,161.281067,116.860001,86.110001,21.690001,75.589996,,106.989998,31.295,120.669998,14.04,43.220001,62.299999,131.809998,53.060001,79.129997,60.900002,25.434999,71.360001,58.062016,,38.639999,43.990002,,85.059998,41.060001,65.550003,,56.459999,4.768,,54.560001,14.56,146.75,74.57,16.4,24.969999,15.116667,29.799999,81.089996,54.639999,69.209999,19899.289062,2269.0,5487.939941,1371.939941
1,2017-01-06,29.477501,63.790001,,108.300003,11.32,39.7995,21.365,111.120003,176.589996,159.100006,93.889999,43.5,295.0,93.040001,162.830002,73.800003,,30.23,116.839996,80.43,108.980003,48.669998,31.049999,79.160004,13.08,189.815643,40.307499,56.66,113.604996,162.07457,116.300003,86.120003,21.809999,75.669998,,107.760002,31.84,123.410004,14.18,43.849998,62.84,131.070007,53.91,82.099998,60.290001,25.775,71.730003,57.984497,,38.450001,45.016666,,85.029999,41.450001,65.529999,,57.130001,4.69,,56.169998,15.0,149.440002,74.959999,16.549999,24.719999,15.267333,29.629999,82.209999,53.259998,68.260002,19963.800781,2276.97998,5521.060059,1367.280029
2,2017-01-09,29.747499,64.209999,,108.57,11.49,39.846001,21.645,112.489998,176.970001,158.320007,94.720001,43.299999,299.019989,92.370003,160.970001,73.959999,,30.18,115.839996,80.589996,108.360001,46.580002,30.75,78.239998,12.35,188.914917,40.3325,56.07,112.665718,160.277252,116.279999,86.18,21.639999,76.269997,,107.550003,32.209999,124.900002,14.37,42.709999,62.639999,130.949997,53.380001,82.0,59.639999,26.82,71.68,57.55814,,39.029999,44.573334,,84.400002,41.400002,65.650002,,58.200001,4.718,,55.689999,15.06,150.270004,75.650002,17.4,25.030001,15.418667,30.040001,81.75,52.68,68.709999,19887.380859,2268.899902,5531.819824,1357.48999
3,2017-01-10,29.7775,64.07,,108.260002,11.44,39.794998,21.950001,112.650002,180.570007,159.070007,96.75,43.25,297.790009,93.830002,161.660004,73.980003,,30.379999,114.959999,81.099998,108.379997,46.700001,30.25,77.349998,12.77,188.374466,40.239498,54.639999,112.953255,158.240921,116.160004,86.43,21.57,76.269997,,107.32,32.424999,124.349998,14.56,43.060001,62.619999,129.889999,53.110001,81.669998,61.220001,26.6175,71.790001,56.870155,,38.66,44.893333,,83.489998,41.080002,65.629997,,57.880001,4.787,,56.18,14.91,149.240005,77.309998,18.049999,25.24,15.324667,30.07,81.309998,52.759998,68.230003,19855.529297,2268.899902,5551.819824,1370.900024
4,2017-01-11,29.9375,61.139999,,108.989998,11.2,39.951,21.780001,113.480003,179.419998,159.399994,96.940002,43.5,287.109985,94.650002,161.539993,75.769997,,30.15,115.93,81.349998,109.440002,47.400002,30.41,78.330002,12.73,188.97496,40.3955,55.389999,113.106606,160.372849,114.730003,87.080002,21.940001,75.260002,,107.809998,33.240002,126.089996,14.54,43.669998,63.189999,130.5,52.689999,83.900002,61.189999,26.290001,71.349998,56.715115,,39.110001,45.833332,,83.75,41.279999,66.099998,,58.099998,4.833,,57.68,14.9,149.0,78.410004,18.200001,26.0,15.315333,30.17,81.800003,52.459999,68.529999,19954.279297,2275.320068,5563.649902,1373.300049


In [124]:
# Merging the probabilities of words used from twitter and database of index funds
df_merge = pd.merge(index_funds_df, df_wide, how='inner', on='date').set_index('date').fillna(0)
df_merge_original = df_merge.copy()

columns = list(ticker_df.ticker_label) + ['favorite_count', 'retweet_count']
df_merge = normalize_columns(df_merge.copy(), columns)
df_merge.tail(5)

Unnamed: 0_level_0,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1
2023-01-26,0.751075,0.760264,0.504612,0.446228,0.4307,0.407986,0.126412,0.735982,0.850516,0.340615,0.226072,0.062826,0.45948,0.986014,0.760167,0.389172,0.356334,0.540318,0.998057,0.737023,0.216834,0.830373,0.422773,0.664056,0.43494,0.302306,0.535628,0.678918,0.809469,0.527726,0.771337,0.657143,0.665895,0.910997,0.081142,0.933096,0.819572,0.1991,0.39247,0.848863,0.661301,0.418997,0.605463,0.616672,0.960794,0.561326,0.312465,0.692164,0.239873,0.787417,0.708868,0.171053,0.752761,0.1533,0.604518,0.266998,0.776832,0.265373,0.379333,0.248165,0.251194,0.790554,0.369224,0.040555,0.307992,0.372674,0.576098,0.845619,0.184132,0.812547,0.84346,0.712355,0.569986,0.628212,0.305068,0.311695,0.0,0.0,0.20291,0.005628,0.028595,4.3e-05,0.0,0.0,0.012153,0.307354,0.0,0.0,0.019804,0.009191,0.0,0.020523,0.005227,0.0,0.004439,0.012817,0.01309,0.008954,0.0,0.006033,0.0,0.003426,0.0,0.004412,0.0
2023-01-27,0.763963,0.750522,0.53468,0.454623,0.432275,0.428454,0.142066,0.714763,0.835528,0.336101,0.217449,0.080658,0.45138,1.0,0.771071,0.386769,0.355925,0.545321,0.935739,0.737497,0.215475,0.824177,0.419835,0.65578,0.446285,0.315791,0.549591,0.669963,0.800108,0.527004,0.762522,0.660809,0.669753,0.891333,0.082669,0.922088,0.810136,0.214239,0.390665,0.853312,0.661871,0.411708,0.605463,0.60991,0.940749,0.579521,0.303727,0.700691,0.244636,0.774496,0.712726,0.179511,0.746542,0.157092,0.600515,0.280211,0.780186,0.276022,0.396526,0.246521,0.261048,0.79242,0.363034,0.046991,0.321544,0.416966,0.574751,0.885245,0.197979,0.824116,0.845034,0.716313,0.580327,0.633999,0.210255,0.211375,5.3e-05,0.0,0.101076,0.0,0.03733,0.0,0.0,0.0,0.036896,0.173331,0.0,0.0,0.016281,0.001669,0.0,0.034225,0.007609,0.0,0.00728,0.025116,0.012157,0.007237,0.008102,0.0,0.002274,0.00253,0.0,0.001996,0.006605
2023-01-30,0.744795,0.745042,0.504888,0.442108,0.412915,0.417,0.128184,0.692812,0.81669,0.331761,0.189181,0.083128,0.433156,0.982431,0.77105,0.387739,0.34576,0.536786,0.89651,0.724737,0.198913,0.783836,0.417815,0.65721,0.435497,0.300306,0.524728,0.666604,0.782075,0.537957,0.679311,0.647979,0.621914,0.886009,0.083488,0.912109,0.777597,0.198281,0.361527,0.84219,0.642463,0.398091,0.596306,0.583746,0.937351,0.540643,0.320429,0.686996,0.237199,0.749269,0.700346,0.17117,0.75126,0.14895,0.587861,0.263732,0.772188,0.258437,0.380502,0.236949,0.245969,0.776784,0.344465,0.041166,0.304668,0.388728,0.573224,0.871467,0.213323,0.81191,0.8307,0.695685,0.558765,0.616266,0.462436,0.673975,0.0,3.9e-05,0.063113,0.0,0.033576,0.004585,0.000382,0.0,0.018369,0.259153,0.0,0.0,0.018445,0.005241,0.0,0.094512,0.007065,5e-06,0.007077,0.104913,0.016463,0.019111,0.007604,0.002311,0.021244,0.004607,0.071951,0.004334,0.002384
2023-01-31,0.753234,0.763309,0.512405,0.453988,0.430634,0.434486,0.138669,0.706345,0.82372,0.341396,0.185243,0.08011,0.45499,0.929203,0.788226,0.401316,0.361212,0.550029,0.895166,0.731176,0.206557,0.796227,0.429017,0.677202,0.445519,0.298048,0.542024,0.683022,0.802793,0.531096,0.698277,0.656927,0.641975,0.898153,0.088836,0.910325,0.80916,0.204794,0.376483,0.85912,0.660625,0.399424,0.603884,0.603919,0.955358,0.552762,0.327619,0.688547,0.252279,0.766344,0.706161,0.180451,0.765948,0.155828,0.599156,0.276202,0.781734,0.271093,0.389261,0.249398,0.253173,0.809987,0.35231,0.046739,0.315472,0.405209,0.569633,0.878003,0.232784,0.830167,0.850964,0.718673,0.576812,0.648108,0.231115,0.182797,0.001501,0.0,0.095795,0.0,0.049577,0.0,0.004822,0.0,0.016362,0.222957,0.0,0.0,0.015786,0.000313,0.0,0.033054,0.006011,0.0,0.003762,0.035899,0.013049,0.005679,0.000208,0.001397,0.0,0.000162,0.044618,0.004896,0.005568
2023-02-01,0.760692,0.753306,0.525687,0.477303,0.492912,0.448176,0.151665,0.72908,0.858751,0.34646,0.195559,0.093553,0.458071,0.91331,0.80302,0.417549,0.376902,0.547087,0.87529,0.743502,0.214201,0.77764,0.445546,0.689871,0.45248,0.30992,0.556077,0.675933,0.789288,0.535429,0.718312,0.652938,0.647377,0.892896,0.091127,0.922259,0.866754,0.218945,0.414776,0.869748,0.678217,0.413877,0.621014,0.633922,0.995855,0.5982,0.326181,0.690356,0.260984,0.790801,0.71238,0.200893,0.774633,0.161108,0.636689,0.283848,0.792699,0.277117,0.411108,0.273064,0.261309,0.823306,0.377141,0.051557,0.335534,0.425784,0.587681,0.882065,0.237275,0.838658,0.851344,0.735323,0.59874,0.667996,0.179116,0.154766,0.0,0.0,0.148092,0.013025,0.013038,0.0,0.0,0.0,0.03845,0.246047,0.0,0.0,0.017105,0.008334,0.0,0.026854,0.00503,0.002941,0.021307,0.015214,0.015967,0.017537,0.003775,0.013186,0.0,0.002287,0.017328,0.005355,0.002438


In [125]:
path_twitter_and_index_fund = f'../Sentiment_Analysis/Stock_Market/data/merge/combined'
if not os.path.exists(path_twitter_and_index_fund):
    os.makedirs(path_twitter_and_index_fund)
df_merge.to_csv(path_twitter_and_index_fund +'/index_funds_and_twitter_analysts.csv') # Export to csv

In [126]:
path_twitter_and_index_fund = f'../Sentiment_Analysis/Stock_Market/data/merge/combined'
df_merge = pd.read_csv(path_twitter_and_index_fund +'/index_funds_and_twitter_analysts.csv').set_index('date')
df_merge.head()

Unnamed: 0_level_0,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1
2017-01-05,0.0,0.032794,0.0,0.0,0.011222,0.0,0.0,0.0,0.012658,0.184312,0.122918,0.902606,0.466743,0.008611,0.02725,0.0,0.0,0.005592,0.471419,0.002844,0.197129,0.600303,0.071074,0.001226,0.009883,1.0,0.0,0.962687,0.06176,0.85065,0.076399,0.076334,0.903549,0.004592,0.0,0.006344,0.055479,0.108296,0.00116,0.190435,0.0,0.004675,0.017603,0.0,0.220697,0.003579,0.310917,0.436154,0.0,0.0,0.045988,0.0,0.151388,0.005503,0.115456,0.0,0.102167,0.000304,0.0,0.007399,0.002501,0.264526,0.099683,0.0,0.0,0.008003,0.004579,0.0,0.721931,0.037682,0.071802,0.012348,0.0,0.262321,0.018601,0.030602,0.002867,0.0,0.0,0.0,0.0,0.0,0.002264,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-01-09,0.003893,0.036621,0.0,0.004567,0.012863,0.005581,0.003655,0.001606,0.01797,0.183183,0.124296,0.883402,0.490734,0.00497,0.023011,0.004933,0.0,0.005886,0.460435,0.005135,0.205453,0.566846,0.084665,0.0,0.009709,0.997675,0.005689,0.960075,0.06757,0.838569,0.068652,0.077089,0.899691,0.006854,0.0,0.008264,0.070365,0.12272,0.005415,0.184132,0.001211,0.003147,0.020129,0.00461,0.212136,0.008055,0.314456,0.423135,0.0,0.005999,0.049347,0.0,0.144312,0.006767,0.116171,0.0,0.124613,0.0,0.0,0.014035,0.004367,0.287176,0.107456,0.003595,0.000139,0.008761,0.006734,0.003886,0.648578,0.032374,0.071148,0.012309,0.004152,0.252366,0.036451,0.065414,0.006898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-01-12,0.004318,0.011134,0.0,0.004601,0.008072,0.011247,0.005021,0.004537,0.021702,0.183097,0.128942,0.90535,0.441211,0.014333,0.024978,0.010204,0.0,0.001766,0.462826,0.007901,0.198403,0.565193,0.077319,0.003065,0.010127,0.994963,0.005558,0.94347,0.068158,0.842021,0.046481,0.077736,0.94213,0.009782,0.0,0.012242,0.087123,0.128585,0.00606,0.195502,0.001104,0.0,0.012393,0.006955,0.218727,0.004953,0.310474,0.427391,0.0,0.008614,0.054415,0.0,0.138308,0.007362,0.119531,0.0,0.12242,0.000852,0.0,0.017793,0.003173,0.269545,0.115374,0.005393,0.004248,0.008478,0.0,0.001649,0.648578,0.02452,0.071347,0.012911,0.005634,0.254833,0.0107,0.020052,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-01-17,0.005544,0.01618,0.0,0.003588,0.001903,0.009919,0.004209,0.006002,0.022195,0.181303,0.129808,0.939643,0.420258,0.011905,0.029217,0.009403,0.0,0.000294,0.463723,0.0064,0.20214,0.565469,0.076217,0.005483,0.011171,0.990314,0.00477,0.940298,0.065216,0.84133,0.04982,0.048733,0.969907,0.008784,0.0,0.011179,0.074677,0.132847,0.003481,0.177212,0.000819,0.006595,0.022261,0.003662,0.215533,0.00307,0.312465,0.441412,0.0,0.007076,0.058426,0.0,0.152997,0.006284,0.105733,0.0,0.122033,0.001424,0.0,0.0,0.002986,0.30944,0.116381,0.003595,0.003832,0.009481,0.002963,0.001001,0.650823,0.029296,0.06782,0.011914,0.004805,0.248805,0.005834,0.014411,0.00202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-01-18,0.005528,0.01444,0.0,0.004945,0.002297,0.00916,0.0048,0.015075,0.033866,0.183183,0.129336,0.946502,0.422899,0.010518,0.030222,0.012607,0.0,0.001471,0.461182,0.007506,0.203754,0.563817,0.078053,0.003508,0.011623,0.988764,0.005428,0.942537,0.071321,0.828788,0.047549,0.052938,0.96142,0.011046,0.0,0.012071,0.087285,0.133017,0.003352,0.185986,0.000712,0.007253,0.019261,0.005975,0.230754,0.004557,0.311802,0.432649,0.0,0.00846,0.059021,0.0,0.149995,0.006581,0.112454,0.0,0.127838,0.001704,0.0,0.005872,0.002799,0.288527,0.133511,0.002157,0.005148,0.009947,0.001796,0.002944,0.632485,0.026006,0.066609,0.013477,0.006406,0.253104,0.013715,0.029288,0.00266,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Prediction of Today 

- trained with 5 day averages ( 0 for a bad day and 1  for a good day )

In [127]:
# Todays Data
todays_test = download_todays_test(ticker_df, df_wide, df_merge_original)
Xnew = sm.add_constant(todays_test, has_constant='add')

model = {} # Model Build For Each index fund
print(f"date: { todays_test.index.date.max() }")
output = pd.DataFrame(columns=['index', 'prediction'])
for t in ticker_df.ticker_label:
    data_with_target = create_target(df_merge.copy(), day = 5, ticker = t)
    m = linear_model(data_with_target,split=0.20,summary = False)
    y_pred = m['lm'].predict(Xnew)
    model[t] = (y_pred, m)
    output = pd.concat([output, pd.DataFrame.from_records([(t, y_pred[0])], columns=['index', 'prediction'])])
    
pd.set_option('display.max_rows', 500)
display(output.sort_values(by=['prediction'], ascending=False))

date: 2023-02-02


Unnamed: 0,index,prediction
0,NUCOR,1.563896
0,META,1.42192
0,RUSSEL,1.394102
0,BROADCOM,1.357357
0,AMD,1.272599
0,EBAY,1.232197
0,STARBUCKS,1.230151
0,SandP_500,1.212524
0,TAIWAN_SEMICONDUCTOR,1.199474
0,DISNEY,1.198313
