# Import Libraries

In [1]:
import os,sys,re,glob,ipykernel,tweepy,stockmarket,nltk,collections,itertools,pandas as pd,numpy as np,\
        seaborn as sns, yfinance as yf, matplotlib.pyplot as plt, statsmodels.formula.api as smf,\
        statsmodels.api as sm, autoreload, importlib
from pathlib import Path
from string import punctuation 
from datetime import date
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
np.random.seed(0)
pd.set_option('display.max_columns', None)

## Set Working Directory: 
    
* /Sentiment_Analysis 
    
* __ file __ isn't available in jupyter notebooks

    

In [2]:
file = os.getcwd().split(os.sep)
while(file[-1] != 'Sentiment_Analysis'): # Check the working directory
    os.chdir('..')
    file = os.getcwd().split(os.sep)
    sys.path.append(os.path.abspath(os.getcwd()))
print(f"root directory: {os.getcwd()}", sep = '\n')

root directory: c:\Code\Public_Github\Sentiment_Analysis


## Load Custom Functions

In [3]:
from src import user_download_helper, user_download, merge_files, merge_all, \
                strip_all_words, sentence_word_probability, download_todays_test, \
                format_model,linear_model, naive_bayes, create_target, normalize_columns, normalize_columns_target

# Twitter API Credentials

In [4]:
# Read in keys from a csv file
autentication_path = os.path.abspath('../Sentiment_Analysis/Stock_Market/authentication/authentication_tokens.csv')
readin_authentication = pd.read_csv(autentication_path, header=0, sep=',')

consumer_key = readin_authentication['consumer_key'][0]
consumer_secret = readin_authentication['consumer_secret'][0]
access_token = readin_authentication['access_token'][0]
access_token_secret = readin_authentication['access_token_secret'][0]
bearer_token = readin_authentication['beaker_token'][0]

# connect to twitter application 
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
redirect_url = auth.get_authorization_url()
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit = True)

# Load Twitter Usernames

    Note:
    * Unvarified user's are not a problem, no one user can have the same ID
    
<div style="padding-left: 50px;">

| Removed User's | reason | 
| ------------ | ------------- |
|DayTradeWarrior|account removed |
|AswathDamodaran |2013-06-19 |
|cstewartcfa_twitter |2013-06-19|
|BobPisani_twitter |2015-11-04| 
|elonmusk|private|
</div>



In [5]:
with open(os.path.normpath(os.getcwd() + '/Stock_Market/user_list/user_list.xlsx'), 'rb') as f:
    user_df = pd.read_excel(f, sheet_name='user_names')
    user_df = user_df.where(pd.notnull(user_df), '')
    f.close()
groups = list(user_df.columns)
user_df

Unnamed: 0,short_term,long_term,controversial
0,DanZanger,jimcramer,JeffBezos
1,prrobbins,KennethLFisher,BillGates
2,markminervini,lei_zhang_lz,
3,bsc_daily,realwillmeade,
4,MITickWatcher,RayDalio,
5,OptionsProVol,GRDecter,
6,script_crypto,andrewrsorkin,
7,MarketMagnifier,EconguyRosie,
8,TwentyonTwenty_,,
9,WatcherGuru,,


## Download Tweets

### WARNING ~ 4 minutes
    If previously loaded SKIP to CHECKPOINT 
    * Download User tweets into csv spreadsheets 

- ( Tweepy limit of 3200 tweets per user )
    

In [40]:
for group in groups:
    print(f"\n{group}:\n")
    users = list(user_df[group][user_df[group]!= ''])
    user_download(api, users, group)
    print(f"")


short_term:

DanZanger prrobbins markminervini bsc_daily MITickWatcher OptionsProVol script_crypto MarketMagnifier TwentyonTwenty_ WatcherGuru DipFinding MacroCharts techbudsolution eWhispers HindenburgRes JehoshaphatRsch ResearchGrizzly biancoresearch muddywatersre 

long_term:

jimcramer KennethLFisher lei_zhang_lz realwillmeade RayDalio GRDecter andrewrsorkin EconguyRosie 

controversial:

JeffBezos BillGates 


## Merge Tweets

In [41]:
merge = []
for group in groups:
    merge.append(merge_files(group, display = 0))
df_short_term,df_long_term  = merge[0],merge[1]  
df_all = merge_all('merge/merged_twitter_users', display = 0)

size of merged data sets of short_term: (46016, 7)
size of merged data sets of long_term: (21741, 7)
size of merged data sets of controversial: (3388, 7)
size of merged data sets of merged_twitter_users: (71145, 7)


In [42]:
df_all.head(2)

Unnamed: 0,id,created_at,user,favorite_count,retweet_count,url,text
0,1621166369172754432,2023-02-02 10:19:16-05:00,WatcherGuru_twitter,228,43,https://twitter.com/i/web/status/1621166369172...,
1,1621164341419732992,2023-02-02 10:11:13-05:00,WatcherGuru_twitter,3155,462,https://twitter.com/i/web/status/1621164341419...,JUST IN Berkshire Hathaways Charlie Munger urg...


In [43]:
display(df_all.info(verbose = True, null_counts = None, show_counts=None))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71145 entries, 0 to 3387
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              71145 non-null  int64 
 1   created_at      71145 non-null  object
 2   user            71145 non-null  object
 3   favorite_count  71145 non-null  int64 
 4   retweet_count   71145 non-null  int64 
 5   url             71145 non-null  object
 6   text            69267 non-null  object
dtypes: int64(3), object(4)
memory usage: 4.3+ MB


None

- Some Analysts have infrequent tweets and have 2k limit of tweets going farther back

In [44]:
df_all.groupby('user')['created_at'].min().sort_values(ascending= True).head(5)

user
BillGates_twitter          2013-07-03 13:04:11-04:00
JeffBezos_twitter          2015-11-24 06:14:26-05:00
DanZanger_twitter          2017-04-24 07:05:38-04:00
techbudsolution_twitter    2017-04-30 12:25:18-04:00
HindenburgRes_twitter      2017-07-31 13:54:11-04:00
Name: created_at, dtype: object

In [45]:
# Adding nonessential twitter words to remove
stop = nltk.corpus.stopwords.words("english") 
twitter_nonessential_words = ['twitter', 'birds','lists','list', 'source','just','am','pm'\
                              'a','b','c','d','e','f','g','h','i','j','k','l','m','n',\
                              'n','o','p','q','r','s','t','u','v','w','x','y','z']
stop.extend(twitter_nonessential_words) # merge two lists together
stop = sorted(list( dict.fromkeys(stop) )) # remove duplicates

### Create dictionarys of words 
* Remove unnecessary words
* Generate frequency of words per sentence

In [46]:
df_all_words = strip_all_words(df_all, stop)
df_all_words_count = df_all_words.explode().replace("", np.nan, regex=True).dropna() # drop NAN's and empty words
all_count = df_all_words_count.value_counts()

In [47]:
print(f"Tweets of Dictionaries: {len(df_all_words)}")
print(f"all words: {len(df_all_words_count)}")
print(f"Dictionary of all words: {len(all_count)}")

Tweets of Dictionaries: 71145
all words: 1008353
Dictionary of all words: 44798


In [48]:
print(f"5 words from dictionary of all words:\n{all_count[0:5]}", end='\n\n')

5 words from dictionary of all words:
stocks    8705
stock     8606
today     6005
score     5844
top       5585
Name: text, dtype: int64



In [49]:
print(f"All the words in each individual Sentence:\n{df_all_words[0:4]}")

All the words in each individual Sentence:
0                                                  NaN
1    [, berkshire, hathaways, charlie, munger, urge...
2    [, billionaire, investor, ray, dalio, says, cr...
3                                                  NaN
Name: text, dtype: object


    Note the [','] and 'NaN' variables
* Nan is a placeholder for tweets w/ images
* [','] are words removed with special cases

# Probability small example

p = count(particular word in sentence) / (total particular word in all sentences) * 100 / (total of all unique words)

d{  hat:1, sandwich:2, lemon:1, orange:1, snorkle:1 }

n = LEN(d.KEYS())  -> n = 5

Tweet1: hat sandwich lemon 

Tweet2: snorkle sandwich orange 

Tweet1:

-> 1/1 * 100 + 1/2 * 100 + 1/1 * 100   
-> 100 + 50 + 100 = 250
-> 250/5 = 50%

Tweet2:

-> 1/1 * 100 + 1/2 * 100 + 1/1 * 100 
-> 100 + 50 + 100 
-> 250/5 = 50%

## Probability of individual tweets

In [50]:
# Probabilities
sentence_list, total_probability, individual_probability = sentence_word_probability(all_count, df_all_words)
print(f'sum of probability column = {sum(total_probability)}')

sum of probability column = 99.99999999999758


In [51]:
df_all_prob = df_all.reset_index()
df_all_prob['frequency'] = sentence_list
df_all_prob['probability'] = total_probability
df_all_prob = df_all_prob.dropna()
df_all_prob.insert(loc = 0, column = 'date', value = pd.to_datetime(df_all_prob['created_at']).apply(lambda x: x.strftime('%Y-%m-%d')))
df_all_prob.date = pd.to_datetime(df_all_prob['date'], format='%Y-%m-%d')
df_all_prob = df_all_prob.sort_values(by=['date'], ascending=False).drop(columns=['index'])

In [52]:
df_all_prob.head(2)

Unnamed: 0,date,id,created_at,user,favorite_count,retweet_count,url,text,frequency,probability
1,2023-02-02,1621164341419732992,2023-02-02 10:11:13-05:00,WatcherGuru_twitter,3155,462,https://twitter.com/i/web/status/1621164341419...,JUST IN Berkshire Hathaways Charlie Munger urg...,"[{'berkshire': 4.545454545454546, 'hathaways':...",0.001408
34215,2023-02-02,1621201198274551809,2023-02-02 12:37:40-05:00,DipFinding_twitter,0,0,https://twitter.com/i/web/status/1621201198274...,ANY up 1384 at 04326 Volume 1M AvgVolume 2M in...,"[{'volume': 0.030807147258163897, 'avgvolume':...",3e-06


In [53]:
df_wide1 = df_all_prob.pivot_table(index='date', values=['favorite_count','retweet_count'], aggfunc='sum',fill_value=0 ).sort_values(by='date',ascending=False)
df_wide2 = df_all_prob.pivot_table(index='date', columns=['user'], values=['probability'], aggfunc='sum',fill_value=0 ).sort_values(by='date',ascending=False).droplevel(0, axis=1) 
df_wide_merge = pd.merge(df_wide1, df_wide2, how='inner', on='date')

- Merging Sat/Sun Tweets to Monday and re-merging to data

In [54]:
# Drop Saturday-Monday And replace with Monday
week_end_mask = df_wide_merge.reset_index().date.dt.day_name().isin(['Saturday', 'Sunday', 'Monday'])
week_end = df_wide_merge.reset_index().loc[week_end_mask, :]
monday_group = week_end.groupby([pd.Grouper(key='date', freq='W-MON')])[df_wide_merge.columns].sum().reset_index('date')

df_wide_stripped = df_wide_merge.reset_index().loc[~ week_end_mask, :]
df_wide = pd.merge(df_wide_stripped, monday_group, how='outer').set_index('date')
df_wide.head(5)

Unnamed: 0_level_0,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-02-02,17651,2190,0.00126,0.0,0.041901,0.003703,0.008881,0.0,0.0,0.0,0.054732,0.168508,0.002496,0.0,0.016105,0.015196,0.0,0.019407,0.001779,0.0,0.000721,0.053926,0.011947,0.000358,0.0,0.002619,0.0,0.006605,0.024045,0.003446,0.0
2023-02-01,88288,13021,0.0,0.0,0.150755,0.012954,0.012935,0.0,0.0,0.0,0.03823,0.246646,0.0,0.0,0.017018,0.00831,0.0,0.02676,0.00496,0.002806,0.021293,0.015187,0.016148,0.017516,0.003758,0.013198,0.0,0.002277,0.017028,0.005556,0.002471
2023-01-31,115185,15522,0.001234,0.0,0.09532,0.0,0.047406,0.0,0.004802,0.0,0.017413,0.222762,0.0,0.0,0.015731,0.000312,0.0,0.033009,0.005985,0.0,0.003765,0.035379,0.013004,0.005635,0.000213,0.001528,0.0,0.000162,0.045088,0.004866,0.005536
2023-01-27,105132,18170,5.2e-05,0.0,0.099267,0.0,0.036684,0.0,0.0,0.0,0.036613,0.172977,0.0,0.0,0.016358,0.001659,0.0,0.033853,0.007567,0.0,0.007241,0.02493,0.012123,0.007249,0.008072,0.0,0.002266,0.002507,0.0,0.001991,0.006577
2023-01-26,152501,26777,0.0,0.0,0.203219,0.005643,0.028303,4.2e-05,0.0,0.0,0.012057,0.30664,0.0,0.0,0.019858,0.009083,0.0,0.020352,0.005561,0.0,0.004433,0.012741,0.013053,0.010569,0.0,0.006002,0.0,0.003408,0.0,0.004414,0.0


In [55]:
path_all_merged_twitter_analysts_pivot = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_twitter_users' # Create Folders
if not os.path.exists(path_all_merged_twitter_analysts_pivot):
    os.makedirs(path_all_merged_twitter_analysts_pivot)
df_wide.to_csv(path_all_merged_twitter_analysts_pivot +'/all_merged_twitter_users_pivot.csv', index=True) # Export to csv

df_wide.head(5)

Unnamed: 0_level_0,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2023-02-02,17651,2190,0.00126,0.0,0.041901,0.003703,0.008881,0.0,0.0,0.0,0.054732,0.168508,0.002496,0.0,0.016105,0.015196,0.0,0.019407,0.001779,0.0,0.000721,0.053926,0.011947,0.000358,0.0,0.002619,0.0,0.006605,0.024045,0.003446,0.0
2023-02-01,88288,13021,0.0,0.0,0.150755,0.012954,0.012935,0.0,0.0,0.0,0.03823,0.246646,0.0,0.0,0.017018,0.00831,0.0,0.02676,0.00496,0.002806,0.021293,0.015187,0.016148,0.017516,0.003758,0.013198,0.0,0.002277,0.017028,0.005556,0.002471
2023-01-31,115185,15522,0.001234,0.0,0.09532,0.0,0.047406,0.0,0.004802,0.0,0.017413,0.222762,0.0,0.0,0.015731,0.000312,0.0,0.033009,0.005985,0.0,0.003765,0.035379,0.013004,0.005635,0.000213,0.001528,0.0,0.000162,0.045088,0.004866,0.005536
2023-01-27,105132,18170,5.2e-05,0.0,0.099267,0.0,0.036684,0.0,0.0,0.0,0.036613,0.172977,0.0,0.0,0.016358,0.001659,0.0,0.033853,0.007567,0.0,0.007241,0.02493,0.012123,0.007249,0.008072,0.0,0.002266,0.002507,0.0,0.001991,0.006577
2023-01-26,152501,26777,0.0,0.0,0.203219,0.005643,0.028303,4.2e-05,0.0,0.0,0.012057,0.30664,0.0,0.0,0.019858,0.009083,0.0,0.020352,0.005561,0.0,0.004433,0.012741,0.013053,0.010569,0.0,0.006002,0.0,0.003408,0.0,0.004414,0.0


### CHECKPOINT    
    Load pivot data

In [6]:
path_all_merged_twitter_analysts_pivot = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_twitter_users'
df_wide = pd.read_csv(path_all_merged_twitter_analysts_pivot +'/all_merged_twitter_users_pivot.csv').astype({'date':'datetime64[ns]'}).set_index('date')

In [7]:
with open(os.path.normpath(os.getcwd() + '/Stock_Market/ticker_list/ticker_list.xlsx'), 'rb') as f:
    ticker_df = pd.read_excel(f, sheet_name='ticker_sheet')
    ticker_df = ticker_df.where(pd.notnull(ticker_df), '')
    f.close()
ticker_df

Unnamed: 0,ticker_name,ticker_label
0,^GSPC,SandP_500
1,^IXIC,NASDAQ
2,^RUT,RUSSEL
3,^DJI,DOW_JONES
4,AAPL,APPLE
...,...,...
69,TSLA,TESLA
70,TSM,TAIWAN_SEMICONDUCTOR
71,V,VISA
72,VZ,VERIZON


In [8]:
# downloding index fund's or stock tickers  #.resample('D').ffill()
how_far_back = df_wide.index.min().date()
today = date.today()
column_names = dict(zip(ticker_df.ticker_name, ticker_df.ticker_label))
column_names['Date']='date'
stock_list = list(ticker_df.ticker_name)
stock_str = ' '.join( stock_list )

index_funds_df = yf.download(stock_str, how_far_back, today, interval = '1d', progress=False)['Close'].reset_index('Date').rename(columns=column_names)

convert_dict = dict(zip(ticker_df.ticker_label, ['float64']*len(ticker_df.ticker_label)))
convert_dict['date'] = 'datetime64[ns]'
index_funds_df = index_funds_df.astype(convert_dict)

print(f'{how_far_back} -> {today}')

2013-07-03 -> 2023-02-02


In [9]:
path_index_funds_merge = f'../Sentiment_Analysis/Stock_Market/data/merge/all_merged_index_funds' # Create Folders
if not os.path.exists(path_index_funds_merge):
    os.makedirs(path_index_funds_merge)
index_funds_df.to_csv(path_index_funds_merge +'/all_merged_index_funds.csv', index=False) # Export to csv
index_funds_df.head(5)

Unnamed: 0,date,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL
0,2013-07-03,15.028571,42.23,,46.419998,4.06,14.2015,,79.68,38.59,102.889999,,34.220001,217.5,81.849998,110.910004,38.549999,,24.59,119.080002,48.241093,63.610001,53.5,22.377947,67.25,,137.572815,22.077995,42.650002,74.849045,184.751434,86.779999,52.77,38.419998,50.139999,,58.618,18.9,24.52,11.93,24.040001,34.009998,31.558571,31.43,41.740002,43.549999,3.5325,46.98,42.015503,,30.700001,14.3,,78.57,,60.91,,33.650002,,,48.41,,50.459999,21.809999,,,7.682667,18.120001,46.759998,51.009998,74.760002,14988.370117,1615.410034,3443.669922,991.130005
1,2013-07-05,14.907857,42.720001,,47.0,4.07,14.294,,82.099998,38.810001,104.199997,,34.290001,221.649994,82.139999,111.760002,38.939999,,24.57,120.510002,48.976498,63.82,54.560001,22.664141,67.93,,139.554428,22.253838,43.709999,76.326767,186.357559,87.870003,53.990002,38.439999,50.580002,,59.502998,19.115,24.370001,12.03,24.57,34.209999,32.157143,31.82,43.310001,43.849998,3.56,48.080002,41.337208,,31.190001,14.096667,,78.339996,,60.950001,,33.860001,,,49.32,,50.59,21.85,,,8.006,18.209999,47.697498,51.299999,75.209999,15135.839844,1631.890015,3479.379883,1005.390015
2,2013-07-08,14.823214,43.68,,46.630001,4.0,14.5295,,81.529999,37.27,104.370003,,34.330002,218.070007,83.199997,112.620003,38.43,,24.629999,121.239998,48.855194,64.709999,54.82,22.605219,67.919998,,140.034836,22.542753,43.919998,76.908318,186.40535,88.589996,54.700001,38.970001,51.130001,,58.929001,18.719999,24.709999,11.54,24.879999,34.330002,33.299999,31.860001,42.869999,43.880001,3.5425,48.560001,41.327518,,31.65,14.28,,78.760002,,59.990002,,34.145,,,48.25,,50.970001,20.99,,,8.107333,17.6,47.025002,51.169998,76.709999,15224.69043,1640.459961,3484.830078,1009.25
3,2013-07-09,15.083929,43.32,,47.259998,4.05,14.5765,,82.129997,37.580002,104.68,,34.84,219.259995,85.330002,113.349998,38.720001,,25.16,123.269997,50.507961,64.940002,55.610001,22.933502,66.589996,,141.836304,22.54649,44.310001,77.594749,182.887192,88.879997,54.889999,39.52,51.790001,,59.359001,18.9,25.48,11.73,25.440001,34.349998,35.34,31.965,43.290001,44.790001,3.56,48.330002,41.976746,,31.52,14.363333,,79.57,,59.389999,,34.064999,,,48.650002,,51.259998,21.35,,,8.23,17.84,46.799999,50.959999,77.029999,15300.339844,1652.319946,3504.26001,1018.049988
4,2013-07-10,15.026071,43.91,,47.25,3.98,14.6165,,83.849998,38.330002,105.5,,34.810001,219.699997,84.959999,113.650002,39.740002,,25.41,122.900002,51.402576,64.910004,55.27,23.097643,67.029999,,141.355911,22.56517,44.130001,77.423141,183.79541,89.239998,54.830002,39.32,51.419998,,59.091,19.014999,25.799999,11.71,25.34,34.700001,34.831429,31.799999,43.470001,44.16,3.6075,48.509998,41.850777,,31.23,14.74,,79.800003,,60.439999,,34.049999,,,49.810001,,51.23,21.469999,,,8.151333,17.84,46.674999,50.540001,76.769997,15291.660156,1652.619995,3520.76001,1020.419983


In [11]:
# Merging the probabilities of words used from twitter and database of index funds
df_merge = pd.merge(index_funds_df, df_wide, how='inner', on='date').set_index('date').fillna(0)
df_merge_original = df_merge.copy()

columns = list(ticker_df.ticker_label) + ['favorite_count', 'retweet_count']
df_merge = normalize_columns(df_merge.copy(), columns)
df_merge.tail(5)

Unnamed: 0_level_0,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1
2023-01-26,0.772219,0.79236,0.504612,0.498741,0.458793,0.493683,0.244412,0.746964,0.881396,0.340615,0.380179,0.056627,0.361878,0.988288,0.779561,0.466232,0.356334,0.642726,0.998057,0.765754,0.346492,0.81401,0.479967,0.678854,0.453134,0.286858,0.602293,0.524276,0.844161,0.444339,0.829482,0.738185,0.249494,0.918223,0.081142,0.942301,0.842662,0.343287,0.436364,0.85949,0.695121,0.504917,0.658291,0.638471,0.960794,0.58895,0.312465,0.731316,0.239873,0.810555,0.74205,0.171053,0.760166,0.261919,0.621925,0.266998,0.813487,0.285875,0.379333,0.335703,0.288102,0.845771,0.508496,0.093977,0.34571,0.379935,0.622048,0.873728,0.184132,0.828411,0.87058,0.768596,0.639677,0.63756,0.304895,0.311451,0.0,0.0,0.203219,0.005643,0.028303,4.2e-05,0.0,0.0,0.012057,0.30664,0.0,0.0,0.019858,0.009083,0.0,0.020352,0.005561,0.0,0.004433,0.012741,0.013053,0.010569,0.0,0.006002,0.0,0.003408,0.0,0.004414,0.0
2023-01-27,0.784012,0.783922,0.53468,0.506341,0.460291,0.511188,0.257951,0.726628,0.869505,0.336101,0.373274,0.0727,0.355498,1.0,0.789584,0.464132,0.355925,0.646615,0.935739,0.766177,0.345358,0.807936,0.47732,0.670943,0.464114,0.299653,0.614252,0.517361,0.836504,0.443731,0.822908,0.740985,0.25094,0.900156,0.082669,0.932807,0.834433,0.355701,0.434689,0.863626,0.695634,0.498706,0.658291,0.632093,0.940749,0.605999,0.303727,0.738759,0.244636,0.79904,0.745468,0.179511,0.754134,0.265225,0.618097,0.280211,0.81629,0.296226,0.396526,0.33425,0.29747,0.847145,0.503673,0.100054,0.358523,0.423715,0.620847,0.906139,0.197979,0.839001,0.871881,0.771781,0.648342,0.643202,0.21019,0.211341,5.2e-05,0.0,0.099267,0.0,0.036684,0.0,0.0,0.0,0.036613,0.172977,0.0,0.0,0.016358,0.001659,0.0,0.033853,0.007567,0.0,0.007241,0.02493,0.012123,0.007249,0.008072,0.0,0.002266,0.002507,0.0,0.001991,0.006577
2023-01-30,0.766472,0.779176,0.504888,0.495012,0.441887,0.501393,0.245945,0.70559,0.854558,0.331761,0.350634,0.074926,0.341145,0.985288,0.789564,0.464979,0.34576,0.639982,0.89651,0.754811,0.331538,0.76839,0.4755,0.67231,0.453673,0.28496,0.592958,0.514767,0.821755,0.452953,0.760857,0.731187,0.233015,0.895265,0.083488,0.924201,0.806058,0.342616,0.407656,0.853286,0.678164,0.487102,0.650361,0.607417,0.937351,0.569569,0.320429,0.726806,0.237199,0.776559,0.734499,0.17117,0.75871,0.258127,0.606001,0.263732,0.809606,0.279132,0.380502,0.325793,0.283134,0.835631,0.489204,0.094554,0.342567,0.395803,0.619486,0.89487,0.213323,0.827827,0.860031,0.755186,0.630275,0.625915,0.46185,0.65419,0.0,3.9e-05,0.062655,0.0,0.033419,0.004565,0.000381,0.0,0.018301,0.260436,0.0,0.0,0.018486,0.00557,0.0,0.093974,0.007026,4e-06,0.007052,0.10441,0.016406,0.018881,0.007552,0.002572,0.020976,0.004585,0.071158,0.004302,0.002374
2023-01-31,0.774194,0.794997,0.512405,0.505766,0.458731,0.516347,0.255013,0.71856,0.860135,0.341396,0.347481,0.072206,0.358341,0.940715,0.805351,0.476843,0.361212,0.650274,0.895166,0.760547,0.337916,0.780537,0.485593,0.691421,0.463373,0.282817,0.607771,0.527446,0.838701,0.447177,0.775,0.738021,0.240532,0.906423,0.088836,0.922663,0.833582,0.347956,0.421531,0.869026,0.694512,0.488238,0.656924,0.626443,0.955358,0.580925,0.327619,0.728159,0.252279,0.791775,0.739652,0.180451,0.772959,0.264123,0.616799,0.276202,0.817584,0.291435,0.389261,0.336792,0.289983,0.860081,0.495317,0.099817,0.352782,0.412093,0.616284,0.900216,0.232784,0.844539,0.876784,0.773679,0.645396,0.656956,0.230289,0.180541,0.001234,0.0,0.09532,0.0,0.047406,0.0,0.004802,0.0,0.017413,0.222762,0.0,0.0,0.015731,0.000312,0.0,0.033009,0.005985,0.0,0.003765,0.035379,0.013004,0.005635,0.000213,0.001528,0.0,0.000162,0.045088,0.004866,0.005536
2023-02-01,0.781018,0.786333,0.525687,0.52687,0.517936,0.528056,0.266254,0.740349,0.88793,0.34646,0.355742,0.084322,0.360768,0.927406,0.818949,0.491028,0.376902,0.647987,0.87529,0.771525,0.344295,0.762316,0.500484,0.703532,0.470109,0.294082,0.619806,0.521971,0.827654,0.450825,0.78994,0.734974,0.242556,0.901593,0.091127,0.932955,0.883805,0.359559,0.457057,0.878906,0.710348,0.500554,0.67176,0.65474,0.995855,0.623502,0.326181,0.729738,0.260984,0.813571,0.745162,0.200893,0.781383,0.268726,0.652679,0.283848,0.826748,0.297291,0.411108,0.357703,0.297718,0.869889,0.514665,0.104366,0.37175,0.432431,0.632375,0.903538,0.237275,0.852312,0.877098,0.787074,0.663771,0.676344,0.176514,0.151451,0.0,0.0,0.150755,0.012954,0.012935,0.0,0.0,0.0,0.03823,0.246646,0.0,0.0,0.017018,0.00831,0.0,0.02676,0.00496,0.002806,0.021293,0.015187,0.016148,0.017516,0.003758,0.013198,0.0,0.002277,0.017028,0.005556,0.002471


In [12]:
path_twitter_and_index_fund = f'../Sentiment_Analysis/Stock_Market/data/merge/combined'
if not os.path.exists(path_twitter_and_index_fund):
    os.makedirs(path_twitter_and_index_fund)
df_merge.to_csv(path_twitter_and_index_fund +'/index_funds_and_twitter_analysts.csv') # Export to csv

In [13]:
path_twitter_and_index_fund = f'../Sentiment_Analysis/Stock_Market/data/merge/combined'
df_merge = pd.read_csv(path_twitter_and_index_fund +'/index_funds_and_twitter_analysts.csv').set_index('date')
df_merge.head()

Unnamed: 0_level_0,APPLE,ABBVIE,AIRBNB,ADOBE,AMD,AMAZON,ARK_INNOVATION,ASML_Holding,BROADCOM,BOEING,ALIBABA,BandG_Foods,Biogen,CATERPILLAR,COSTCO,SALESFORCE,CROWDSTRIKE,CISCO,CHEVRON,DANAHER,DISNEY,DEVON_ENERGY,EBAY,ESTEE_LAUDER,ETSY,GENERAL_ELECTRIC,GOOGLE,HALLIBURTON,HONEYWELL,IBM,JNJ,JPMORGAN,KINDER_MORGAN,ELI_LILLY,LEMONADE,MASTERCARD,MICROCHIP,META,MARVELL,MORGAN_STANLEY,MICROSOFT,NETFLIX,NIKE,Service_Now,NUCOR,NVIDIA,NVE,REALTY_INCOME,OKTA,ORACLE,PALO_ALTO,UIPATH,PROCTER_GAMBLE,PAYPAL,QUALCOMM,ROBLOX,STARBUCKS,SHOPIFY,SNOWFLAKE,SPLUNK,SQUARE_BLOCK,CONSTELLATION_BRANDS,SKYWORKS,TELADOC,ATLASSIAN,TESLA,TAIWAN_SEMICONDUCTOR,VISA,VERIZON,WALMART,DOW_JONES,SandP_500,NASDAQ,RUSSEL,favorite_count,retweet_count,BillGates_twitter,DanZanger_twitter,DipFinding_twitter,EconguyRosie_twitter,GRDecter_twitter,HindenburgRes_twitter,JeffBezos_twitter,JehoshaphatRsch_twitter,KennethLFisher_twitter,MITickWatcher_twitter,MacroCharts_twitter,MarketMagnifier_twitter,OptionsProVol_twitter,RayDalio_twitter,ResearchGrizzly_twitter,TwentyonTwenty__twitter,WatcherGuru_twitter,andrewrsorkin_twitter,biancoresearch_twitter,bsc_daily_twitter,eWhispers_twitter,jimcramer_twitter,lei_zhang_lz_twitter,markminervini_twitter,muddywatersre_twitter,prrobbins_twitter,realwillmeade_twitter,script_crypto_twitter,techbudsolution_twitter
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1
2013-07-03,0.000387,0.0,0.0,0.002378,0.015222,0.000884,0.0,0.002722,0.004027,0.0228,0.0,0.571711,0.103869,0.115859,0.001466,0.0,0.0,0.099497,0.484645,0.0,0.019844,0.649075,0.036838,0.00827,0.0,0.632358,0.007596,0.548048,0.0,0.954079,0.011653,0.020171,0.822203,0.007612,0.0,0.0,0.000497,0.0,0.048086,0.026999,0.009168,0.0,0.001265,0.000227,0.102806,0.0,0.041256,0.145979,0.0,0.0,0.003248,0.0,0.109308,0.0,0.122676,0.0,0.003719,0.0,0.0,0.09106,0.0,0.00218,0.00258,0.0,0.0,0.001025,0.018974,0.016747,0.586078,0.173047,0.009619,0.0,0.0,0.025124,0.00031,0.006293,0.001909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-07-09,0.000718,0.008212,0.0,0.003683,0.01516,0.003058,0.0,0.00574,0.002444,0.02798,0.0,0.587043,0.109971,0.132701,0.006367,0.000626,0.0,0.112534,0.515953,0.007977,0.02927,0.677554,0.04603,0.006121,0.0,0.658461,0.01121,0.571964,0.017233,0.935187,0.03257,0.037626,0.854004,0.012656,0.0,0.002191,0.000497,0.002684,0.045694,0.043084,0.010258,0.005728,0.004923,0.002575,0.111232,8.3e-05,0.056188,0.145105,0.0,0.011241,0.003571,0.0,0.119709,0.0,0.112288,0.0,0.008194,0.0,0.0,0.092305,0.0,0.00597,0.0,0.0,0.0,0.002384,0.016732,0.01694,0.584207,0.195103,0.023785,0.011603,0.004803,0.043203,0.00065,0.004955,0.00051,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-07-10,0.000372,0.012657,0.0,0.003668,0.014723,0.003289,0.0,0.007858,0.003619,0.030352,0.0,0.586301,0.111496,0.13091,0.00697,0.004385,0.0,0.118253,0.513188,0.011125,0.029057,0.672965,0.048746,0.007553,0.0,0.65552,0.011355,0.56937,0.016156,0.944391,0.036155,0.037132,0.848222,0.011525,0.0,0.001399,0.002128,0.003579,0.045455,0.041935,0.01138,0.004958,0.003794,0.002848,0.106951,0.000227,0.058179,0.142264,0.0,0.007265,0.005493,0.0,0.122101,0.0,0.119464,0.0,0.008032,0.0,0.0,0.098324,0.0,0.005828,0.000673,0.0,0.0,0.002189,0.016732,0.016338,0.568488,0.192577,0.02339,0.011697,0.006112,0.044795,0.002203,0.017773,0.001318,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-07-15,0.001807,0.018006,0.0,0.00502,0.017344,0.007416,0.0,0.01164,0.003118,0.030815,0.0,0.604105,0.127548,0.142719,0.011469,0.012638,0.0,0.130146,0.526489,0.012725,0.036286,0.684168,0.059679,0.008888,0.0,0.658829,0.014948,0.569082,0.022678,0.961345,0.047709,0.037626,0.876843,0.011892,0.0,0.002866,0.011634,0.004921,0.048565,0.057445,0.016092,0.008022,0.00294,0.005559,0.114561,0.000363,0.062382,0.18597,0.0,0.017957,0.009898,0.0,0.139886,0.0,0.125205,0.0,0.016441,0.0,0.0,0.101645,0.0,0.010661,0.003309,0.0,0.0,0.003015,0.021215,0.020612,0.546781,0.195103,0.032136,0.02109,0.012987,0.06016,0.000688,0.009352,0.007114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013-07-16,0.002397,0.011904,0.0,0.004041,0.017531,0.007503,0.0,0.01265,0.004465,0.026677,0.0,0.615727,0.122417,0.146445,0.012754,0.012122,0.0,0.125114,0.522304,0.011845,0.029908,0.685383,0.062186,0.006739,0.0,0.651476,0.013972,0.562167,0.021062,0.959891,0.047709,0.038614,0.860653,0.010241,0.0,0.001068,0.013336,0.005033,0.047249,0.053883,0.016412,0.008563,0.000718,0.003636,0.111028,0.000424,0.063931,0.186407,0.0,0.01782,0.009387,0.0,0.134269,0.0,0.129101,0.0,0.015686,0.0,0.0,0.098895,0.0,0.011324,0.007795,0.0,0.0,0.0,0.021455,0.019564,0.558757,0.198407,0.030664,0.019128,0.012275,0.057105,0.000354,0.00435,0.002871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Prediction of Today 
* ( Between 0 & 1)

In [14]:
# Todays Data
todays_test = download_todays_test(ticker_df, df_wide, df_merge_original)
Xnew = sm.add_constant(todays_test, has_constant='add')

model = {} # Model Build For Each index fund
print(f"date: { todays_test.index.date.max() }")
output = pd.DataFrame(columns=['index', 'prediction'])
for t in ticker_df.ticker_label:
    data_with_target = create_target(df_merge.copy(), day = 5, ticker = t)
    m = linear_model(data_with_target,split=0.20,summary = False)
    y_pred = m['lm'].predict(Xnew)
    model[t] = (y_pred, m)
    output = pd.concat([output, pd.DataFrame.from_records([(t, y_pred[0])], columns=['index', 'prediction'])])
    
pd.set_option('display.max_rows', 500)
display(output.sort_values(by=['prediction'], ascending=False))

AttributeError: 'DataFrame' object has no attribute 'date'