In [26]:
import pandas as pd
import datetime
import numpy as np

In [27]:
# importing tweet csv files and cleaning them
sonos_tweets_df = pd.read_csv('sonos_tweets.csv')
sonos_tweets_df['time_stamp'] = pd.to_datetime(sonos_tweets_df['time_stamp'])
sonos_tweets_df['time_stamp'] = sonos_tweets_df['time_stamp'].dt.round('30min')
sonos_tweets_df_ = sonos_tweets_df.set_index(['time_stamp'])

# reading stock.csv and cleaning the df
running_stocks_thirty = pd.read_csv('stock.csv', index_col = 0)
running_stocks_thirty.index = pd.to_datetime(running_stocks_thirty.index)

# updating stock data
running_stocks_thirty = running_stocks_thirty[running_stocks_thirty.index.day >= 24].sort_index()
running_stocks_thirty['increase_decrease'] = running_stocks_thirty['4. close'] - running_stocks_thirty['1. open']
running_stocks_thirty['up_down'] = np.where(running_stocks_thirty['increase_decrease']>=0, 1, 0)
running_stocks_thirty['movement(%)'] = np.round(((running_stocks_thirty['4. close'] - running_stocks_thirty['1. open'])/running_stocks_thirty['4. close'])*100, 2)

# reading tech indicators
sonos_tech_indic = pd.read_csv('sonos_tech_indicators.csv', index_col = 0 )
sonos_tech_indic.index = pd.to_datetime(sonos_tech_indic.index)
sonos_tech_indic_ = sonos_tech_indic[sonos_tech_indic.index.day >= 24]
sonos_tech_indic_df = sonos_tech_indic_[sonos_tech_indic_.index.day < 30]

In [28]:
# drop all the tweets before the 24th
sonos_tweets_df = sonos_tweets_df_[sonos_tweets_df_.index.day >= 24]
sonos_tweets_master_df = sonos_tweets_df.drop(['t_id', 'text', 'Unnamed: 0'], axis = 1)

In [29]:
sonos_tweets_df_24 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==24]
sonos_tweets_df_25 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==25]
sonos_tweets_df_26 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==26]
sonos_tweets_df_27 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==27]
sonos_tweets_df_28 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==28]
sonos_tweets_df_29 = sonos_tweets_master_df[sonos_tweets_master_df.index.day ==29]

In [30]:
# seperate during/after market hrs
def during_after(df):
    df_during = df.between_time('9:30', "15:30")
    df_after = df.between_time('15:30', "9:30")
    return df_during, df_after

In [31]:
open_hours_tweets_24, after_hour_tweets_24 = during_after(sonos_tweets_df_24)
open_hours_tweets_25, after_hour_tweets_25 = during_after(sonos_tweets_df_25)
open_hours_tweets_26, after_hour_tweets_26 = during_after(sonos_tweets_df_26)
open_hours_tweets_27, after_hour_tweets_27 = during_after(sonos_tweets_df_27)
open_hours_tweets_28, after_hour_tweets_28 = during_after(sonos_tweets_df_28)
open_hours_tweets_29, after_hour_tweets_29 = during_after(sonos_tweets_df_29)

In [32]:
# aggregate the scores 
def aggregate(df):
    sent_avg = df.groupby(df.index).agg({'vader_sentiment': 'mean'}).rename(columns = {'vader_sentiment': 'sentiment_avg_30'}).shift()
    sent_count = df.groupby(df.index).agg({'sentiment': 'value_counts'}).unstack().rename(columns = {'sentiment': 'sentiment_30'}).shift()
    df_ = pd.concat([sent_avg, sent_count], axis = 1)
    df_ini = df_.fillna(df_.mean())
    df_hour_i = df_ini.shift().rolling(window=2, center = True, min_periods=2).mean().rename(columns = {'sentiment_avg_30': 'sentiment_avg_60'})
    df_hour = df_hour_i.fillna(df_hour_i.mean())
    df_two_hours_i = df_ini.shift(2).rolling(window=4, center = True, min_periods=4).mean().rename(columns = {'sentiment_avg_30': 'sentiment_avg_120'})
    df_two_hours = df_two_hours_i.fillna(df_two_hours_i.mean())
    df_all = pd.concat([df_ini, df_hour, df_two_hours], axis = 1)
    return df_all

In [33]:
open_hours_tweets_24 = aggregate(open_hours_tweets_24)
open_hours_tweets_25 = aggregate(open_hours_tweets_25)
open_hours_tweets_26 = aggregate(open_hours_tweets_26)
open_hours_tweets_27 = aggregate(open_hours_tweets_27)
open_hours_tweets_28 = aggregate(open_hours_tweets_28)
open_hours_tweets_29 = aggregate(open_hours_tweets_29)

In [34]:
# seperate night-before and morning-day-of market hrs
def night_morning(df):
    df_night = df.between_time('15:30', "0:00")
    df_morning = df.between_time('0:00', "9:30")
    df_night_avg = df_night.groupby(df_night.index.day).mean()
    df_night_ct = df_night.groupby(df_night.index.day).agg({'sentiment': 'value_counts'}).unstack()
    night_df = pd.concat([df_night_avg, df_night_ct], axis = 1)
    night_df.columns = ['vader_sentiment_night', 'sent_neg, night_before', 'sent_neu, night_before', 'sent_pos, night_before']
    df_morning_avg = df_morning.groupby(df_morning.index.day).mean()
    df_morning_ct = df_morning.groupby(df_morning.index.day).agg({'sentiment': 'value_counts'}).unstack()
    morning_df = pd.concat([df_morning_avg, df_morning_ct], axis = 1)
#     morning_df.columns = ['vader_sentiment_morning', 'sent_neg, morning', 'sent_neu, morning', 'sent_pos, morning']
    pd.to_datetime(morning_df.index)
    pd.to_datetime(night_df.index)
    return night_df, morning_df

In [35]:
sonos_tweets_df_23 = sonos_tweets_df_[sonos_tweets_df_.index.day == 23]
sonos_tweets_23_df = sonos_tweets_df_23.drop(['t_id', 'text', 'Unnamed: 0'], axis = 1)
# sonos_tweets_23_night = sonos_tweets_23_df.between_time('15:30', "9:30")
after_hour_tweets_24_night, after_hour_tweets_23_morning = night_morning(sonos_tweets_23_df)

In [36]:
after_hour_tweets_25_night, after_hour_tweets_24_morning = night_morning(after_hour_tweets_24)
after_hour_tweets_26_night, after_hour_tweets_25_morning = night_morning(after_hour_tweets_25)
after_hour_tweets_27_night, after_hour_tweets_26_morning = night_morning(after_hour_tweets_26)
after_hour_tweets_28_night, after_hour_tweets_27_morning = night_morning(after_hour_tweets_27)
after_hour_tweets_29_night, after_hour_tweets_28_morning = night_morning(after_hour_tweets_28)
after_hour_tweets_30_night, after_hour_tweets_29_morning = night_morning(after_hour_tweets_29)

In [37]:
# update the index of the night df to the next day
after_hour_tweets_24_night.index = [24]
after_hour_tweets_25_night.index = [25]
after_hour_tweets_26_night.index = [26]
after_hour_tweets_27_night.index = [27]
after_hour_tweets_28_night.index = [28]
after_hour_tweets_29_night.index = [29]

In [38]:
night_morning_24 = pd.concat([after_hour_tweets_24_night, after_hour_tweets_24_morning], axis = 1)
night_morning_25 = pd.concat([after_hour_tweets_25_night, after_hour_tweets_25_morning], axis = 1)
night_morning_26 = pd.concat([after_hour_tweets_26_night, after_hour_tweets_26_morning], axis = 1)
night_morning_27 = pd.concat([after_hour_tweets_27_night, after_hour_tweets_27_morning], axis = 1)
night_morning_28 = pd.concat([after_hour_tweets_28_night, after_hour_tweets_28_morning], axis = 1)
night_morning_29 = pd.concat([after_hour_tweets_29_night, after_hour_tweets_29_morning], axis = 1)

In [39]:
night_morning_28[night_morning_24.columns[-3]] = [0]
night_morning_28[night_morning_24.columns[-2]] = [0]
night_morning_25[night_morning_24.columns[-2]] = [0]

In [None]:
night_morning_28

In [None]:
list_of_columns

In [None]:
np.full((len(open_hours_tweets_24),1), night_morning_24[list_of_columns[-1]].values[0])

In [None]:
night_morning_2

In [40]:
list_of_columns = list(night_morning_25.columns)
def concat_main_night_morning(open_hour_df, night_morning_df):
    for i in range(0,len(list_of_columns)):
        open_hour_df[list_of_columns[i]] = np.full((len(open_hour_df),1), night_morning_df[list_of_columns[i]].values[0])

In [41]:
concat_main_night_morning(open_hours_tweets_24, night_morning_24)
concat_main_night_morning(open_hours_tweets_25, night_morning_25)
concat_main_night_morning(open_hours_tweets_26, night_morning_26)
concat_main_night_morning(open_hours_tweets_27, night_morning_27)
concat_main_night_morning(open_hours_tweets_28, night_morning_28)
concat_main_night_morning(open_hours_tweets_29, night_morning_29)

TypeError: Cannot convert bool to numpy.ndarray

In [None]:
sonos_tweets_master = pd.concat([open_hours_tweets_24, open_hours_tweets_25, open_hours_tweets_26, open_hours_tweets_27, open_hours_tweets_28, open_hours_tweets_29])
sonos_tweets_master.columns = ['sentiment_avg_30','coun_neg, 30', 'coun_neu, 30','coun_pos, 30', 'sentiment_avg_60', 'coun_neg, 60', 'coun_neu, 60','coun_pos, 60','sentiment_avg_120', 'coun_neg, 120', 'coun_neu, 120','coun_pos, 120', 'sentiment_night_before_avg', 'count_neg, night_before', 'count_neu, night_before', 'count_pos, night_before','sentiment_morning_avg', 'count_neg, morning', 'count_neu, morning', 'count_pos, morning']
# take out weekend
sonos_tweets_master = sonos_tweets_master[(sonos_tweets_master.index.day != 25) & (sonos_tweets_master.index.day != 26)]

In [None]:
# put stock and tweet data together
fb_tweets_stocks = pd.concat([running_stocks_thirty, fb_tech_indic_df, fb_tweets_master, fb_articles_df], axis = 1)