In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import yfinance as yf
import os
# Apply matplotlib's built-in 'ggplot' style sheet to the figures drawn in this notebook.
plt.style.use('ggplot')

In [None]:
# Load the raw sentiment export, index it by (date, symbol), derive the
# comments-to-likes engagement ratio and keep only rows with enough activity
# (more than 20 likes and more than 10 comments) for the ratio to be meaningful.
twitter_data = (
    pd.read_csv('../DATA/sentiment_data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index(['date', 'symbol'])
    .assign(
        engagement_ratio=lambda frame: frame['twitterComments'] / frame['twitterLikes']
    )
    .loc[lambda frame: (frame['twitterLikes'] > 20) & (frame['twitterComments'] > 10)]
)
twitter_data

## 2. Aggregate Monthly and calculate average sentiment for the month

* Aggregate the data to a monthly level and calculate the monthly average of the chosen metric (here, the engagement ratio).

In [None]:
# Average the engagement ratio per (month, symbol), then rank the symbols
# within each month so that rank 1 is the highest average engagement ratio.
aggragated_data = (
    twitter_data
    .reset_index('symbol')
    .groupby([pd.Grouper(freq='M'), 'symbol'])[['engagement_ratio']]
    .mean()
    .assign(
        rank=lambda monthly: (
            monthly.groupby(level=0)['engagement_ratio'].rank(ascending=False)
        )
    )
)

aggragated_data

## 3. Select Top 5 Stocks based on their cross-sectional ranking for each month

* Select the top 5 stocks by rank for each month and shift the date to the beginning of the next month.

In [None]:
# Keep the rows whose within-month rank is below 6 and move 'symbol' out of
# the index so the remaining index is just the date.
top_ranked = aggragated_data.loc[aggragated_data['rank'] < 6].reset_index(level=1)

# Shift every date forward by one day (DateOffset(1) is a one-day offset).
top_ranked.index = top_ranked.index + pd.DateOffset(1)

# Restore the (date, symbol) MultiIndex on the shifted dates.
filtered_data = top_ranked.reset_index().set_index(['date', 'symbol'])
filtered_data.head(20)

## 4. Extract the stocks used to form the portfolio at the start of each new month

* Create a dictionary mapping each start-of-month date to the corresponding selected stocks.

In [None]:
# Distinct dates present in the first index level of filtered_data.
dates = filtered_data.index.get_level_values('date').unique().tolist()

# Map each date (formatted as 'YYYY-MM-DD') to the list of symbols selected for it.
fixed_dates = {
    rebalance_day.strftime('%Y-%m-%d'): filtered_data.xs(rebalance_day, level=0).index.tolist()
    for rebalance_day in dates
}

fixed_dates

## 5. Download fresh stock prices for only selected/shortlisted stocks

In [None]:
# Every symbol that survived the likes/comments filter; this is a superset of
# the symbols that end up in a monthly top-5 portfolio.
stocks_list = twitter_data.index.get_level_values('symbol').unique().tolist()

# auto_adjust=False is passed explicitly: the 'Adj Close' column used later in
# the notebook is only returned when prices are NOT auto-adjusted, and newer
# yfinance releases default auto_adjust to True.
prices_data = yf.download(
    tickers=stocks_list,
    start='2021-01-01',
    end='2023-03-01',
    auto_adjust=False,
)

## 6. Calculate Portfolio Returns with monthly rebalancing

In [None]:
# Daily log returns per ticker. Only drop dates on which *every* ticker is
# missing (e.g. the first row produced by diff()). A bare dropna() would throw
# away a whole trading day as soon as a single ticker -- even one that is never
# held -- has a gap, and can empty the frame entirely.
returns_data = np.log(prices_data['Adj Close']).diff().dropna(how='all')

# One frame per holding month, concatenated once at the end instead of
# re-copying a growing DataFrame on every iteration.
monthly_frames = []
for start_date, cols in fixed_dates.items():
    # Hold the month's selection from its start date through that month's end.
    end_date = (pd.to_datetime(start_date) + pd.offsets.MonthEnd()).strftime('%Y-%m-%d')

    # Equal-weighted average across the selected tickers; mean() skips NaN, so
    # a ticker with a missing day is simply left out of that day's average.
    monthly_frames.append(
        returns_data.loc[start_date:end_date, cols]
        .mean(axis=1)
        .to_frame('portfolio_return')
    )

# pd.concat raises on an empty list, so fall back to an empty frame.
portfolio_data = (
    pd.concat(monthly_frames, axis=0)
    if monthly_frames
    else pd.DataFrame(columns=['portfolio_return'])
)

portfolio_data

## 7. Download NASDAQ/QQQ prices and calculate returns to compare to our strategy

In [None]:
# auto_adjust=False keeps the 'Adj Close' column, which newer yfinance releases
# drop by default.
qqq_data = yf.download(
    tickers='QQQ',
    start='2021-01-01',
    end='2023-03-01',
    auto_adjust=False,
)

# Depending on the yfinance version, a single-ticker download has either flat
# columns (giving a Series here) or (field, ticker) MultiIndex columns (giving
# a one-column DataFrame). Normalise to a Series so .to_frame() works on both.
qqq_adj_close = qqq_data['Adj Close']
if isinstance(qqq_adj_close, pd.DataFrame):
    qqq_adj_close = qqq_adj_close.iloc[:, 0]

# Daily log returns of the benchmark.
qqq_returns = np.log(qqq_adj_close).diff().to_frame('nasdaq_return')

# Drop any 'nasdaq_return' left over from a previous run of this cell so that
# re-running it does not produce nasdaq_return_x / nasdaq_return_y columns.
portfolio_data = (
    portfolio_data
    .drop(columns='nasdaq_return', errors='ignore')
    .merge(qqq_returns, left_index=True, right_index=True)
)

portfolio_data

In [None]:
import matplotlib.ticker as mtick

In [None]:
# The columns of portfolio_data are daily *log* returns (they are built with
# np.log(prices).diff() earlier in the notebook). Log returns compound by
# summation, so the cumulative simple return is exp(cumsum) - 1. Applying
# log1p to them first would treat them as simple returns and take the log twice.
portfolios_cumulative_return = np.exp(portfolio_data.cumsum()).sub(1)

ax = portfolios_cumulative_return.plot(figsize=(16, 6))

ax.set_title('Twitter Engagement Ratio Strategy Return Over Time')

# Values are fractions (1.0 == 100%), so format the y axis as percentages.
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

ax.set_ylabel('Return')

plt.show()