In [132]:
#A notebook that gets daily closing prices, calculates log returns, alpha, beta, and Sharpe Ratio
#TODO: Scrape earnings reports so we have some more graphs to work with
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from datetime import datetime
import twint
%matplotlib inline 

In [30]:
def createDataFrame(stockList, startDate, endDate):
    stockDataFrame = pd.DataFrame(columns = stockList)
    for stock in stockList:
        stockDataFrame[stock] = yf.download(stock, start=startDate, end=endDate, progress=False)['Close']
    return stockDataFrame

def logReturns(stockDataFrame):
    for stock in stockDataFrame.columns:
        stockDataFrame[stock] = np.log(stockDataFrame[stock]) - np.log(stockDataFrame[stock].shift(1))
    stockDataFrame.dropna(inplace=True)
    return stockDataFrame

#We use rolling alpha, beta in this case. Comparing with the SP500 for linear regression
def marketAlphaBeta(logReturnDF, benchmarkDF):
    alphaDataFrame = pd.DataFrame(columns = logReturnDF.columns, index=logReturnDF.index)
    betaDataFrame = pd.DataFrame(columns = logReturnDF.columns, index=logReturnDF.index)
    obs = logReturnDF.shape[0]
    lagWindow = 30
    for i in range((obs-lagWindow)):
        for stock in logReturnDF.columns:
            regressor = LinearRegression()
            regressor.fit(benchmarkDF['^GSPC'].to_numpy()[i : i +lagWindow+1].reshape(-1,1), logReturnDF[stock].to_numpy()[i : i +lagWindow+1])
            betaDataFrame[stock][i+lagWindow]  = regressor.coef_[0]
            alphaDataFrame[stock][i+lagWindow]  = regressor.intercept_
    alphaDataFrame.dropna(inplace=True)
    betaDataFrame.dropna(inplace=True)
    return alphaDataFrame, betaDataFrame

#We use rolling Sharpe ratio in this case. We use 10 year Treasury Note (^TNX) yield as "risk-free" rate
def rollingSharpeRatio(logReturnDF, logBenchmark):
    sharpeDataFrame = pd.DataFrame(columns = logReturnDF.columns, index = logReturnDF.index)
    obs = logReturnDF.shape[0]
    lagWindow = 60
    for i in range((obs-lagWindow)):
        for stock in logReturnDF.columns:
            netReturn = logReturnDF[stock][i : i +lagWindow+1].mean() - logBenchmark['^TNX'][i : i +lagWindow+1].mean()
            stdDev = logReturnDF[stock][i : i +lagWindow+1].std()
            sharpeDataFrame[stock][i+lagWindow]  = netReturn/stdDev
    sharpeDataFrame.dropna(inplace=True)
    return sharpeDataFrame

In [129]:
#Variables that we can modify to get our data
memeStocks = ['AAPL','GOOG','TSLA','KO','OXY','BAC']
benchmarks = ['^GSPC','^TNX']
startDate = '2022-3-24'
endDate = '2023-3-24'

In [130]:
#Run once variables are filled out
newFrame = createDataFrame(memeStocks, startDate, endDate)
newFrame.to_csv("stockPrices.csv")
benchmarkFrame = createDataFrame(benchmarks, startDate, endDate)
benchmarkFrame.to_csv("benchmarkPrices.csv")
logDataFrame = logReturns(newFrame)
logDataFrame.to_csv("logReturnsStock.csv")
logBenchmark = logReturns(benchmarkFrame)
logBenchmark.to_csv("logReturnsBenchmark.csv")
alphaFrame, betaFrame = marketAlphaBeta(logDataFrame, logBenchmark)
alphaFrame.to_csv("alphas.csv")
betaFrame.to_csv("betas.csv")
sharpeDataFrame = rollingSharpeRatio(logDataFrame, logBenchmark)
sharpeDataFrame.to_csv("sharpeRatios.csv")
newFrame = createDataFrame(memeStocks, startDate, endDate)

In [131]:
df = pd.merge(newFrame, benchmarkFrame, left_index=True, right_index=True)
df['Date'] = df.index
df = pd.melt(df, id_vars=[i for i in df.columns.values if i not in memeStocks], value_vars=memeStocks, var_name='Ticker', value_name='Price')
df = df.set_index('Date')
logDataFrame['Date'] = logDataFrame.index
log_melted = pd.melt(logDataFrame, id_vars=['Date'],value_vars=memeStocks, var_name='Ticker', value_name='Price')
log_melted = log_melted.set_index('Date')
pd.merge(df, log_melted, left_index=True, right_index=True, how='outer',suffixes=['Stock','LogReturn'])

Unnamed: 0_level_0,^GSPC,^TNX,TickerStock,PriceStock,TickerLogReturn,PriceLogReturn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-25,0.005053,0.062507,AAPL,174.720001,AAPL,0.003727
2022-03-25,0.005053,0.062507,AAPL,174.720001,GOOG,0.001481
2022-03-25,0.005053,0.062507,AAPL,174.720001,TSLA,-0.003240
2022-03-25,0.005053,0.062507,AAPL,174.720001,KO,0.008979
2022-03-25,0.005053,0.062507,AAPL,174.720001,OXY,0.016487
...,...,...,...,...,...,...
2023-03-23,0.002980,-0.027224,BAC,26.969999,GOOG,0.019385
2023-03-23,0.002980,-0.027224,BAC,26.969999,TSLA,0.005582
2023-03-23,0.002980,-0.027224,BAC,26.969999,KO,-0.002167
2023-03-23,0.002980,-0.027224,BAC,26.969999,OXY,-0.003414


In [133]:
# import nest_asyncio
# nest_asyncio.apply()

In [145]:
# i=0
# dfs=[]
# dates = pd.date_range(start=startDate, end=endDate).values
# for ticker in memeStocks:
#     for i in range(0,len(dates[:10])):
#         c = twint.Config()
#         c.Search = [ticker,"stock"]
#         c.Limit = 60
#         c.Lang = 'en'
#         c.Verified = True
#         # c.Since = '2021-01-30 01:00:00'
#         print(dates[i])
#         c.Until = dates[i]
#         c.Pandas = True
