In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
# Import the main functionality from the SimFin Python API.
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

import yfinance as yf
import yahoo_fin.stock_info as si
# import pandas_datareader.data as web

from dateutil.relativedelta import relativedelta
from datetime import datetime

### Part 1: Create targets T0_5 and T1_5

In [None]:
# Read the two input tables used to build the targets:
# daily prices and the earnings-date calendar.
df_dailyprice = pd.read_csv(
    'data/sp500_dailyprice.csv'
)
df_earningdate = pd.read_csv(
    'data/sp500_earningdate.csv'
)

In [None]:
# Build the per-row target table from the adjusted close ('T0').
# rename() returns a new frame, so the raw price table is left untouched and
# the column assignments below never write into a slice of df_dailyprice
# (which is what triggers pandas' chained-assignment warning).
df_target = df_dailyprice[['date', 'ticker', 'adjclose']].rename(columns={'adjclose': 'T0'})

# Shifts are taken within each ticker so that values never leak between tickers.
# NOTE(review): shift() works on row order, so this assumes rows are already in
# chronological order within each ticker -- TODO confirm against the CSV.
t0_by_ticker = df_target.groupby('ticker')['T0']
prev_close = t0_by_ticker.shift(1)    # previous row's T0
next_close = t0_by_ticker.shift(-1)   # T0 one row ahead  (T1)
close_5 = t0_by_ticker.shift(-5)      # T0 five rows ahead (T5)

df_target['T0_pre1'] = prev_close
# Relative change from T0 to T5.
df_target['T0_5'] = (close_5 - df_target['T0']) / df_target['T0']
# Relative change from T1 to T5.
df_target['T1_5'] = (close_5 - next_close) / next_close
# Relative change from the previous row to T0.
df_target['T0_fromlastday'] = (df_target['T0'] - prev_close) / prev_close

# Rows without a full look-back / look-ahead window contain NaN and are dropped.
df_target = df_target.dropna()

In [None]:
# Left-join the targets onto the earnings calendar: one output row per
# (Ticker, Earning Date); dates with no matching price row get NaN targets.
# The duplicate join keys from the price side are removed afterwards.
df_target = df_earningdate[['Ticker', 'Earning Date']].merge(
    df_target,
    how='left',
    left_on=['Ticker', 'Earning Date'],
    right_on=['ticker', 'date'],
).drop(columns=['ticker', 'date'])

In [None]:
# Write the target table to CSV without the index column.
df_target.to_csv(
    "data/target.csv",
    index=False,
)

### Part 2: Create technical indicators

In [13]:
# Re-read both input tables so the indicator section starts from the
# price columns exactly as they are stored on disk.
df_dailyprice = pd.read_csv(
    'data/sp500_dailyprice.csv'
)
df_earningdate = pd.read_csv(
    'data/sp500_earningdate.csv'
)

In [4]:
# Simple Moving Average
def SMA(data, ndays, group_col='ticker'):
    """Append a simple moving average of ``adjclose`` to ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``adjclose`` column.
    ndays : int
        Rolling window length in rows.
    group_col : str or None, default 'ticker'
        If this column exists in ``data`` the window is evaluated separately
        for each of its values, so a window never mixes rows that belong to
        different tickers. Pass ``None`` (or a frame without the column) to
        roll over the whole frame.

    Returns
    -------
    pandas.DataFrame
        A new frame (``data`` is not modified) with an extra column
        ``SMA_<ndays>``; the first ``ndays - 1`` rows of each group are NaN.
    """
    prices = data['adjclose']
    if group_col is not None and group_col in data.columns:
        # transform() keeps the original index, so the result joins back 1:1.
        sma = prices.groupby(data[group_col], sort=False).transform(
            lambda s: s.rolling(ndays).mean()
        )
    else:
        sma = prices.rolling(ndays).mean()
    return data.join(sma.rename('SMA_' + str(ndays)))

# Exponentially-weighted Moving Average
def EWMA(data, ndays, column='adjclose', group_col='ticker'):
    """Append an exponentially-weighted moving average to ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``column``.
    ndays : int
        ``span`` of the exponential window; values are emitted once
        ``ndays - 1`` observations are available (``min_periods``).
    column : str, default 'adjclose'
        Price column to smooth. Defaults to the adjusted close so the result
        is on the same scale as the other indicators in this file, which are
        all computed from ``adjclose``. Pass ``'close'`` for the raw close.
    group_col : str or None, default 'ticker'
        If this column exists in ``data`` the average is computed separately
        for each of its values, so one ticker's history never feeds into
        another's. Pass ``None`` to smooth over the whole frame.

    Returns
    -------
    pandas.DataFrame
        A new frame (``data`` is not modified) with an extra column
        ``EWMA_<ndays>``.
    """
    def _ewm_mean(series):
        return series.ewm(span=ndays, min_periods=ndays - 1).mean()

    prices = data[column]
    if group_col is not None and group_col in data.columns:
        # transform() keeps the original index, so the result joins back 1:1.
        ema = prices.groupby(data[group_col], sort=False).transform(_ewm_mean)
    else:
        ema = _ewm_mean(prices)
    return data.join(ema.rename('EWMA_' + str(ndays)))

In [5]:
# Compute the Bollinger Bands
def BBANDS(data, window, num_std=2, group_col='ticker'):
    """Add Bollinger Band columns computed from ``adjclose`` to ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``adjclose`` column. Modified in place.
    window : int
        Rolling window length in rows for the mean and standard deviation.
    num_std : float, default 2
        Number of rolling standard deviations between the middle band and
        each outer band.
    group_col : str or None, default 'ticker'
        If this column exists in ``data`` the rolling statistics are computed
        separately for each of its values, so a window never mixes rows from
        different tickers. Pass ``None`` to roll over the whole frame.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``MiddleBand`` (rolling mean),
        ``UpperBand`` and ``LowerBand`` (mean +/- ``num_std`` * rolling std).
    """
    prices = data['adjclose']
    if group_col is not None and group_col in data.columns:
        by_group = prices.groupby(data[group_col], sort=False)
        # transform() keeps the original index so the columns align row by row.
        rolling_mean = by_group.transform(lambda s: s.rolling(window).mean())
        rolling_std = by_group.transform(lambda s: s.rolling(window).std())
    else:
        rolling_mean = prices.rolling(window).mean()
        rolling_std = prices.rolling(window).std()

    data['MiddleBand'] = rolling_mean
    data['UpperBand'] = rolling_mean + (num_std * rolling_std)
    data['LowerBand'] = rolling_mean - (num_std * rolling_std)
    return data

In [6]:
# Returns RSI values
def rsi(data, periods = 14, group_col='ticker'):
    """Add a Relative Strength Index column ``RSI`` computed from ``adjclose``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``adjclose`` column. Modified in place.
    periods : int, default 14
        Smoothing length; used as ``com = periods - 1`` and as ``min_periods``
        of the exponential averages of gains and losses.
    group_col : str or None, default 'ticker'
        If this column exists in ``data`` the price changes and their averages
        are computed separately for each of its values, so the first change of
        a ticker is never measured against another ticker's last price.
        Pass ``None`` to treat the whole frame as one series.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with the extra column ``RSI`` (0-100; NaN
        until ``periods`` price changes are available).
    """
    def _rsi_of(prices):
        close_delta = prices.diff()

        # Split the changes into two non-negative series: gains and losses.
        up = close_delta.clip(lower=0)
        down = -1 * close_delta.clip(upper=0)

        # NOTE(review): adjust=True is kept from the original code; Wilder's
        # classic smoothing corresponds to adjust=False -- confirm intent.
        ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
        ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()

        # A zero average loss gives an infinite ratio, which maps to RSI = 100.
        strength = ma_up / ma_down
        return 100 - (100/(1 + strength))

    prices = data['adjclose']
    if group_col is not None and group_col in data.columns:
        # transform() keeps the original index so the column aligns row by row.
        data['RSI'] = prices.groupby(data[group_col], sort=False).transform(_rsi_of)
    else:
        data['RSI'] = _rsi_of(prices)
    return data

In [7]:
# Calculate money flow index
def gain(x):
    """Return the sum of the strictly positive entries of array ``x``."""
    return ((x > 0) * x).sum()
def loss(x):
    """Return the sum of the strictly negative entries of array ``x`` (<= 0)."""
    return ((x < 0) * x).sum()
def mfi(high, low, close, volume, n=14):
    """Compute the Money Flow Index over a rolling window of ``n`` rows.

    Parameters
    ----------
    high, low, close, volume : pandas.Series
        Equal-length, index-aligned price and volume series.
    n : int, default 14
        Rolling window length in rows.

    Returns
    -------
    numpy.ndarray
        MFI values (0-100), one per input row; the first ``n - 1`` entries
        are NaN. A window with no negative flow yields 100.

    Notes
    -----
    The rolling window runs over the series as given. When the inputs hold
    several tickers back to back, call this once per ticker.
    """
    typical_price = (high + low + close)/3
    money_flow = typical_price * volume
    # Direction of the typical price versus the previous row: +1 up, -1 down.
    # Rows with no previous value or an unchanged price get 0, so their money
    # flow counts as neither positive nor negative instead of being booked as
    # an outflow.
    mf_sign = np.sign(typical_price.diff()).fillna(0)
    signed_mf = money_flow * mf_sign
    mf_avg_gain = signed_mf.rolling(n).apply(gain, raw=True)
    mf_avg_loss = signed_mf.rolling(n).apply(loss, raw=True)
    return (100 - (100 / (1 + (mf_avg_gain / abs(mf_avg_loss))))).to_numpy()

In [8]:
def createratio(data, column1, column2):
    """Return ``data`` joined with a new column ``<column1>_<column2>``.

    The new column holds the element-wise quotient
    ``data[column1] / data[column2]``. ``data`` itself is not modified.
    """
    quotient = data[column1].div(data[column2])
    quotient.name = column1 + '_' + column2
    return data.join(quotient)

In [14]:
# Attach moving averages, Bollinger Bands and RSI to the daily price table.
df_dailyprice = SMA(df_dailyprice, 12)
df_dailyprice = SMA(df_dailyprice, 30)
df_dailyprice = EWMA(df_dailyprice, 12)
df_dailyprice = EWMA(df_dailyprice, 30)
df_dailyprice = BBANDS(df_dailyprice, 30)
df_dailyprice = rsi(df_dailyprice)

# mfi() rolls over whatever series it is handed, so feed it one ticker at a
# time; otherwise the first rows of each ticker would be computed from the
# previous ticker's prices and volumes. Each piece keeps its original index so
# the assignment below lines up row by row.
# NOTE(review): relies on df_dailyprice having a unique index (the default
# RangeIndex from read_csv) -- confirm if the loading step changes.
mfi_by_ticker = [
    pd.Series(
        mfi(grp['high'], grp['low'], grp['adjclose'], grp['volume']),
        index=grp.index,
    )
    for _, grp in df_dailyprice.groupby('ticker', sort=False)
]
df_dailyprice['MFI'] = pd.concat(mfi_by_ticker) if mfi_by_ticker else np.nan

In [15]:
# Price relative to each indicator level: adds 'adjclose_<indicator>' columns.
price_references = ('SMA_12', 'SMA_30', 'EWMA_12', 'EWMA_30', 'UpperBand', 'LowerBand')
for reference in price_references:
    df_dailyprice = createratio(df_dailyprice, 'adjclose', reference)

# Indicator-to-indicator ratios: adds '<numerator>_<denominator>' columns.
indicator_pairs = (
    ('SMA_12', 'SMA_30'),
    ('EWMA_12', 'EWMA_30'),
    ('UpperBand', 'LowerBand'),
)
for numerator, denominator in indicator_pairs:
    df_dailyprice = createratio(df_dailyprice, numerator, denominator)

In [16]:
# Remove the raw price/volume columns and the intermediate indicator levels;
# what remains are the key columns plus RSI, MFI and the ratio columns.
df_dailyprice.drop(
    columns=[
        'open', 'MiddleBand', 'high', 'low', 'close', 'volume', 'adjclose',
        'SMA_12', 'SMA_30',
        'EWMA_12', 'EWMA_30',
        'UpperBand', 'LowerBand',
    ],
    inplace=True,
)

In [18]:
# Left-join the indicator table onto the earnings calendar so there is one row
# per (Ticker, Earning Date), remove the duplicate join keys from the price
# side, then discard rows where any indicator is missing.
df_techind = (
    df_earningdate[['Ticker', 'Earning Date']]
    .merge(
        df_dailyprice,
        how='left',
        left_on=['Ticker', 'Earning Date'],
        right_on=['ticker', 'date'],
    )
    .drop(columns=['ticker', 'date'])
    .dropna()
)

In [None]:
# Write the technical-indicator table to CSV without the index column.
df_techind.to_csv(
    "data/techind.csv",
    index=False,
)