In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
# Import the main functionality from the SimFin Python API.
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

import yfinance as yf
import yahoo_fin.stock_info as si
# import pandas_datareader.data as web

from dateutil.relativedelta import relativedelta
from datetime import datetime

### Part 1. Create targets T0_5 and T1_5

In [None]:
# Load the Part 1 inputs from CSV: daily prices and earnings-announcement dates.
# The cells below read 'date', 'ticker' and 'adjclose' from the price file and
# 'Ticker' and 'Earning Date' from the earnings file.
df_dailyprice=pd.read_csv('data/sp500_dailyprice.csv')
df_earningdate=pd.read_csv('data/sp500_earningdate.csv')
# Ad-hoc check (disabled): inspect the rows of a single ticker.
# df_dailyprice[df_dailyprice['ticker']=='TSLA']

In [None]:
# Build the prediction targets from the adjusted close (renamed to 'T0'):
#   T0_pre1         close of the previous row of the same ticker
#   T0_5            return from the close at row t to the close at row t+5
#   T1_5            return from the close at row t+1 to the close at row t+5
#   T0_fromlastday  return from the close at row t-1 to the close at row t
# Shifts count rows within each ticker, so "t+5" means "five rows later".
# NOTE(review): assumes rows are in chronological order within each ticker - confirm.
df_dailyprice.rename(columns={'adjclose':'T0'},inplace=True)

# Work on an explicit copy: adding columns to a bare column-slice of
# df_dailyprice makes pandas raise SettingWithCopyWarning.
df_target=df_dailyprice[['date','ticker','T0']].copy()

close_by_ticker=df_target.groupby('ticker')['T0']
prev_close=close_by_ticker.shift(1)
next_close=close_by_ticker.shift(-1)
close_in_5=close_by_ticker.shift(-5)

df_target['T0_pre1']=prev_close
df_target['T0_5']=(close_in_5-df_target['T0'])/df_target['T0']
df_target['T1_5']=(close_in_5-next_close)/next_close
df_target['T0_fromlastday']=(df_target['T0']-prev_close)/prev_close

# Rows at the start/end of a ticker's history have no previous/future close.
df_target=df_target.dropna()
# df_target.iloc[1500:1520]

In [None]:
# Attach the targets to each earnings announcement by matching
# (Ticker, Earning Date) against the (ticker, date) of the price rows.
# The left join keeps every announcement; one without a matching price row
# ends up with NaN targets.
df_target=(
    df_earningdate[['Ticker','Earning Date']]
    .merge(df_target,
           how='left',
           left_on=['Ticker','Earning Date'],
           right_on=['ticker','date'])
    .drop(columns=['ticker','date'])
)

In [None]:
# Persist the per-announcement targets (row index not written).
df_target.to_csv("data/target.csv",index=False)

### Part 2. Create technical indicators

In [None]:
# Reload fresh copies of the daily prices and earnings dates so that Part 2
# starts from the raw CSV columns, independent of anything earlier cells did
# to df_dailyprice.
df_dailyprice=pd.read_csv('data/sp500_dailyprice.csv')
df_earningdate=pd.read_csv('data/sp500_earningdate.csv')

In [None]:
# Simple Moving Average
def SMA(data, ndays):
    """Return a copy of ``data`` with an ``SMA_<ndays>`` column appended.

    The column holds the ``ndays``-row simple moving average of ``adjclose``.
    If ``data`` has a ``ticker`` column the window is applied separately to
    each ticker, so an average never mixes the prices of two instruments;
    otherwise the whole frame is treated as one series.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``adjclose``; ``ticker`` is optional.
    ndays : int
        Window length in rows. The first ``ndays - 1`` rows of each series
        are NaN.

    Returns
    -------
    pandas.DataFrame
        New frame (``data`` itself is not modified).
    """
    prices = data['adjclose']

    def _rolling_mean(series):
        return series.rolling(ndays).mean()

    if 'ticker' in data.columns:
        # transform() keeps the original index, so the join below lines up.
        sma = prices.groupby(data['ticker'], sort=False).transform(_rolling_mean)
    else:
        sma = _rolling_mean(prices)

    return data.join(sma.rename('SMA_' + str(ndays)))

# Exponentially-weighted Moving Average
def EWMA(data, ndays, price_col='adjclose'):
    """Return a copy of ``data`` with an ``EWMA_<ndays>`` column appended.

    The column is the exponentially weighted mean of ``price_col`` with
    ``span=ndays`` and ``min_periods=ndays - 1``.

    The default price column is ``adjclose`` so that this indicator is on the
    same price basis as the other indicators and the ``adjclose``/EWMA ratios
    built from it; pass ``price_col='close'`` for the unadjusted close.

    If ``data`` has a ``ticker`` column the average is computed separately
    for each ticker, so one instrument's history never seeds the next one.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``price_col``; ``ticker`` is optional.
    ndays : int
        Span of the exponential window.
    price_col : str, default 'adjclose'
        Column to smooth.

    Returns
    -------
    pandas.DataFrame
        New frame (``data`` itself is not modified).
    """
    prices = data[price_col]

    def _ewm_mean(series):
        return series.ewm(span=ndays, min_periods=ndays - 1).mean()

    if 'ticker' in data.columns:
        ema = prices.groupby(data['ticker'], sort=False).transform(_ewm_mean)
    else:
        ema = _ewm_mean(prices)

    return data.join(ema.rename('EWMA_' + str(ndays)))

In [None]:
# Compute the Bollinger Bands
def BBANDS(data, window, num_std=2):
    """Add Bollinger Bands of ``adjclose`` to ``data`` in place and return it.

    Adds three columns:
    ``MiddleBand`` (rolling mean), ``UpperBand`` (mean + ``num_std`` * std)
    and ``LowerBand`` (mean - ``num_std`` * std), where std is the rolling
    sample standard deviation over the same window.

    If ``data`` has a ``ticker`` column the rolling statistics are computed
    separately per ticker, so a window never spans two instruments.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``adjclose``; ``ticker`` is optional. Modified in place.
    window : int
        Window length in rows.
    num_std : float, default 2
        Band half-width in standard deviations.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    prices = data['adjclose']

    if 'ticker' in data.columns:
        per_ticker = prices.groupby(data['ticker'], sort=False)
        middle = per_ticker.transform(lambda s: s.rolling(window).mean())
        spread = per_ticker.transform(lambda s: s.rolling(window).std())
    else:
        middle = prices.rolling(window).mean()
        spread = prices.rolling(window).std()

    data['MiddleBand'] = middle
    data['UpperBand'] = middle + (num_std * spread)
    data['LowerBand'] = middle - (num_std * spread)
    return data

In [None]:
# Returns RSI values
def rsi(data, periods = 14):
    """Add an ``RSI`` column (relative strength index of ``adjclose``) in place.

    Gains and losses are the positive and negative parts of the row-to-row
    change in ``adjclose``. Each is smoothed with an exponentially weighted
    mean (``com = periods - 1``, ``adjust=True``, ``min_periods = periods``)
    and combined as ``100 - 100 / (1 + avg_gain / avg_loss)``.

    If ``data`` has a ``ticker`` column the calculation is done separately
    per ticker, so the first change of a ticker is never measured against
    the last price of the previous one.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``adjclose``; ``ticker`` is optional. Modified in place.
    periods : int, default 14
        Smoothing length.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """

    def _rsi_of(close):
        close_delta = close.diff()

        # Split the change into its upward and downward parts (both >= 0).
        up = close_delta.clip(lower=0)
        down = -1 * close_delta.clip(upper=0)

        ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
        ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()

        # A zero average loss gives an infinite ratio, which maps to RSI = 100.
        strength = ma_up / ma_down
        return 100 - (100/(1 + strength))

    prices = data['adjclose']
    if 'ticker' in data.columns:
        values = prices.groupby(data['ticker'], sort=False).transform(_rsi_of)
    else:
        values = _rsi_of(prices)

    data['RSI'] = values
    return data

In [None]:
# Calculate money flow index
def gain(x):
    """Sum of the strictly positive entries of ``x``."""
    return ((x > 0) * x).sum()
def loss(x):
    """Sum of the strictly negative entries of ``x`` (result is <= 0)."""
    return ((x < 0) * x).sum()
def mfi(high, low, close, volume, n=14):
    """Money flow index over a rolling window of ``n`` rows.

    typical price = (high + low + close) / 3
    money flow    = typical price * volume

    A row's money flow counts as positive when its typical price is above the
    previous row's, as negative when it is below, and is ignored when the
    price is unchanged. A row with no previous typical price (the first row,
    or one next to missing data) has no direction, so every window containing
    it yields NaN instead of silently treating it as an outflow.

    MFI = 100 - 100 / (1 + positive_flow_sum / negative_flow_sum)

    The inputs are treated as one continuous series; callers holding several
    instruments in one frame should call this once per instrument.

    Parameters
    ----------
    high, low, close, volume : pandas.Series
        Aligned series of equal length.
    n : int, default 14
        Window length in rows.

    Returns
    -------
    numpy.ndarray
        One value per input row.
    """
    typical_price = (high + low + close)/3
    money_flow = typical_price * volume

    previous = typical_price.shift(1)
    comparable = typical_price.notna() & previous.notna()

    # Zero where the flow goes the other way, NaN where direction is undefined.
    positive_flow = money_flow.where(typical_price > previous, 0.0).where(comparable)
    negative_flow = money_flow.where(typical_price < previous, 0.0).where(comparable)

    positive_sum = positive_flow.rolling(n).sum()
    negative_sum = negative_flow.rolling(n).sum()

    # negative_sum == 0 with positive inflow gives an infinite ratio -> MFI 100.
    return (100 - (100 / (1 + (positive_sum / negative_sum)))).to_numpy()

In [None]:
def createratio(data, column1, column2):
    """Return a copy of ``data`` with the ratio ``column1 / column2`` appended.

    The new column is named ``'<column1>_<column2>'``. ``data`` itself is
    left untouched.
    """
    ratio_name = column1 + '_' + column2
    quotient = data[column1] / data[column2]
    return data.join(pd.Series(quotient, name=ratio_name))

In [None]:
# Add the technical indicators to the daily price frame:
# 12- and 30-row simple and exponential moving averages, 30-row Bollinger
# Bands, RSI and MFI (both with their default length of 14).
for window in (12, 30):
    df_dailyprice=SMA(df_dailyprice,window)
for window in (12, 30):
    df_dailyprice=EWMA(df_dailyprice,window)
df_dailyprice=BBANDS(df_dailyprice,30)
df_dailyprice=rsi(df_dailyprice)

# mfi() only sees bare price/volume series, so it cannot tell where one
# ticker ends and the next begins. Call it once per ticker and stitch the
# pieces back together on the row index so no window spans two tickers.
mfi_parts=[
    pd.Series(mfi(rows['high'],rows['low'],rows['adjclose'],rows['volume']),index=rows.index)
    for _, rows in df_dailyprice.groupby('ticker',sort=False)
]
df_dailyprice['MFI']=pd.concat(mfi_parts) if mfi_parts else np.nan

In [None]:
# Turn indicator levels into scale-free ratios: first price relative to each
# indicator, then fast vs. slow average and upper vs. lower band.
ratio_pairs=[('adjclose',indicator) for indicator in
             ['SMA_12','SMA_30','EWMA_12','EWMA_30','UpperBand','LowerBand']]
ratio_pairs+=[('SMA_12','SMA_30'),('EWMA_12','EWMA_30'),('UpperBand','LowerBand')]

for numerator,denominator in ratio_pairs:
    df_dailyprice=createratio(df_dailyprice,numerator,denominator)

In [None]:
# Drop the raw price/volume columns and the indicator levels; only the
# columns not listed here (e.g. the ratios, RSI and MFI) are kept.
raw_price_columns=['open','high','low','close','volume','adjclose']
indicator_level_columns=['MiddleBand','SMA_12','SMA_30','EWMA_12','EWMA_30','UpperBand','LowerBand']
df_dailyprice.drop(columns=raw_price_columns+indicator_level_columns,inplace=True)

In [None]:
# Pick the indicator row of each earnings announcement by matching
# (Ticker, Earning Date) to the (ticker, date) of the price rows, then discard
# announcements for which any indicator is missing.
df_techind=(
    df_earningdate[['Ticker','Earning Date']]
    .merge(df_dailyprice,
           how='left',
           left_on=['Ticker','Earning Date'],
           right_on=['ticker','date'])
    .drop(columns=['ticker','date'])
    .dropna()
)

In [None]:
# Persist the per-announcement technical indicators (row index not written).
df_techind.to_csv("data/techind.csv",index=False)

### Part 3. Append segment features to earnings data

In [2]:
# Load the Part 3 inputs from CSV: daily prices, company fundamentals and
# earnings-announcement dates.
df_dailyprice=pd.read_csv('data/sp500_dailyprice.csv')
df_fundamental=pd.read_csv('data/us_fundamental.csv')
df_earningdate=pd.read_csv('data/sp500_earningdate.csv')

In [3]:
# Date window passed to the S&P 500 index download below.
start_date= '2015-01-01'
end_date= '2021-12-30'

In [75]:
# Download the S&P 500 index (^GSPC) closes for the configured window and
# derive the index's row-to-row return and the 21-row rolling variance of that
# return (the denominator of beta further down).
# Dates are converted to strings so they can be merged with the 'date' column
# of the CSV price data.
sp500 = (
    si.get_data('^GSPC', start_date=start_date, end_date=end_date, index_as_date=False)
    .loc[:, ['date', 'close']]
    .rename(columns={'close': 'SP500'})
    .assign(
        date=lambda frame: frame['date'].astype(str),
        sp500_r=lambda frame: frame['SP500'].pct_change(),
        sp500_var=lambda frame: frame['sp500_r'].rolling(21).var(),
    )
)

In [76]:
# Put the index level, return and variance next to every (date, ticker) price
# row. Left join: price rows on dates missing from the index data get NaN.
beta_calc=df_dailyprice[['date','ticker','adjclose']].merge(sp500,how='left',on='date')

In [77]:
# Row-to-row return of each ticker's adjusted close, computed within the ticker.
beta_calc['ticker_r']=beta_calc['adjclose'].groupby(beta_calc['ticker']).pct_change()

In [78]:
# Date-indexed view holding just the ticker label and the two return series
# needed for the rolling covariance.
df_temp=beta_calc.set_index('date')[['ticker','ticker_r','sp500_r']]

In [79]:
# 21-row rolling covariance between each ticker's return and the index return.
# groupby('ticker').rolling(21).cov() produces the pairwise covariance of the
# two return columns per (ticker, date); unstack() moves the inner level of
# that result into the columns, and ['ticker_r']['sp500_r'] then selects the
# ticker-vs-index entry.
# The later cells rely on the result being a Series named 'sp500_r' that can
# be merged on 'date' and 'ticker'.
df_cov = df_temp.groupby('ticker').rolling(21).cov().unstack()['ticker_r']['sp500_r']

In [80]:
# Bring the rolling covariance back onto the price rows. Both sides have a
# 'sp500_r', so pandas' default suffixes apply: the index return becomes
# 'sp500_r_x' and the covariance 'sp500_r_y'.
beta_calc=beta_calc.merge(df_cov,how='left',on=['date','ticker'])

In [81]:
# beta = cov(ticker return, index return) / var(index return);
# 'sp500_r_y' is the rolling covariance column produced by the merge.
beta_calc['beta']=beta_calc['sp500_r_y'].div(beta_calc['sp500_var'])

In [82]:
# Smooth beta with a 30-row rolling mean (at least 2 observations required).
# The mean is taken inside each ticker: beta_calc stacks all tickers in one
# frame, so an ungrouped rolling window would average the first rows of one
# ticker with the last rows of the ticker stored before it.
beta_calc['beta_30avg']=(
    beta_calc.groupby('ticker')['beta']
    .transform(lambda series: series.rolling(30,min_periods=2).mean())
)

In [83]:
# For every earnings announcement, look up the smoothed beta and the adjusted
# close on the announcement date. Left join: announcements without a matching
# price row keep NaN in both columns.
merge2=(
    df_earningdate
    .merge(beta_calc[['ticker','date','beta_30avg','adjclose']],
           how='left',
           left_on=['Ticker','Earning Date'],
           right_on=['ticker','date'])
    .drop(columns=['date','ticker'])
)

In [84]:
# Add the reported fundamentals to each announcement, matched on ticker and
# publish date. Left join: announcements without fundamentals keep NaN.
sp_500_fundamental=merge2.merge(df_fundamental,how='left',on=['Ticker','Publish Date'])

In [85]:
# Derive growth, margin and balance-sheet features per ticker.
# "_qoq" compares a row with the previous row of the same ticker, "_yoy" with
# the row four positions earlier.
# NOTE(review): this assumes each ticker's rows are consecutive quarters in
# chronological order - confirm against df_earningdate / df_fundamental.

# True when the reported EPS surprise is positive (a missing surprise gives False).
sp_500_fundamental['EPSsuprise_ind']=sp_500_fundamental['epssurprisepct']>0

# Relative change of revenue and net income.
for source_col,prefix in [('Revenue','Revenue'),('Net Income','Net_Income')]:
    sp_500_fundamental[prefix+'_qoq']=sp_500_fundamental.groupby('Ticker')[source_col].pct_change()
    sp_500_fundamental[prefix+'_yoy']=sp_500_fundamental.groupby('Ticker')[source_col].pct_change(4)

# Margins as a share of revenue, plus their absolute change.
for margin_col,numerator_col in [('Gross_margin','Gross Profit'),
                                 ('Operating_margin','Operating Income (Loss)'),
                                 ('Net_margin','Net Income')]:
    sp_500_fundamental[margin_col]=sp_500_fundamental[numerator_col]/sp_500_fundamental['Revenue']
    sp_500_fundamental[margin_col+'_qoq']=sp_500_fundamental.groupby('Ticker')[margin_col].diff()
    sp_500_fundamental[margin_col+'_yoy']=sp_500_fundamental.groupby('Ticker')[margin_col].diff(4)

# Balance-sheet and cash-flow features.
sp_500_fundamental['Cash_yoy']=sp_500_fundamental.groupby('Ticker')['Cash, Cash Equivalents & Short Term Investments'].pct_change(4)
sp_500_fundamental['Assets_yoy']=sp_500_fundamental.groupby('Ticker')['Total Assets'].pct_change(4)
sp_500_fundamental['Leverage']=sp_500_fundamental['Total Assets']/sp_500_fundamental['Total Liabilities']
sp_500_fundamental['CFO_qoq']=sp_500_fundamental.groupby('Ticker')['Net Cash from Operating Activities'].pct_change()
# NOTE(review): 'FCF_qoq' is built from 'Net Change in Cash' - confirm this proxy is intended.
sp_500_fundamental['FCF_qoq']=sp_500_fundamental.groupby('Ticker')['Net Change in Cash'].pct_change()

# Remove inputs that are no longer needed once the features exist.
sp_500_fundamental.drop(columns=['Publish Date', 'epsestimate', 'epsactual','epssurprisepct', 'Report Date','Gross Profit'
                                 , 'Operating Income (Loss)','Total Liabilities', 'Total Equity','Net Cash from Investing Activities',
       'Net Cash from Financing Activities', 'Next_publish_date'],inplace=True)
sp_500_fundamental.columns

Index(['Ticker', 'Earning Date', 'beta_30avg', 'adjclose', 'Fiscal Year',
       'Fiscal Period', 'Revenue', 'Net Income', 'Shares (Basic)',
       'Cash, Cash Equivalents & Short Term Investments', 'Total Assets',
       'Net Cash from Operating Activities',
       'Net Change in Long Term Investment', 'Dividends Paid',
       'Net Change in Cash', 'EPSsuprise_ind', 'Revenue_qoq', 'Revenue_yoy',
       'Net_Income_qoq', 'Net_Income_yoy', 'Gross_margin', 'Gross_margin_qoq',
       'Gross_margin_yoy', 'Operating_margin', 'Operating_margin_qoq',
       'Operating_margin_yoy', 'Net_margin', 'Net_margin_qoq',
       'Net_margin_yoy', 'Cash_yoy', 'Assets_yoy', 'Leverage', 'CFO_qoq',
       'FCF_qoq'],
      dtype='object')

In [86]:
# Shorten the verbose statement column names.
# NOTE(review): 'Net Change in Long Term Investment' is relabelled 'CAPX' and
# 'Net Change in Cash' is relabelled 'FCF' - confirm these proxies are intended.
short_names={
    'Net Income':'Net_income',
    'Shares (Basic)':'Shares',
    'Cash, Cash Equivalents & Short Term Investments':'Cash',
    'Net Cash from Operating Activities':'CFO',
    'Total Assets':'Assets',
    'Net Change in Long Term Investment':'CAPX',
    'Dividends Paid':'Dividends',
    'Net Change in Cash':'FCF',
}
sp_500_fundamental=sp_500_fundamental.rename(columns=short_names)

In [87]:
# Display the column labels to check the result of the rename.
sp_500_fundamental.columns

Index(['Ticker', 'Earning Date', 'beta_30avg', 'adjclose', 'Fiscal Year',
       'Fiscal Period', 'Revenue', 'Net_income', 'Shares', 'Cash', 'Assets',
       'CFO', 'CAPX', 'Dividends', 'FCF', 'EPSsuprise_ind', 'Revenue_qoq',
       'Revenue_yoy', 'Net_Income_qoq', 'Net_Income_yoy', 'Gross_margin',
       'Gross_margin_qoq', 'Gross_margin_yoy', 'Operating_margin',
       'Operating_margin_qoq', 'Operating_margin_yoy', 'Net_margin',
       'Net_margin_qoq', 'Net_margin_yoy', 'Cash_yoy', 'Assets_yoy',
       'Leverage', 'CFO_qoq', 'FCF_qoq'],
      dtype='object')

In [88]:
#valuationmetrics
sp_500_fundamental['PE_new']=sp_500_fundamental['adjclose']/(sp_500_fundamental['Net_income']/sp_500_fundamental['Shares'])
sp_500_fundamental['PS_new']=sp_500_fundamental['adjclose']/(sp_500_fundamental['Revenue']/sp_500_fundamental['Shares'])
sp_500_fundamental['PB_new']=sp_500_fundamental['adjclose']/(sp_500_fundamental['Assets']/sp_500_fundamental['Shares'])
sp_500_fundamental['Dividend_yields_new']=(sp_500_fundamental['Dividends']/sp_500_fundamental['Shares'])/sp_500_fundamental['adjclose']
sp_500_fundamental['Market_cap_mm']=sp_500_fundamental['Shares']*sp_500_fundamental['adjclose']/1000000

In [89]:
# Display (rows, columns) of the final feature table as a sanity check.
sp_500_fundamental.shape

(7889, 39)

In [90]:
# Persist the per-announcement fundamental features (row index not written).
sp_500_fundamental.to_csv("data/fundamental.csv",index=False)