In [25]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
# Import the main functionality from the SimFin Python API.
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

import yfinance as yf
import yahoo_fin.stock_info as si
# import pandas_datareader.data as web

from dateutil.relativedelta import relativedelta
from datetime import datetime

In [80]:
# Read every input table used to assemble the two modelling datasets.
fdmt = pd.read_csv('data/fundamental.csv')        # fundamentals, keyed by Ticker / Earning Date
seg = pd.read_csv('data/industry_sector.csv')     # per-ticker attributes, joined on Ticker
tech = pd.read_csv('data/techind.csv')            # technical indicators used for Model 1
tech2 = pd.read_csv('data/techind_model2.csv')    # technical indicators used for Model 2
mkts = pd.read_csv('data/fin_mkts.csv')           # market data, keyed by Ticker / Earning Date
senti = pd.read_csv('data/mean_news_sentiment.csv')  # keyed by ticker / release_date
target = pd.read_csv('data/target.csv')           # target columns, keyed by Ticker / Earning Date

## Create dataset for Model 1 - "Before-Earning-Call Betting"
### Predicting the likelihood of a positive return before the newest earnings information is known

In [81]:
# Model 1 base table: fundamentals with the per-ticker columns from `seg` attached.
df_model1 = fdmt.merge(seg, how='left', on=['Ticker'])

# Pair each row with the same ticker's following earnings date.
# NOTE(review): shift(-1) follows row order inside each ticker group, so this
# assumes fundamental.csv is already sorted by earnings date -- confirm.
df_model1['Next_EarningDate'] = (
    df_model1.groupby('Ticker')['Earning Date'].shift(-1)
)

# Remove the valuation metrics that came with the fundamentals; a later cell
# recomputes the same five columns from the T0 column.
df_model1 = df_model1.drop(
    columns=[
        'PE_new',
        'PS_new',
        'PB_new',
        'Dividend_yields_new',
        'Market_cap_mm',
    ]
)

# Attach technical indicators, market data and news sentiment, all looked up
# at the *next* earnings date. Both the left frame and `tech` carry an
# 'Earning Date' column, so pandas' default suffixes keep them as
# 'Earning Date_x' / 'Earning Date_y'; the copy from `mkts` then arrives
# unsuffixed as 'Earning Date'.
df_model1 = (
    df_model1
    .merge(
        tech,
        how='left',
        left_on=['Ticker', 'Next_EarningDate'],
        right_on=['Ticker', 'Earning Date'],
    )
    .merge(
        mkts,
        how='left',
        left_on=['Ticker', 'Next_EarningDate'],
        right_on=['Ticker', 'Earning Date'],
    )
    .merge(
        senti,
        how='left',
        left_on=['Ticker', 'Next_EarningDate'],
        right_on=['ticker', 'release_date'],
    )
    # The sentiment table's own key columns are redundant after the join.
    .drop(columns=['ticker', 'release_date'])
)

In [82]:
# Attach the target columns for the next earnings date.
#
# By this point the frame can hold 'Earning Date_x' and 'Earning Date_y'
# (left over from the tech merge) plus an unsuffixed 'Earning Date' that came
# from `mkts`. Joining `target` through its own 'Earning Date' column would
# make pandas suffix the two 'Earning Date' columns into a second
# 'Earning Date_x' / 'Earning Date_y' pair: a FutureWarning on older pandas
# and a MergeError on pandas >= 2.0.
#
# To avoid the collision, drop the redundant mkts copy of the key and rename
# target's key so both sides join on 'Next_EarningDate'. No key column is
# duplicated, and the columns that remain after the following drop cell are
# the same as before.
df_model1 = (
    df_model1
    .drop(columns=['Earning Date'], errors='ignore')
    .merge(
        target.rename(columns={'Earning Date': 'Next_EarningDate'}),
        how='left',
        on=['Ticker', 'Next_EarningDate'],
    )
)

  after removing the cwd from sys.path.


In [83]:
# Discard columns that are not carried into the Model 1 dataset, together with
# the suffixed date keys left behind by the merges.
# NOTE(review): T1 / T1_5 / open / overnight_jump look like post-release
# values that Model 1 must not see -- confirm against target.csv.
df_model1 = df_model1.drop(
    columns=[
        'T1',
        'T1_5',
        'open',
        'overnight_jump',
        'Earning Date_x',
        'Earning Date_y',
    ]
)

In [84]:
# Sanity check: (rows, columns) of the assembled Model 1 frame.
df_model1.shape

(8211, 79)

In [85]:
# Valuation metrics, rebuilt from the T0 column and the per-share fundamentals.
# NOTE(review): PB_new divides by Assets per share; price-to-book is normally
# based on book equity -- confirm this matches how fundamental.csv defines it.
# NOTE(review): zero Net_income / Revenue / Assets / T0 values produce inf here.
df_model1 = df_model1.assign(
    PE_new=lambda d: d['T0'] / (d['Net_income'] / d['Shares']),
    PS_new=lambda d: d['T0'] / (d['Revenue'] / d['Shares']),
    PB_new=lambda d: d['T0'] / (d['Assets'] / d['Shares']),
    Dividend_yields_new=lambda d: (d['Dividends'] / d['Shares']) / d['T0'],
    # Shares * T0 scaled down by one million.
    Market_cap_mm=lambda d: d['Shares'] * d['T0'] / 1000000,
)

In [86]:
# List the final Model 1 column names to review the merged schema before saving.
df_model1.columns

Index(['Ticker', 'beta_30avg', 'adjclose', 'Fiscal Year', 'Fiscal Period',
       'Revenue', 'Net_income', 'Shares', 'Cash', 'Assets', 'CFO', 'CAPX',
       'Dividends', 'FCF', 'EPSsuprise_ind', 'Revenue_qoq', 'Revenue_yoy',
       'Net_Income_qoq', 'Net_Income_yoy', 'Gross_margin', 'Gross_margin_qoq',
       'Gross_margin_yoy', 'Operating_margin', 'Operating_margin_qoq',
       'Operating_margin_yoy', 'Net_margin', 'Net_margin_qoq',
       'Net_margin_yoy', 'Cash_yoy', 'Assets_yoy', 'Leverage', 'CFO_qoq',
       'FCF_qoq', 'Sector', 'Industry', 'Next_EarningDate', 'RSI', 'MFI',
       'adjclose_SMA_12', 'adjclose_SMA_30', 'adjclose_EWMA_12',
       'adjclose_EWMA_30', 'adjclose_UpperBand', 'adjclose_LowerBand',
       'SMA_12_SMA_30', 'EWMA_12_EWMA_30', 'UpperBand_LowerBand', '^GSPC',
       '^GSPC_pctchange_5', '^GSPC_pctchange_21', '^GSPC_pctchange_yoy',
       '^RUT', '^RUT_pctchange_5', '^RUT_pctchange_21', '^RUT_pctchange_yoy',
       '^VIX', '^VIX_pctchange_5', '^VIX_pctchange_2

In [87]:
# Persist the Model 1 dataset; the row index is not written to the file.
df_model1.to_csv(path_or_buf="data/model_1.csv", index=False)

## Create dataset for Model 2 - "After-Earning-Call Betting"
### Predicting the likelihood of a positive return after the earnings release and the subsequent price action have taken place (acting on T1)

In [70]:
# Model 2 base table: fundamentals with the per-ticker columns from `seg` attached.
df_model2 = fdmt.merge(seg, how='left', on=['Ticker'])

In [71]:
# Attach the Model 2 technical indicators for the same ticker and earnings date.
df_model2 = df_model2.merge(
    tech2,
    how='left',
    on=['Ticker', 'Earning Date'],
)

In [72]:
# Attach the market data recorded for the same ticker and earnings date.
df_model2 = df_model2.merge(
    mkts,
    how='left',
    on=['Ticker', 'Earning Date'],
)

In [73]:
# Attach news sentiment, matching the sentiment table's ticker / release_date
# to this frame's Ticker / Earning Date, then discard the sentiment table's
# own key columns, which are redundant after the join.
df_model2 = (
    df_model2
    .merge(
        senti,
        how='left',
        left_on=['Ticker', 'Earning Date'],
        right_on=['ticker', 'release_date'],
    )
    .drop(columns=['ticker', 'release_date'])
)

In [74]:
# Attach the target columns for the same ticker and earnings date.
df_model2 = df_model2.merge(
    target,
    how='left',
    on=['Ticker', 'Earning Date'],
)

In [75]:
# Discard the columns that are not carried into the Model 2 dataset.
# NOTE(review): T0 / open / T0_5 look like the pre-release counterparts of the
# T1-based fields Model 2 acts on -- confirm against target.csv.
df_model2 = df_model2.drop(columns=['T0', 'open', 'T0_5'])

In [76]:
# Sanity check: (rows, columns) of the assembled Model 2 frame.
# NOTE(review): the recorded output below shows 8225 rows, while the Model 1
# frame built from the same fdmt/seg base shows 8211. A left join only adds
# rows when the right-hand key is duplicated -- check tech2 / mkts / senti /
# target for repeated (Ticker, Earning Date) keys.
df_model2.shape

(8225, 88)

In [77]:
# List the final Model 2 column names to review the merged schema before saving.
# NOTE(review): the recorded output still contains 'date', 'nextday' and
# 'lastday' -- confirm these are meant to be kept in the saved dataset.
df_model2.columns

Index(['Ticker', 'Earning Date', 'beta_30avg', 'adjclose', 'Fiscal Year',
       'Fiscal Period', 'Revenue', 'Net_income', 'Shares', 'Cash', 'Assets',
       'CFO', 'CAPX', 'Dividends', 'FCF', 'EPSsuprise_ind', 'Revenue_qoq',
       'Revenue_yoy', 'Net_Income_qoq', 'Net_Income_yoy', 'Gross_margin',
       'Gross_margin_qoq', 'Gross_margin_yoy', 'Operating_margin',
       'Operating_margin_qoq', 'Operating_margin_yoy', 'Net_margin',
       'Net_margin_qoq', 'Net_margin_yoy', 'Cash_yoy', 'Assets_yoy',
       'Leverage', 'CFO_qoq', 'FCF_qoq', 'PE_new', 'PS_new', 'PB_new',
       'Dividend_yields_new', 'Market_cap_mm', 'Sector', 'Industry', 'date',
       'RSI', 'MFI', 'adjclose_SMA_12', 'adjclose_SMA_30', 'adjclose_EWMA_12',
       'adjclose_EWMA_30', 'adjclose_UpperBand', 'adjclose_LowerBand',
       'SMA_12_SMA_30', 'EWMA_12_EWMA_30', 'UpperBand_LowerBand', 'nextday',
       'lastday', '^GSPC', '^GSPC_pctchange_5', '^GSPC_pctchange_21',
       '^GSPC_pctchange_yoy', '^RUT', '^RUT_pctcha

In [78]:
# Persist the Model 2 dataset; the row index is not written to the file.
df_model2.to_csv(path_or_buf="data/model_2.csv", index=False)