In [1]:
%matplotlib inline
import pandas as pd
import os
# Import the main functionality from the SimFin Python API.
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

import yahoo_fin.stock_info as si
# import pandas_datareader.data as web

from dateutil.relativedelta import relativedelta
from datetime import datetime

In [None]:
# sample base
tickers = si.tickers_sp500()

In [None]:
cwd = os.getcwd()
cwd

In [None]:
sf.set_data_dir('/Users/feiyiyang/Documents/Bootcamp/Team9-Project2/Bootcamp-Project2/simfin_data/')

In [None]:
sf.load_api_key(path='/Users/feiyiyang/Documents/Bootcamp/Team9-Project2/Bootcamp-Project2/simfin_data/simfin_api_key.txt')

### Part 1: Grab quarterly fundamentals from Simfin API

In [None]:
df_income = sf.load(dataset='income', variant='quarterly', market='us')
df_balance = sf.load(dataset='balance', variant='quarterly', market='us')
df_cashflow = sf.load(dataset='cashflow', variant='quarterly', market='us')

In [None]:
df_cashflow.columns

In [None]:
df_fundamentals = pd.merge(df_income[['Ticker','Fiscal Year','Fiscal Period','Report Date', 'Publish Date','Revenue','Gross Profit','Operating Income (Loss)','Net Income']], 
                           df_balance[['Ticker','Fiscal Year','Fiscal Period','Shares (Basic)','Cash, Cash Equivalents & Short Term Investments','Total Assets','Total Liabilities','Total Equity']],  
                           how='left', 
                           on=['Ticker','Fiscal Year','Fiscal Period'])

In [None]:
df_fundamentals= pd.merge(df_fundamentals,
                          df_cashflow[['Ticker','Fiscal Year','Fiscal Period','Net Cash from Operating Activities','Net Change in Long Term Investment','Net Cash from Investing Activities', 'Dividends Paid', 'Net Cash from Financing Activities', 'Net Change in Cash']],
                           how='left', 
                           on=['Ticker','Fiscal Year','Fiscal Period'])

In [None]:
df_fundamentals.tail()

In [None]:
# add a column for next earning date - to set the start/end date for append daily price
df_fundamentals['Next_publish_date']=df_fundamentals.groupby("Ticker")['Publish Date'].shift(-1)
u = (pd.to_datetime(df_fundamentals['Publish Date']) + pd.DateOffset(months=3)).dt.date
df_fundamentals.Next_publish_date.fillna(u,inplace=True)

In [None]:
df_fundamentals.to_csv('data/us_fundamental.csv',index=False)

### Part 2: Grab daily stock price from yahoo finance API (yahoo_fin.stock_info)

In [None]:
# load fundamental data
df_fundamentals=pd.read_csv('data/us_fundamental.csv')

In [None]:
# sample base and starting date for training
tickers_sp500 = list(si.tickers_sp500())
start_date = '2016-01-01'
end_date = '2021-12-31'

In [None]:
# append daily price data to fundamental where the date is within the current publish date and next publish date
frames=[]
for ticker in tickers_sp500:
    try:
        df1=df_fundamentals[df_fundamentals['Ticker']==ticker]
        df2=si.get_data(ticker , start_date = start_date, end_date=end_date,index_as_date=False)
    except:
        print(f"No data available for {ticker}")
    else:
        df_merge=pd.merge(df1,df2,how='left',left_on='Ticker',right_on='ticker')
        df_merge= df_merge[(df_merge.date > df_merge['Publish Date']) & (df_merge.date <= df_merge['Next_publish_date'])]
        frames.append(df_merge)
    

df_sp500 = pd.concat(frames)

In [None]:
# this is for testing if the merge was correct
#list(df_sp500[(df_sp500['Publish Date']== '2021-05-06') & (df_sp500['Ticker']== 'AES')].date)

In [None]:
df_sp500.to_csv('data/sp500_fundamental_dailyprice.csv',index=False)

### Part 3: Append Other Data 

#### 1.Industry/Sector

In [2]:
# load fundamental data
df_sp500=pd.read_csv('data/sp500_fundamental_dailyprice.csv')


In [3]:
df_industry_sector=pd.DataFrame(list(df_sp500.Ticker.unique()),columns=['Ticker'])

In [15]:
test=df_industry_sector.iloc[:5]

In [23]:
df2 = {'Ticker': 'KK'}
test = test.append(df2, ignore_index = True)
test

Unnamed: 0,Ticker
0,A
1,AAL
2,AAP
3,AAPL
4,ABBV
5,TTWO
6,KK


In [None]:
sector_df= pd.DataFrame(columns=['Ticker', 'Sector', 'Industry'])

for ticker in df_industry_sector.Ticker.unique():
    try:
        sector_df=sector_df.append({
            'Ticker':ticker,
            'Sector':si.get_company_info(ticker).loc['sector'].Value,
            'Industry':si.get_company_info(ticker).loc['industry'].Value
        }, ignore_index=True)
        # print(ticker)
    except:
        print(f"No industry data available for {ticker}")


In [28]:
sector_df.tail()

Unnamed: 0,Ticker,Sector,Industry
408,XYL,Industrials,Specialty Industrial Machinery
409,YUM,Consumer Cyclical,Restaurants
410,ZBH,Healthcare,Medical Devices
411,ZBRA,Technology,Communication Equipment
412,ZTS,Healthcare,Drug Manufacturers—Specialty & Generic


In [30]:
sector_df.to_csv('data/industry_sector.csv',index=False)

In [31]:
df_sp500=pd.merge(df_sp500,sector_df,how='left', on='Ticker')

In [33]:
len(df_industry_sector.Ticker.unique())

413

#### 2.Earning Surprise

In [53]:
dftest=pd.DataFrame.from_dict(si.get_earnings_history("AAPL"))
dftest=dftest[['ticker','startdatetime','epsestimate','epsactual','epssurprisepct']]
dftest.startdatetime.date()

AttributeError: 'Series' object has no attribute 'date'

In [51]:
dftest['startdatetime']=dftest['startdatetime'].dt.strftime()

AttributeError: Can only use .dt accessor with datetimelike values

In [None]:
dftest