In [23]:
def get_sp100(timeout=30):
    '''
    Scrape the Wikipedia S&P 100 page and return its ticker symbols.

    The first table whose class is "wikitable sortable" is read; the text of
    the first <td> cell of each row after the first row is taken as a ticker.
    'BRK.B' is rewritten to 'BRK-B'.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).
        Without a timeout `requests` may block indefinitely.

    Returns
    -------
    list of str
        Ticker symbols in the order they appear in the table.

    Raises
    ------
    requests.HTTPError
        If Wikipedia answers with an error status.
    ValueError
        If the page contains no "wikitable sortable" table.
    '''
    import requests
    from bs4 import BeautifulSoup as bs

    url = 'https://en.wikipedia.org/wiki/S%26P_100'
    r = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page.
    r.raise_for_status()

    soup = bs(r.text, 'lxml')
    table_soup = soup.find("table", {"class": "wikitable sortable"})
    if table_soup is None:
        raise ValueError(f'No "wikitable sortable" table found at {url}')

    tickers = []
    # The first row is skipped as the header row.
    for row in table_soup.find_all('tr')[1:]:
        td_soup = row.find_all('td')
        if not td_soup:
            # Rows made only of <th> cells carry no ticker.
            continue
        symbol = td_soup[0].text.replace('\n', '').strip()
        # NOTE(review): presumably the hyphenated form is what the price
        # provider expects for Berkshire Hathaway class B - confirm.
        tickers.append(symbol.replace('BRK.B', 'BRK-B'))
    return tickers

In [26]:
# Fetch the S&P 100 constituents once. The download cell further down reads
# this list under the name `tickers`, so it must be bound to that name.
tickers = get_sp100()
ticker = tickers  # original singular name kept as an alias for any cell that still uses it

In [47]:
def get_yahoo_data(start_date, end_date, tickers, horizons=(1, 2, 3, 5, 10)):
    '''
    Download daily prices via Yahoo Financials and add forward returns.

    For every ticker the daily price history between start_date and end_date
    is requested. For each horizon n a column '<n>daily_return' is added,
    computed as (adjclose n rows later - adjclose) / adjclose, i.e. the
    return looking FORWARD n rows; the last n rows of each ticker are NaN.

    Parameters
    ----------
    start_date, end_date : str
        Date bounds passed unchanged to
        YahooFinancials.get_historical_price_data (called here with
        'YYYY-MM-DD' strings).
    tickers : iterable of str
        Symbols to download.
    horizons : iterable of int, optional
        Row offsets for the forward-return columns. The default
        (1, 2, 3, 5, 10) yields '1daily_return' ... '10daily_return'.

    Returns
    -------
    pandas.DataFrame
        Columns 'formatted_date', 'high', 'low', 'adjclose', 'volume', one
        '<n>daily_return' per horizon, and 'ticker'. The row index restarts
        at 0 for every ticker. If nothing could be downloaded, an empty
        frame with the same columns is returned.

    Notes
    -----
    A ticker that fails (no data, missing fields, request error) is reported
    on stdout and skipped; it does not abort the whole download.
    '''
    from yahoofinancials import YahooFinancials
    import pandas as pd

    horizons = tuple(horizons)
    price_cols = ['formatted_date', 'high', 'low', 'adjclose', 'volume']
    return_cols = [f'{n}daily_return' for n in horizons]

    # Collect per-ticker frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copies the whole frame on every call.
    frames = []
    for i in tickers:
        try:
            yahoo_financials = YahooFinancials(i)
            historical_stock_prices = yahoo_financials.get_historical_price_data(start_date, end_date, 'daily')
            df = pd.DataFrame(historical_stock_prices[i]['prices'])
            df1 = df[price_cols].copy()
            for n, col in zip(horizons, return_cols):
                df1[col] = (df1.adjclose.shift(-n) - df1.adjclose) / df1.adjclose
            df1['ticker'] = i
            frames.append(df1)
        except Exception as exc:
            # Catch Exception, not a bare except, so Ctrl-C still interrupts
            # the loop; print the cause so failures can be diagnosed.
            print(f'{i} has no data for these dates or there is an error ({exc!r})')
    print('Complete!')

    if not frames:
        return pd.DataFrame(columns=price_cols + return_cols + ['ticker'])
    return pd.concat(frames)
    

In [48]:
# Download daily prices and forward returns for the ticker list, then save
# the combined table as CSV without the row index.
# NOTE(review): relies on `tickers` being defined by an earlier cell, and the
# output path is an absolute, machine-specific location.
df_returns = get_yahoo_data(
    start_date='2000-01-01',
    end_date='2020-08-01',
    tickers=tickers,
)
df_returns.to_csv(
    path_or_buf=r'C:\Users\EmilyThomson\OneDrive - Kubrick Group\Python\EDGAR\stock_returns_daily_test.csv',
    index=False,
)

Complete!
