In [1]:
def get_sp100():
    '''
    Scrapes wikipedia page to get tickers for the S&P100 companies. Returns list.
    '''
    import requests 
    import pandas as pd
    from bs4 import BeautifulSoup as bs
    url  = 'https://en.wikipedia.org/wiki/S%26P_100'
    r = requests.get(url)
    soup = bs(r.text, 'lxml')
    table_soup = soup.find("table",{"class":"wikitable sortable"})
    tickers = []

    row_soup = table_soup.find_all('tr')

    for row in row_soup[1:]:      
        td_soup = row.find_all('td')
        tickers.append(td_soup[0].text.replace('\n', '').replace('BRK.B','BRK-B'))
    return tickers

In [2]:
print(get_sp100())

['AAPL', 'ABBV', 'ABT', 'ACN', 'ADBE', 'AIG', 'AMGN', 'AMT', 'AMZN', 'AVGO', 'AXP', 'BA', 'BAC', 'BIIB', 'BK', 'BKNG', 'BLK', 'BMY', 'BRK-B', 'C', 'CAT', 'CHTR', 'CL', 'CMCSA', 'COF', 'COP', 'COST', 'CRM', 'CSCO', 'CVS', 'CVX', 'DD', 'DHR', 'DIS', 'DOW', 'DUK', 'EMR', 'EXC', 'F', 'FB', 'FDX', 'GD', 'GE', 'GILD', 'GM', 'GOOG', 'GOOGL', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KHC', 'KO', 'LIN', 'LLY', 'LMT', 'LOW', 'MA', 'MCD', 'MDLZ', 'MDT', 'MET', 'MMM', 'MO', 'MRK', 'MS', 'MSFT', 'NEE', 'NFLX', 'NKE', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM', 'RTX', 'SBUX', 'SO', 'SPG', 'T', 'TGT', 'TMO', 'TMUS', 'TSLA', 'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'V', 'VZ', 'WBA', 'WFC', 'WMT', 'XOM']


In [3]:
def get_yahoo_data(start_date,end_date,tickers):
    '''
    Uses Yahoo financials to get pricing info on each company. Creates columns for 1, 2, 3, 5, 10 daily returns. Returns dataframe.
    '''
    from yahoofinancials import YahooFinancials
    import pandas as pd
    dftot = pd.DataFrame(columns=['formatted_date','high','low','adjclose','volume','1daily_return','2daily_return','3daily_return','5daily_return','10daily_return'])
    for i in tickers:
        try:
            yahoo_financials = YahooFinancials(i)
            historical_stock_prices = yahoo_financials.get_historical_price_data(start_date, end_date, 'daily')
            df = pd.DataFrame(historical_stock_prices[i]['prices'])
            df1 = df[['formatted_date','high','low','adjclose','volume']]
            df1['1daily_return'] = (df1.adjclose.shift(-1) - df1.adjclose)/df1.adjclose
            df1['2daily_return'] = (df1.adjclose.shift(-2) -df1.adjclose)/df1.adjclose
            df1['3daily_return'] = (df1.adjclose.shift(-3) - df1.adjclose)/df1.adjclose
            df1['5daily_return'] = (df1.adjclose.shift(-5) - df1.adjclose)/df1.adjclose
            df1['10daily_return'] = (df1.adjclose.shift(-10) - df1.adjclose)/df1.adjclose
            df1['ticker'] = i
            dftot = dftot.append(df1)
        except:
            print(f'{i} has no data for these dates or there is an error')

    return dftot
    

In [4]:
get_yahoo_data('2020-01-01','2020-01-30',get_sp100())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['1daily_return'] = (df1.adjclose.shift(-1) - df1.adjclose)/df1.adjclose
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['2daily_return'] = (df1.adjclose.shift(-2) -df1.adjclose)/df1.adjclose


Unnamed: 0,formatted_date,high,low,adjclose,volume,1daily_return,2daily_return,3daily_return,5daily_return,10daily_return,ticker
0,2020-01-02,75.150002,73.797501,74.207466,135480400,-0.009722,-0.001831,-0.006526,0.030897,0.049575,AAPL
1,2020-01-03,75.144997,74.125000,73.486023,146322800,0.007968,0.003228,0.019366,0.043372,0.071614,AAPL
2,2020-01-06,74.989998,73.187500,74.071579,118387200,-0.004703,0.011308,0.032788,0.057238,0.055937,AAPL
3,2020-01-07,75.224998,74.370003,73.723213,108872000,0.016086,0.037669,0.040015,0.047890,0.064714,AAPL
4,2020-01-08,76.110001,74.290001,74.909149,132079200,0.021241,0.023550,0.045417,0.026881,0.052904,AAPL
...,...,...,...,...,...,...,...,...,...,...,...
14,2020-01-23,66.930000,66.050003,59.926407,19605900,-0.006739,-0.030403,-0.031751,,,XOM
15,2020-01-24,66.680000,66.040001,59.522533,18238600,-0.023824,-0.025181,-0.033323,,,XOM
16,2020-01-27,65.730003,64.699997,58.104473,21231000,-0.001390,-0.009731,,,,XOM
17,2020-01-28,65.199997,64.629997,58.023705,19664700,-0.008353,,,,,XOM


In [5]:
import pandas as pd
df = pd.read_csv('Words.csv')
display(df)
'''
Didn't finish this. Iterate through the table to make dictionary of each type of word (negative : debt)
'''

Unnamed: 0,Word,Sequence Number,Word Count,Word Proportion,Average Proportion,Std Dev,Doc Count,Negative,Positive,Uncertainty,Litigious,Constraining,Superfluous,Interesting,Modal,Irr_Verb,Harvard_IV,Syllables,Source
0,AARDVARK,1,277,1.480000e-08,1.240000e-08,3.560000e-06,84,0,0,0,0,0,0,0,0,0,0,2,12of12inf
1,AARDVARKS,2,3,1.600000e-10,9.730000e-12,9.860000e-09,1,0,0,0,0,0,0,0,0,0,0,2,12of12inf
2,ABACI,3,8,4.280000e-10,1.390000e-10,6.230000e-08,7,0,0,0,0,0,0,0,0,0,0,3,12of12inf
3,ABACK,4,12,6.410000e-10,3.160000e-10,9.380000e-08,12,0,0,0,0,0,0,0,0,0,0,2,12of12inf
4,ABACUS,5,7250,3.870000e-07,3.680000e-07,3.370000e-05,914,0,0,0,0,0,0,0,0,0,0,3,12of12inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86481,ZYGOTE,86482,46,2.460000e-09,9.690000e-10,2.060000e-07,31,0,0,0,0,0,0,0,0,0,0,2,12of12inf
86482,ZYGOTES,86483,1,5.340000e-11,2.040000e-11,2.070000e-08,1,0,0,0,0,0,0,0,0,0,0,2,12of12inf
86483,ZYGOTIC,86484,0,0.000000e+00,0.000000e+00,0.000000e+00,0,0,0,0,0,0,0,0,0,0,0,3,12of12inf
86484,ZYMURGIES,86485,0,0.000000e+00,0.000000e+00,0.000000e+00,0,0,0,0,0,0,0,0,0,0,0,3,12of12inf
