In [9]:
import pandas as pd
import yfinance as yf
import datetime as dt
import ssl
from pandas_datareader import data as pdr
yf.pdr_override()
ssl._create_default_https_context = ssl._create_unverified_context

In [10]:
def get_spy_list():
    # Get list of S&P stocks from wiki
    sp500url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    data_table = pd.read_html(sp500url)
    # Place list of stocks in a table
    snp_list = data_table[0]
    # remove unneeded columns
    snp_list = snp_list.drop(columns=["CIK","Founded","Headquarters Location"], inplace=False)
    # set index
    snp_list = snp_list.set_index("Symbol",inplace=False)
    # set convert add dates to datetime format
    snp_list['Date Added'] = pd.to_datetime(snp_list['Date added'], errors='coerce')
    snp_list = snp_list.drop(columns = ["Date added"])
    
    # Create list of added and removed stocks
    add_remove_df = data_table[1]
    # set the index
    add_remove_df = add_remove_df.set_index("Date",inplace=False)

    # Creat list of removed stocks
    remove_list = pd.DataFrame()
    remove_list["Symbol"] = add_remove_df["Removed"]["Ticker"]
    remove_list["Security"] = add_remove_df["Removed"]["Security"]

    # Convert list of dates into datetime format
    dates = add_remove_df["Removed"].index.values.tolist()
    def makethisdate(date):
        s = str(date)
        new = dt.datetime.strptime(s, "('%B %d, %Y',)")
        new = new.strftime('%Y-%m-%d')
        return new
    dates = list(map(makethisdate, dates))
    remove_list["Date Removed"] = dates
    remove_list["Date Removed"] = pd.to_datetime(remove_list["Date Removed"])
    # update the index to match that of the snp_list
    remove_list = remove_list.set_index("Symbol", inplace = False)
    remove_list = remove_list.dropna()

    # Create List of Added Stocks
    added_list = pd.DataFrame()
    added_list["Symbol"] = add_remove_df["Added"]["Ticker"]

    # Convert list of dates into datetime format
    dates = add_remove_df["Added"].index.values.tolist()
    def makethisdate(date):
        s = str(date)
        new = dt.datetime.strptime(s, "('%B %d, %Y',)")
        new = new.strftime('%Y-%m-%d')
        return new
    dates = list(map(makethisdate, dates))
    added_list["Date Added"] = dates
    added_list["Date Added"] = pd.to_datetime(added_list["Date Added"])
    # update the index to match that of the snp_list
    added_list = added_list.set_index("Symbol", inplace = False)
    added_list = added_list.dropna()

    # Update removed list with any that have been added and removed
    remove_list = remove_list.join(added_list)
    remove_list["Date Added"] = remove_list["Date Added"].fillna(dt.date.today() - dt.timedelta(days = 20*366))
    # remove symbols dropped prior to twenty years ago
    remove_list = remove_list[remove_list["Date Removed"]>(dt.datetime.now() - dt.timedelta(days = 20*365))]
    remove_list = remove_list.groupby("Symbol").max()

    # Add the two tables for a total list
    snp_list = snp_list["Date Added"].fillna(dt.date.today() - dt.timedelta(days = 20*366))
    snp_total_list = snp_list.append(remove_list)
    
    return snp_total_list



In [11]:
def getData(stocks, start, end):
    stockdata = pdr.get_data_yahoo(stocks, start=start, end=end)
    stockdata = stockdata['Adj Close']
    
    returns = stockdata.pct_change().dropna(how='all').dropna(axis=1)
    return_list = returns.columns.T.to_list()
    mean_returns = returns.mean()
    cov_matrix = returns.cov()
    return mean_returns, cov_matrix, return_list


In [12]:
stock_list = get_spy_list()
pull_list = stock_list.index.T.to_list()
end1 = dt.date.today()
start1 = end1 - dt.timedelta(days=365)
start = start1.strftime('%Y-%m-%d')
end = end1.strftime('%Y-%m-%d')

mean_returns, cov_matrix, return_list = getData(stocks=pull_list,start=start,end=end)

TypeError: to_append should be a Series or list/tuple of Series, got DataFrame