In [1]:
# Import Libraries
import pandas as pd 
import numpy as np 
import yfinance as yf
from datetime import datetime, timedelta

### Stock Price Data

In [2]:
# Get S&P500 symbols ranked by market cap 
sp500_by_marketcap = pd.read_csv('SP500_By_MarketCap.csv')
sp500_by_marketcap.head()

Unnamed: 0,Symbol,Name,Industry,Market Cap
0,AAPL,Apple Inc.,Electronic Technology,"$ 2,986,128,347,290.24"
1,MSFT,Microsoft Corporation,Technology Services,"$ 2,513,296,516,647.36"
2,GOOG,Alphabet Inc.,Technology Services,"$ 1,927,101,773,229.48"
3,GOOGL,Alphabet Inc.,Technology Services,"$ 1,923,705,624,039.54"
4,AMZN,"Amazon.com, Inc.",Retail Trade,"$ 1,728,404,755,739.39"


In [3]:
# Get tickers in a list
tickers = sp500_by_marketcap['Symbol'].head(50).tolist()
print(tickers)

['AAPL', 'MSFT', 'GOOG', 'GOOGL', 'AMZN', 'TSLA', 'FB', 'NVDA', 'BRK-B', 'JPM', 'UNH', 'V', 'JNJ', 'HD', 'WMT', 'PG', 'BAC', 'MA', 'PFE', 'DIS', 'AVGO', 'XOM', 'ACN', 'CSCO', 'NFLX', 'NKE', 'LLY', 'KO', 'TMO', 'CRM', 'COST', 'ABT', 'ABBV', 'PEP', 'ORCL', 'CMCSA', 'CVX', 'PYPL', 'DHR', 'VZ', 'INTC', 'QCOM', 'WFC', 'MCD', 'MRK', 'UPS', 'T', 'AMD', 'NEE', 'MS']


In [4]:
# Set timeframe 
delta = 365
end = datetime.now()
start = datetime.now() - timedelta(delta)

In [5]:
# Get stock data from yfinance 
df_stock_data=pd.DataFrame()
for ticker in tickers:
    ticker_df = yf.download(ticker, start=start, end=end,interval="1D")
    ticker_df = ticker_df.reset_index()
    ticker_df["Stock"] = ticker
    # Creating a unique key by combining Ticker and Publish date
    ticker_df["unique_key"] = ticker_df["Stock"]+ticker_df["Date"].astype(str)
    df_stock_data =pd.concat([df_stock_data,ticker_df],axis=0)
df_stock_data = df_stock_data.set_index("unique_key")
df_stock_data.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0_level_0,Date,Open,High,Low,Close,Adj Close,Volume,Stock
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAPL2021-04-15,2021-04-15,133.820007,135.0,133.639999,134.5,133.705322,89347100,AAPL
AAPL2021-04-16,2021-04-16,134.300003,134.669998,133.279999,134.160004,133.36734,84922400,AAPL
AAPL2021-04-19,2021-04-19,133.509995,135.470001,133.339996,134.839996,134.04332,94264200,AAPL
AAPL2021-04-20,2021-04-20,135.020004,135.529999,131.809998,133.110001,132.323532,94812300,AAPL
AAPL2021-04-21,2021-04-21,132.360001,133.75,131.300003,133.5,132.711212,68847100,AAPL


In [6]:
df_stock_data.to_csv('stock_price_data.csv')