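## Data Ingestion

Download the last year of daily price history for each ticker from Yahoo Finance, combine the results into a single DataFrame, and save it as a dated pickle file.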

In [12]:
## if packages aren't installed yet, run the following line
# !pip3 install matplotlib seaborn pandas numpy pandas_datareader requests_cache

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
## For reading stock data from yahoo
from pandas_datareader.data import DataReader
## the following package is used with the datareader to prevent overburdening the site.
import requests_cache
## For time stamps
from datetime import datetime, timedelta, date
## For building the output path and creating its directory
from pathlib import Path

In [13]:
## Tickers to download for this analysis
stock_list = ['GSIT', 'ICAD', 'XAIR', 'LTRN', 'ARKK', 'ARKF', 'ARKW']

## Date window for the data grab: from one year ago (at midnight) until now
end = datetime.now()
try:
    start = datetime(end.year - 1, end.month, end.day)
except ValueError:
    ## Only Feb 29 can fail here (the previous year is not a leap year),
    ## so fall back to Feb 28 of the previous year.
    start = datetime(end.year - 1, end.month, 28)

## Cache HTTP responses in a local sqlite file for one day so that re-running
## this cell does not hit the data provider again.
expire_after = timedelta(days=1)
session = requests_cache.CachedSession(cache_name='cache', backend='sqlite', expire_after=expire_after)

## Download each ticker, tag its rows with the ticker symbol, and collect the
## frames in the same order as stock_list. Building company_list inside the
## loop keeps it in sync with stock_list automatically, instead of relying on
## a second hand-typed list of names.
company_list = []
for stock in stock_list:
    stock_frame = DataReader(stock, 'yahoo', start, end, session=session)
    stock_frame["company_name"] = stock
    ## Also publish the frame under its ticker name (e.g. GSIT, ARKK) so that
    ## any cell referring to those variables keeps working.
    globals()[stock] = stock_frame
    company_list.append(stock_frame)

## Stack all per-ticker frames row-wise into one larger DataFrame
stocks_df = pd.concat(company_list, axis=0)
## Show a random selection of rows (different on every run; no seed is set)
stocks_df.sample(n=10)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,company_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-09-01,10.85,10.06,10.74,10.3,191000.0,10.3,ICAD
2020-12-18,50.679001,50.150002,50.679001,50.41,1780400.0,50.220947,ARKF
2020-02-27,11.18,10.18,11.11,10.91,401200.0,10.91,ICAD
2021-01-26,147.339996,141.720001,147.25,141.830002,8106400.0,141.830002,ARKK
2020-08-04,6.6,5.84,5.86,6.52,106000.0,6.52,GSIT
2021-02-16,18.362101,17.379999,18.030001,17.82,40036.0,17.82,ICAD
2020-07-27,82.129997,79.320999,79.800003,81.970001,2314000.0,80.679192,ARKK
2020-07-14,33.799999,32.68,33.459999,33.77,347600.0,33.643353,ARKF
2020-12-29,5.28,5.11,5.19,5.18,175600.0,5.18,XAIR
2020-04-01,9.6,7.85,7.85,9.42,882000.0,9.42,XAIR


In [14]:
## Save the combined frame to a pickle file named after today's date.
output_path = Path('./data/stocks_df_{}.pickle'.format(date.today()))
## to_pickle does not create missing directories, so make sure ./data exists;
## otherwise a fresh checkout fails with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
stocks_df.to_pickle(output_path)