## Data Ingestion.

In [9]:
## if packages aren't installed yet, run the following line
# !pip install matplotlib seaborn pandas numpy pandas_datareader requests_cache

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
## Plot styling: seaborn's 'whitegrid' style is set first, then matplotlib's
## "fivethirtyeight" style sheet is applied afterwards.
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
## IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline
## For reading stock data from yahoo
from pandas_datareader.data import DataReader
## requests_cache supplies a cached HTTP session for the DataReader, so repeated runs don't overburden the site.
import requests_cache
## For time stamps
from datetime import datetime, timedelta, date

In [11]:
## The tech stocks we'll use for this analysis
stock_list = ['GSIT', 'ICAD', 'XAIR', 'LTRN', 'ARKK', 'ARKF', 'ARKW']

## Date window for the data grab: the five years ending now.
end = datetime.now()
try:
    start = datetime(end.year - 5, end.month, end.day)
except ValueError:
    ## Only reachable when today is Feb 29: the year five years back is never
    ## a leap year, so that date does not exist. Fall back to Feb 28.
    start = datetime(end.year - 5, end.month, 28)

## Set up a sqlite-backed request cache (named 'cache') whose entries expire
## after one day, so re-running this cell does not re-request the same data.
expire_after = timedelta(days=1)
session = requests_cache.CachedSession(cache_name='cache', backend='sqlite', expire_after=expire_after)

## Download one DataFrame per ticker, keyed by ticker symbol, and tag every
## row with its ticker in a 'company_name' column.
## NOTE(review): confirm the 'yahoo' source still works with the installed
## pandas_datareader version.
stock_frames = {}
for stock in stock_list:
    frame = DataReader(stock, 'yahoo', start, end, session=session)
    frame["company_name"] = stock
    stock_frames[stock] = frame

## Backward compatibility: also expose each frame as a global variable named
## after its ticker (GSIT, ICAD, ...), as this cell has always done.
globals().update(stock_frames)

## The same frames in stock_list order, derived from stock_list so the two
## can never get out of sync.
company_list = [stock_frames[stock] for stock in stock_list]

## Stack all the per-ticker DataFrames into one larger DataFrame
stocks_df = pd.concat(company_list, axis=0)
## Show a random selection of rows (seeded so the preview is reproducible)
stocks_df.sample(n=10, random_state=42)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,company_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-06-19,7.76,7.61,7.76,7.66,45300.0,7.66,GSIT
2018-05-30,7.68,7.37,7.47,7.62,57800.0,7.62,GSIT
2016-03-31,19.23,19.129999,19.143999,19.129999,3000.0,17.943487,ARKK
2016-11-15,20.594999,20.594999,20.594999,20.594999,500.0,19.317619,ARKK
2019-04-04,48.459999,47.509998,48.380001,48.130001,366100.0,47.197308,ARKK
2019-03-19,8.42,7.92,7.97,8.22,47400.0,8.22,GSIT
2016-06-07,5.98,5.6,5.71,5.67,7800.0,5.67,ICAD
2020-05-15,11.0,10.5,10.84,10.68,267800.0,10.68,ICAD
2016-11-14,5.1,5.05,5.1,5.09,12200.0,5.09,GSIT
2020-04-20,8.38,7.56,8.03,7.9,442900.0,7.9,XAIR


In [16]:
## Report how many rows each ticker contributes to the combined frame
for name in stock_list:
    ticker_mask = stocks_df["company_name"] == name
    adj_close = stocks_df.loc[ticker_mask, "Adj Close"]
    print(name, adj_close.shape[0])

GSIT 1260
ICAD 1260
XAIR 677
LTRN 174
ARKK 1260
ARKF 515
ARKW 1260
