## Data Ingestion.

In [4]:
## If the packages aren't installed yet, uncomment and run the following line
# %pip install matplotlib seaborn pandas numpy pandas_datareader requests_cache

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
## For reading stock data from yahoo
from pandas_datareader.data import DataReader
## the following package is used with the datareader to prevent overburdening the site.
import requests_cache
## For time stamps
from datetime import datetime, timedelta, date

In [7]:
## The tech stocks we'll use for this analysis
stock_list = ['GSIT', 'ICAD', 'XAIR', 'LTRN', 'ARKK', 'ARKF', 'ARKW']


def years_before(moment, years):
    """Return midnight on the calendar day `years` years before `moment`.

    Feb 29 has no counterpart in a non-leap year, so in that case the
    result falls back to Feb 28 instead of raising ValueError.
    """
    target_year = moment.year - years
    try:
        return datetime(target_year, moment.month, moment.day)
    except ValueError:
        ## Feb 29 landing in a non-leap year; an out-of-range year
        ## simply raises ValueError again from this call.
        return datetime(target_year, moment.month, 28)


## Set up End and Start times for data grab: the five years up to now
end = datetime.now()
start = years_before(end, 5)

## Cached HTTP session (sqlite backend, cache name 'cache') whose entries
## expire after one day, so repeated runs don't keep hitting the data site.
expire_after = timedelta(days=1)
session = requests_cache.CachedSession(
    cache_name='cache',
    backend='sqlite',
    expire_after=expire_after,
)

## Grab the yahoo finance history for every ticker, one DataFrame per ticker.
## The frames are collected in a dict keyed by ticker symbol so that
## everything below is derived from stock_list alone.
stock_frames = {}
for stock in stock_list:
    stock_frames[stock] = DataReader(stock, 'yahoo', start, end, session=session)

## Backward compatibility: also expose each frame as a variable named after
## its ticker (GSIT, ICAD, ...). Prefer stock_frames[ticker] in new code.
globals().update(stock_frames)

## Same order as stock_list, so the zip below always pairs a frame with its
## own ticker (previously the tickers were hard-coded a second time here).
company_list = [stock_frames[ticker] for ticker in stock_list]

## Tag every row with the ticker it belongs to
for company, stock_name in zip(company_list, stock_list):
    company["company_name"] = stock_name

## adds all the dataframes into one larger
stocks_df = pd.concat(company_list, axis=0)

## Persist a dated snapshot of the combined frame
stocks_df.to_pickle('../data/stocks_df_{}.pickle'.format(date.today()))

## shows a random selection of rows; kept as the last expression of the
## cell so that the notebook actually renders it
stocks_df.sample(n=10)

In [6]:
## Print, for each ticker, how many 'Adj Close' rows it has in stocks_df
for name in stock_list:
    adj_close = stocks_df.loc[stocks_df["company_name"] == name, "Adj Close"]
    print(name, adj_close.shape[0])

GSIT 1260
ICAD 1260
XAIR 678
LTRN 175
ARKK 1260
ARKF 516
ARKW 1260
