In [168]:
import numpy as np 
import pandas as pd
import os

from scipy import stats
from scipy.stats import norm

from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

import pickle

import warnings
warnings.filterwarnings('ignore')

# custom general helper functions for this project
import custom_utils as cu
import importlib

In [169]:
# Reload the custom_utils helper module so that edits made to it take effect
# without restarting the kernel. The trailing ';' suppresses the cell output
# (importlib.reload returns the module object, which would otherwise be echoed).
importlib.reload(cu);

In [170]:
# Notebook-wide matplotlib defaults: larger base font and a wide default figure.
# (plt.rcParams and matplotlib.rcParams are the same settings object, so a single
# update call covers both options.)
matplotlib.rcParams.update({
    'font.size': 16,
    'figure.figsize': (20, 10),
})

In [171]:
# plotly
# NOTE(review): `plotly.plotly` was moved to the separate `chart_studio` package in
# plotly 4, so this import presumably fails on newer installs. `py` is not used in
# any cell visible here — confirm against the rest of the notebook before removing.
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
# Enable offline rendering inside the notebook; connected=True loads plotly.js from
# a CDN instead of embedding the library in the notebook file.
init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [173]:
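# Install the alpha_vantage package, used below to download the stock data.
# Uncomment and run once if the package is not yet installed in this kernel's environment:
# %pip install alpha_vantage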

In [174]:
from alpha_vantage.timeseries import TimeSeries

## 1. Get stock data

In [175]:
# The Alpha Vantage API key is stored in a separate file rather than in this notebook;
# the custom_utils helper lists the names of the keys that are available to read.
cu.list_api_keys()

['alpha_vantage']

In [176]:
# Read the Alpha Vantage key through the helper so the secret itself never appears in the notebook
av_api_key = cu.get_api_key("alpha_vantage")

In [44]:
# Alpha Vantage time-series client; output_format='pandas' makes calls return DataFrames.
# Without the key helper, pass the key directly instead:
# ts = TimeSeries(key='YOUR_API_KEY', output_format='pandas')
ts = TimeSeries(key=av_api_key, output_format='pandas')

In [45]:
# Get the daily stock data and the call's metadata for the GOOGL ticker.
# outputsize='full' asks for the full-size result (reported as "Full size" in the
# metadata) rather than a shortened recent window — presumably the API default; TODO confirm.
stock_data, stock_meta_data = ts.get_daily('GOOGL', outputsize='full')

In [46]:
# Show the metadata returned with the request (symbol, last refresh date, output size, time zone)
stock_meta_data

{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',
 '2. Symbol': 'GOOGL',
 '3. Last Refreshed': '2019-04-17',
 '4. Output Size': 'Full size',
 '5. Time Zone': 'US/Eastern'}

In [47]:
# Preview the first rows of the raw download; the columns still carry the numbered API labels
stock_data.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,100.01,104.06,95.96,100.335,44659000.0
2004-08-20,101.01,109.08,100.5,108.31,22834300.0
2004-08-23,110.76,113.48,109.05,109.4,18256100.0
2004-08-24,111.24,111.6,103.57,104.87,15247300.0
2004-08-25,104.76,108.0,103.88,106.0,9188600.0


In [48]:
# Rename the columns so that they contain only characters acceptable in variable names
# (this is what allows attribute access such as stock_data.close further down).
# The API labels look like "1. open", "2. high", ...: the numeric prefix is stripped
# from each label instead of assigning a fixed list by position, so a change in the
# column order cannot silently mislabel the data. Labels that are already clean are
# left unchanged, which keeps the cell safe to re-run.
stock_data.columns = [
    str(label).split(". ", 1)[-1].replace(" ", "_")
    for label in stock_data.columns
]

In [49]:
# Pickle the downloaded data so that later sessions can reload it from disk
# instead of calling the Alpha Vantage API again.
myoutfile = "pickles/google_stock_data.pkl"
# open() does not create missing parent directories, so make sure the target
# folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(myoutfile), exist_ok=True)
with open(myoutfile, 'wb') as picklefile:
    pickle.dump(stock_data, picklefile)

In [177]:
# Reload the cached price history from disk, then preview the first five rows.
# NOTE(review): pickle.load can execute arbitrary code while loading — only use it
# on pickle files that were created locally by this notebook.
with open("pickles/google_stock_data.pkl", mode='rb') as picklefile:
    stock_data = pickle.load(picklefile)

stock_data.head(5)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,100.01,104.06,95.96,100.335,44659000.0
2004-08-20,101.01,109.08,100.5,108.31,22834300.0
2004-08-23,110.76,113.48,109.05,109.4,18256100.0
2004-08-24,111.24,111.6,103.57,104.87,15247300.0
2004-08-25,104.76,108.0,103.88,106.0,9188600.0


In [178]:
# Check the most recent rows to see how far the cached data extends
stock_data.tail()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-11,1208.9,1212.74,1204.54,1209.59,849947.0
2019-04-12,1215.62,1223.05,1213.29,1222.73,1215610.0
2019-04-15,1224.09,1229.3,1214.56,1226.53,1189974.0
2019-04-16,1230.0,1235.98,1225.04,1231.91,1131099.0
2019-04-17,1237.0,1245.1,1232.9,1240.14,1471225.0


## 2. Data exploration

In [179]:
# Recap of the first rows as the starting point for the exploration
stock_data.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,100.01,104.06,95.96,100.335,44659000.0
2004-08-20,101.01,109.08,100.5,108.31,22834300.0
2004-08-23,110.76,113.48,109.05,109.4,18256100.0
2004-08-24,111.24,111.6,103.57,104.87,15247300.0
2004-08-25,104.76,108.0,103.88,106.0,9188600.0


In [180]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
stock_data.describe()

Unnamed: 0,open,high,low,close,volume
count,3691.0,3691.0,3691.0,3691.0,3691.0
mean,635.167468,641.086776,628.712554,634.970676,7387645.0
std,259.306949,260.807566,257.59584,259.319778,8095545.0
min,99.09,101.74,95.96,100.01,38459.0
25%,469.795,474.105,464.68,469.975,2091950.0
50%,580.0,585.0,574.19,580.11,4645500.0
75%,794.975,799.955,788.355,793.485,9610600.0
max,1289.12,1291.44,1263.0,1285.5,82151100.0


In [181]:
# First and last dates covered by the data set
first_date, last_date = stock_data.index.min(), stock_data.index.max()
print("Date range:", first_date, "to", last_date)

Date range: 2004-08-19 to 2019-04-17


In [182]:
# Row(s) on which the closing price reached its maximum over the whole period
is_max_close = stock_data["close"] == stock_data["close"].max()
stock_data[is_max_close]

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-07-26,1267.18,1287.4,1263.0,1285.5,2734302.0


In [183]:
# Row(s) on which the traded volume reached its maximum over the whole period
is_max_volume = stock_data["volume"] == stock_data["volume"].max()
stock_data[is_max_volume]

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-01-20,437.5,440.03,394.74,399.4601,82151100.0


In [184]:
def _price_trace(column, color):
    """Build the line trace for one daily price column of `stock_data`.

    column -- name of the stock_data column to plot; also used as the legend label
    color  -- colour applied to the trace
    Returns a go.Scatter plotted against the date index, with the date as hover text.
    """
    return go.Scatter(
        x = stock_data.index,
        y = stock_data[column],
        mode = "lines",
        name = column,
        marker = dict(color = color),
        text = stock_data.index)


# One trace per price series; the four definitions differ only in column and colour.
trace_open = _price_trace("open", "blue")
trace_close = _price_trace("close", "green")
trace_high = _price_trace("high", "red")
trace_low = _price_trace("low", "grey")

layout = dict(title = 'GOOGL Stock Daily Prices',
              xaxis= dict(zeroline= False),
              yaxis= dict(title= 'Stock price'),
              autosize=False,
              width=980,
              height=600
             )
# Traces are listed high, low, open, close, which is also their order in the legend.
fig = dict(data = [trace_high, trace_low, trace_open, trace_close], layout = layout)
iplot(fig)

The chart above shows the daily open, close, high and low share prices for the Google stock.  
Several patterns stand out in the data: there is an overall long-term growth trend, interrupted by both gradual declines and sharp drops in price. The open, close, high and low prices track one another closely.

In [185]:
# Daily high-low range drawn as floating bars: each bar starts at the day's low
# (base) and extends upwards by the range, so its top sits at the day's high.
daily_range = stock_data.high - stock_data.low
# Cast the index to str before concatenating it into the hover text: this is a
# no-op for a string index and avoids a TypeError if the dates are a DatetimeIndex.
date_labels = stock_data.index.astype(str)

trace_diff = go.Bar(x = stock_data.index,
                    y = daily_range,
                    base = stock_data.low,
                    marker = dict(color = 'orange'),
                    name = 'range',
                    hovertext = ("range: " + daily_range.round(2).astype(str)
                                 + "<br>high: " + stock_data.high.astype(str)
                                 + "<br>low: " + stock_data.low.astype(str)
                                 + "<br>date: " + date_labels),
                    # show only the custom hover text, not the default x/y read-out
                    hoverinfo = 'text'
                   )

layout = dict(title = 'GOOGL Stock Daily Prices: Highs, Lows and Ranges',
              xaxis= dict(zeroline= False),
              yaxis= dict(title= 'Stock price'),
              autosize=False,
              width=980,
              height=600
             )

fig = dict(data = [trace_diff], layout = layout)
iplot(fig)

In [210]:
# Absolute daily price range (high - low) as one small marker per trading day.
daily_range = stock_data.high - stock_data.low
# Cast the index to str before concatenating it into the hover text: this is a
# no-op for a string index and avoids a TypeError if the dates are a DatetimeIndex.
date_labels = stock_data.index.astype(str)

trace_diff2 = go.Scatter(
                    x = stock_data.index,
                    y = daily_range,
                    mode = "markers",
                    name = "range",
                    marker = dict(size = 3, color = 'blue'),
                    hovertext = ("range: " + daily_range.round(2).astype(str)
                                 + "<br>high: " + stock_data.high.astype(str)
                                 + "<br>low: " + stock_data.low.astype(str)
                                 + "<br>date: " + date_labels),
                    # show only the custom hover text, not the default x/y read-out
                    hoverinfo = 'text'
                    )

layout = dict(title = 'GOOGL Stock Daily Price Ranges (absolute)',
              xaxis= dict(zeroline= False),
              yaxis= dict(title= 'Daily stock price range'),
              autosize=False,
              width=980,
              height=600
             )

fig = dict(data = [trace_diff2], layout = layout)
iplot(fig)

In [209]:
# Daily price range relative to the day's low, i.e. (high - low) / low,
# as one small marker per trading day.
daily_range = stock_data.high - stock_data.low
relative_range = daily_range / stock_data.low
# Cast the index to str before concatenating it into the hover text: this is a
# no-op for a string index and avoids a TypeError if the dates are a DatetimeIndex.
date_labels = stock_data.index.astype(str)

trace_diff3 = go.Scatter(
                    x = stock_data.index,
                    y = relative_range,
                    mode = "markers",
                    name = "range",
                    marker = dict(size = 3, color = 'green'),
                    # The relative range is a small fraction, so it is shown with 4
                    # decimals; rounding to 2 would collapse most days onto a handful
                    # of identical values.
                    hovertext = ("range: " + daily_range.round(2).astype(str)
                                 + "<br>range / low: " + relative_range.round(4).astype(str)
                                 + "<br>high: " + stock_data.high.astype(str)
                                 + "<br>low: " + stock_data.low.astype(str)
                                 + "<br>date: " + date_labels),
                    # show only the custom hover text, not the default x/y read-out
                    hoverinfo = 'text'
                    )

layout = dict(title = 'GOOGL Stock Daily Price Ranges (relative)',
              xaxis= dict(zeroline= False),
              yaxis= dict(title= 'Daily stock price range / low'),
              autosize=False,
              width=980,
              height=600
             )

fig = dict(data = [trace_diff3], layout = layout)
iplot(fig)

In [208]:
# Daily traded volume as a single line over the full date range;
# the date is attached as hover text.
trace_volume = go.Scatter(
    x=stock_data.index,
    y=stock_data["volume"],
    mode="lines",
    name="volume",
    marker={"color": 'blue'},
    text=stock_data.index,
)

# Fixed-size figure (autosize off) with the y axis labelled.
layout = {
    "title": 'GOOGL Stock Daily Volumes',
    "xaxis": {"zeroline": False},
    "yaxis": {"title": 'Daily stock volume'},
    "autosize": False,
    "width": 980,
    "height": 700,
}
fig = {"data": [trace_volume], "layout": layout}
iplot(fig)

In [244]:
# Bubble chart: daily close price over time, with each marker's area scaled by
# that day's traded volume.
max_bubble_px = 30.   # marker size, in pixels, given to the day with the largest volume
# Cast the index to str before concatenating it into the hover text: this is a
# no-op for a string index and avoids a TypeError if the dates are a DatetimeIndex.
date_labels = stock_data.index.astype(str)

trace_bubble = go.Scatter(
                            x = stock_data.index,
                            y = stock_data.close,
                            mode = "markers",
                            name = "volume",
                            marker = dict(size = stock_data.volume,
                                          sizemode='area',
                                          # scaling for sizemode='area':
                                          # sizeref = 2 * max(size values) / (max marker size ** 2).
                                          # Series.max() replaces the builtin max(), which
                                          # iterates element by element and is unreliable
                                          # when NaNs are present.
                                          sizeref=2.*stock_data.volume.max()/(max_bubble_px**2),
                                          # keep low-volume days visible
                                          sizemin=2,
                                          color = 'black',
                                          # overlapping bubbles build up darker regions
                                          opacity = 0.2,
                                          line = {"width": 0}),
                            hovertext = ("close: " + stock_data.close.astype(str)
                                         + "<br>volume: " + (stock_data.volume/1000000).round(2).astype(str) + "M"
                                         + "<br>date: " + date_labels),
                            # show only the custom hover text, not the default x/y read-out
                            hoverinfo = 'text'
                        )


layout = dict(title = 'GOOGL Stock Daily Close Prices and Volumes'
                    + '<br><br><span style="font-size: 16px; color: darkgrey">Bubble size = Daily volume</span>',
              xaxis= dict(zeroline= False),
              yaxis= dict(title= 'Close price'),
              autosize=False,
              width=980,
              height=700
             )
fig = dict(data = [trace_bubble], layout = layout)
iplot(fig)