### Class Task: aggregation on MultiIndex
#### Objective:
  - Get daily stock prices for markers 'AAPL', 'QQQ', 'BBBY' for Q3 2018
  - Select only Closing Price and store each stock into a different variable as a DataFrame [['Close']]
  - Join / Merge the three DataFrames based on their Date
  - Ensure that the Data of the joined DataFrame is a column
  - Extract Year and Month from the Data column
  - Create MultiIndex for the DataFrame with Index at level0=Year and level1=Month
  - Perform a summary stats method such as mean over axis=0 and level [Year, Month] or [0, 1]

In [1]:
#Importing libraries needed for pulls from Google
from pandas_datareader import data
import pandas as pd
import numpy as np
import datetime
from datetime import date

In [2]:
# Objective: to Perform summary aggregation on groups using MultiIndex 
#Define the instruments to download. NASDAQ index
aapl_marker = 'AAPL'
qqq_marker = 'QQQ'
bbby_marker ='BBBY'

# Specify the start and end timestamp
start_date = datetime.datetime(2018, 7, 1)
end_date = datetime.datetime(2018, 9, 30)

In [3]:
# Install pandas_datareader with (pip install pandas_datareader or !pip install pandas_datareader)
# Use pandas_reader.data.DataReader to load the desired daily
#   data.DataReader(name, data_source, start, end)
#    name: the name of the stock. can be a scaler or list. For now we use a scalar marker for each
#    data_source= "google", "fred", "ff", "yahoo"

# get aapl stocks and store it to an instance. Store only the Closing value 
#  use [['Close']] to store it as a DataFrame rather than using ['Close'] that would retrive and store as Series
aapl = data.DataReader(name=aapl_marker, data_source='yahoo', start=start_date, end=end_date)[['Close']]
# get qqq stocks and store it to an instance
qqq = data.DataReader(name=qqq_marker, data_source='yahoo', start=start_date, end=end_date)[['Close']]
# get bbby stocks and store it to an instance
bbby = data.DataReader(name=bbby_marker, data_source='yahoo', start=start_date, end=end_date)[['Close']]

In [4]:
# display the shapes of each DataFrame. Verify that they have the same shape
print('aapl:', aapl.shape, 'qqq', qqq.shape, 'bbby', bbby.shape)

aapl: (63, 1) qqq (63, 1) bbby (63, 1)


In [5]:
# Visualize the datasets. Print their head(2)
print('aapl:\n', aapl.head(2), 'qqq:\n', qqq.head(2), 'bbby:\n', bbby.head(2))

aapl:
                 Close
Date                 
2018-07-02  46.794998
2018-07-03  45.980000 qqq:
                  Close
Date                  
2018-07-02  172.800003
2018-07-03  170.800003 bbby:
                 Close
Date                 
2018-07-02  19.330000
2018-07-03  20.030001


In [6]:
# Join the three instances and store them into variable stocks. Remember suffixes
stocks = aapl.join(qqq, lsuffix='_l', rsuffix='_r').join(bbby)
# Rename the columns as ['aapl', 'qqq', 'bbby']
stocks.columns = ['aapl', 'qqq', 'bbby']

In [7]:
# Display the top 5 records and view
stocks.head()

Unnamed: 0_level_0,aapl,qqq,bbby
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-07-02,46.794998,172.800003,19.33
2018-07-03,45.98,170.800003,20.030001
2018-07-05,46.349998,172.919998,20.809999
2018-07-06,46.9925,175.610001,21.07
2018-07-09,47.645,177.190002,21.23


In [8]:
# reset the stocks indices inplace as we want to later create a MultiIndex [Year, Month]
stocks.reset_index(inplace=True)

In [9]:
# Display the top 5 records and view
stocks.head()

Unnamed: 0,Date,aapl,qqq,bbby
0,2018-07-02,46.794998,172.800003,19.33
1,2018-07-03,45.98,170.800003,20.030001
2,2018-07-05,46.349998,172.919998,20.809999
3,2018-07-06,46.9925,175.610001,21.07
4,2018-07-09,47.645,177.190002,21.23


In [11]:
# Get the value for month and year from its current index Date. 
# Convert it to DateTime by calling the date instance method
# Store it to a new DateFrame stocksdt
stocksdt = stocks['Date'].map(lambda x: [x.date().year, x.date().month])
# Add a new column to stocks with value for only Year (index 0)
stocks['Year'] = stocksdt.map(lambda x: x[0])
# Add a new column to stocks with value for only Month (index 1)
stocks['Month'] = stocksdt.map(lambda x: x[1])

In [12]:
# Display the top 5 records and view
stocks.head()

Unnamed: 0,Date,aapl,qqq,bbby,Year,Month
0,2018-07-02,46.794998,172.800003,19.33,2018,7
1,2018-07-03,45.98,170.800003,20.030001,2018,7
2,2018-07-05,46.349998,172.919998,20.809999,2018,7
3,2018-07-06,46.9925,175.610001,21.07,2018,7
4,2018-07-09,47.645,177.190002,21.23,2018,7


In [13]:
# Drop column Date from stocks. Do it inplace
stocks.drop('Date', axis = 1, inplace=True)
# show top 5 records of stocks
stocks.head()

Unnamed: 0,aapl,qqq,bbby,Year,Month
0,46.794998,172.800003,19.33,2018,7
1,45.98,170.800003,20.030001,2018,7
2,46.349998,172.919998,20.809999,2018,7
3,46.9925,175.610001,21.07,2018,7
4,47.645,177.190002,21.23,2018,7


In [14]:
# Use sort_values(['Year', 'Month']) to sort the values in stocks. Do it inplace
stocks.sort_values(['Year', 'Month'], inplace=True)

# Use instance method set_index to set Year and Month as a MultiIndex for stocks. Do it inplace
stocks.set_index(['Year', 'Month'], inplace=True)

In [15]:
# show top 5 records of stocks
stocks.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,aapl,qqq,bbby
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,7,46.794998,172.800003,19.33
2018,7,45.98,170.800003,20.030001
2018,7,46.349998,172.919998,20.809999
2018,7,46.9925,175.610001,21.07
2018,7,47.645,177.190002,21.23


In [16]:
# Find the Mean Closing price per (year, month) for each stock. use level=['Year','Month']
stocks.mean(axis=0, level=['Year','Month'])

  stocks.mean(axis=0, level=['Year','Month'])


Unnamed: 0_level_0,Unnamed: 1_level_0,aapl,qqq,bbby
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,7,47.577857,177.68762,19.586667
2018,8,53.336522,181.650001,18.347391
2018,9,55.518421,183.546842,18.203684
