In [1]:
#import all necessary libraries and tools
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import alpaca_trade_api as tradeapi
%matplotlib inline
import pandas_datareader.data as web

## Pull in Data for Analysis Purposes
* S&P500 Index
* REIT Residential Stocks from Nasdaq
* Historical Single Family Home Price Data by State from Zillow

In [2]:
#set time frame for data from a year by day
timeframe = '1D'
end_date = datetime.now()
start_date = end_date + timedelta(-365)

In [3]:
#get s&p 500 index information as a whole - for use in beta calculations
sp500 = web.DataReader(['sp500'], 'fred', start_date, end_date)
sp500.head()

Unnamed: 0_level_0,sp500
DATE,Unnamed: 1_level_1
2019-05-02,2917.52
2019-05-03,2945.64
2019-05-06,2932.47
2019-05-07,2884.05
2019-05-08,2879.42


In [4]:
#calculate percent change and clean data for s&p 500 index
sp500_returns = sp500.pct_change()
sp500_returns.dropna(inplace = True)
sp500_returns.head()

Unnamed: 0_level_0,sp500
DATE,Unnamed: 1_level_1
2019-05-03,0.009638
2019-05-06,-0.004471
2019-05-07,-0.016512
2019-05-08,-0.001605
2019-05-09,-0.003021


In [5]:
#set up data frame reading data from the REIT-residential stocks
screened_stocks_df = pd.read_csv('./Data/REIT_stocks.csv')
screened_stocks_tickers = screened_stocks_df['Ticker']
tickers= screened_stocks_tickers.tolist()
tickers.pop(2) #clean out 2 stocks with incomplete data

'AIII'

In [6]:
#pull closing price data for REIT-residential stocks
read_stocks = web.DataReader(tickers, 'yahoo',start_date,end_date)
REIT_stocks = read_stocks['Close']
REIT_stocks.head()

Symbols,ACC,AHH,AIV,AMH,APTS,AVB,BOWFF,BRG,BRT,CPT,...,NXRT,OPI,RESI,RPT,SNR,SRC,SUI,UDFI,UDR,UMH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-05-02,47.150002,15.41,49.060001,24.01,15.74,201.440002,28.870001,11.1,14.0,99.980003,...,37.52,26.299999,9.94,12.54,5.59,42.400002,123.839996,4.91,44.259998,13.89
2019-05-03,47.310001,16.030001,50.049999,24.280001,16.059999,202.160004,29.01,11.29,14.05,100.830002,...,38.93,28.41,10.29,12.78,6.09,41.700001,123.709999,5.0,44.470001,13.95
2019-05-06,47.110001,16.42,49.759998,24.16,16.1,200.570007,29.01,11.4,14.19,100.889999,...,39.110001,28.030001,10.07,12.84,6.11,42.150002,123.919998,5.0,44.369999,13.65
2019-05-07,46.25,16.16,49.080002,23.66,15.92,196.789993,29.01,11.11,14.18,99.07,...,38.060001,26.889999,10.03,12.45,6.01,41.509998,121.839996,4.85,43.549999,13.15
2019-05-08,45.91,16.1,49.110001,23.530001,15.93,196.449997,29.01,11.04,14.22,98.75,...,39.970001,26.809999,11.08,12.52,6.12,41.540001,121.150002,4.92,43.599998,13.22


In [7]:
#calculate percent change and clean data for REIT-residential stocks
stocks_returns = REIT_stocks.pct_change()
stocks_returns.dropna(inplace =True)
stocks_returns.head()

Symbols,ACC,AHH,AIV,AMH,APTS,AVB,BOWFF,BRG,BRT,CPT,...,NXRT,OPI,RESI,RPT,SNR,SRC,SUI,UDFI,UDR,UMH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-13,0.004533,0.012048,0.015524,0.014792,0.00232,0.012441,0.011032,0.006873,-0.009217,0.01262,...,0.017457,0.005979,-0.019964,0.007607,0.003995,0.006475,0.01662,-0.04,0.014034,-0.01369
2019-11-14,0.005801,0.007937,0.003356,0.008439,0.029321,0.004868,0.0,0.0,0.005233,0.006593,...,0.007626,0.001564,0.002778,0.00755,0.002653,0.013671,0.004008,0.03125,0.007549,-0.014541
2019-11-15,0.005554,0.006187,0.007432,0.008368,0.02024,0.008983,0.0578,0.005119,-0.01793,0.007447,...,0.009249,0.0,-0.000923,-0.007493,0.025132,0.006942,0.010137,-0.006734,0.008117,-0.004024
2019-11-18,0.008286,-0.010062,0.007008,-0.001886,0.005878,0.003449,0.016448,-0.008489,-0.004711,0.002672,...,0.001666,0.009994,0.001848,0.002745,0.015484,0.005318,0.012733,-0.00339,0.005161,-0.006734
2019-11-19,-0.000421,0.002823,0.001648,-0.002645,-0.005113,0.008315,-0.014262,0.0,0.015976,0.001776,...,-0.006446,-0.005257,0.0,0.001369,0.010165,0.002743,0.013688,0.020408,0.003902,0.010169


In [8]:
#set up data frame reading data from Single Family Home Prices by State
housing_prices_df = pd.read_csv('./Data/State_Zhvi_SingleFamilyResidence.csv')
housing_prices_df.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,1996-01-31,1996-02-29,1996-03-31,1996-04-30,1996-05-31,...,2019-06-30,2019-07-31,2019-08-31,2019-09-30,2019-10-31,2019-11-30,2019-12-31,2020-01-31,2020-02-29,2020-03-31
0,9,0,California,State,CA,164133.0,163906.0,163766.0,163493.0,163318.0,...,562386.0,563398.0,564896.0,566911.0,569019.0,571142.0,574142.0,577601.0,581867.0,586271.0
1,54,1,Texas,State,TX,98043.0,98068.0,98099.0,98226.0,98368.0,...,207244.0,207904.0,208526.0,209208.0,209696.0,210191.0,210559.0,211003.0,211434.0,212043.0
2,43,2,New York,State,NY,119646.0,119373.0,119211.0,118966.0,118867.0,...,266732.0,267217.0,267654.0,268098.0,268639.0,269259.0,269990.0,270660.0,271355.0,271984.0
3,14,3,Florida,State,FL,103902.0,104008.0,104130.0,104360.0,104581.0,...,255766.0,256449.0,257111.0,257807.0,258590.0,259429.0,260224.0,261244.0,262536.0,263965.0
4,21,4,Illinois,State,IL,139641.0,139388.0,139067.0,138688.0,138168.0,...,203835.0,204142.0,204371.0,204487.0,204550.0,204609.0,204767.0,205009.0,205222.0,205522.0


In [9]:
#clean up housing price data to get time as an index and state name as a column
#drop regionID, size, region, name, region type
housing_prices_df.drop(['RegionID','SizeRank','RegionName','RegionType'],axis=1,inplace=True)

In [16]:
housing_prices_df.set_index(['StateName'],inplace=True)
housing_prices_df.head()

Unnamed: 0_level_0,1996-01-31,1996-02-29,1996-03-31,1996-04-30,1996-05-31,1996-06-30,1996-07-31,1996-08-31,1996-09-30,1996-10-31,...,2019-06-30,2019-07-31,2019-08-31,2019-09-30,2019-10-31,2019-11-30,2019-12-31,2020-01-31,2020-02-29,2020-03-31
StateName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CA,164133.0,163906.0,163766.0,163493.0,163318.0,163183.0,163107.0,163113.0,163162.0,163339.0,...,562386.0,563398.0,564896.0,566911.0,569019.0,571142.0,574142.0,577601.0,581867.0,586271.0
TX,98043.0,98068.0,98099.0,98226.0,98368.0,98485.0,98577.0,98692.0,98843.0,99019.0,...,207244.0,207904.0,208526.0,209208.0,209696.0,210191.0,210559.0,211003.0,211434.0,212043.0
NY,119646.0,119373.0,119211.0,118966.0,118867.0,118801.0,118722.0,118645.0,118602.0,118608.0,...,266732.0,267217.0,267654.0,268098.0,268639.0,269259.0,269990.0,270660.0,271355.0,271984.0
FL,103902.0,104008.0,104130.0,104360.0,104581.0,104823.0,105055.0,105262.0,105408.0,105562.0,...,255766.0,256449.0,257111.0,257807.0,258590.0,259429.0,260224.0,261244.0,262536.0,263965.0
IL,139641.0,139388.0,139067.0,138688.0,138168.0,137858.0,137428.0,137883.0,138584.0,140046.0,...,203835.0,204142.0,204371.0,204487.0,204550.0,204609.0,204767.0,205009.0,205222.0,205522.0


In [20]:
#switch time to rows and state name to column
housing_prices_df=housing_prices_df.T
housing_prices_df.head()

StateName,CA,TX,NY,FL,IL,PA,OH,MI,GA,NC,...,NH,RI,MT,DE,SD,AK,ND,VT,DC,WY
1996-01-31,164133.0,98043.0,119646.0,103902.0,139641.0,96738.0,92249.0,88554.0,103052.0,110441.0,...,110481.0,128809.0,,132770.0,84567.0,128799.0,,109627.0,195151.0,
1996-02-29,163906.0,98068.0,119373.0,104008.0,139388.0,96676.0,92362.0,88873.0,103209.0,110656.0,...,110217.0,128916.0,,132713.0,84961.0,129299.0,,109607.0,194150.0,
1996-03-31,163766.0,98099.0,119211.0,104130.0,139067.0,96615.0,92511.0,89176.0,103370.0,110849.0,...,110037.0,128919.0,,132655.0,84923.0,129737.0,,109517.0,193289.0,
1996-04-30,163493.0,98226.0,118966.0,104360.0,138688.0,96516.0,92820.0,89828.0,103716.0,111248.0,...,109767.0,128938.0,,132515.0,84980.0,130648.0,,109372.0,191849.0,
1996-05-31,163318.0,98368.0,118867.0,104581.0,138168.0,96423.0,93141.0,90471.0,104064.0,111641.0,...,109818.0,128778.0,,132638.0,84773.0,131314.0,,109188.0,190734.0,


In [25]:
#calculate percent change and clean data for REIT-residential stocks
housing_returns=housing_prices_df.pct_change()
housing_returns.dropna(inplace=True)
housing_returns.head()

StateName,CA,TX,NY,FL,IL,PA,OH,MI,GA,NC,...,NH,RI,MT,DE,SD,AK,ND,VT,DC,WY
2005-02-28,0.016454,0.003097,0.008752,0.01816,0.006443,0.00774,0.002938,0.002118,0.002479,0.003287,...,0.008523,0.010545,0.014091,0.009775,0.005448,0.008223,-0.001852,0.009794,0.014319,0.006661
2005-03-31,0.015761,0.001918,0.008298,0.018799,0.006283,0.007899,0.002929,0.002381,0.002635,0.00302,...,0.008178,0.008658,0.011732,0.008117,0.006043,0.008493,-0.000474,0.009524,0.016828,0.00543
2005-04-30,0.016462,0.001358,0.008793,0.019779,0.006852,0.008139,0.002584,0.002329,0.003195,0.003896,...,0.008424,0.008561,0.007645,0.009804,0.00671,0.007753,0.001134,0.01177,0.020393,0.003668
2005-05-31,0.014236,0.001958,0.008544,0.020817,0.006768,0.008744,0.002234,0.002723,0.003818,0.00377,...,0.006217,0.008021,0.005876,0.013297,0.006034,0.008281,0.005211,0.009192,0.014016,0.005034
2005-06-30,0.013029,0.002995,0.008292,0.02225,0.006983,0.009049,0.002283,0.001945,0.004106,0.004563,...,0.005259,0.007168,0.006527,0.009281,0.005347,0.006092,0.006673,0.008423,0.014435,0.004593


## Statistical Analysis Functions
* calculate percent change (done above in data clean up and retrieval)
* calculate variance/covariance and beta
* calculate 30 day rolling average
