# The Effect of Education Spending on Home Valuations Across the United States of America


In [1]:
# import libraries 

import pandas as pd
import numpy as np
import statsmodels.api as sm

# Show floats with two decimal places whenever pandas displays them
pd.options.display.float_format = '{:.2f}'.format

# Read datasets
# Forward slashes are used as the path separator: the backslash-separated
# literals contained invalid escape sequences (backslash followed by F, Z
# and e), which Python warns about, and they only resolved on Windows.
# Forward slashes work on Windows, Linux and macOS alike.

gdpDeflator = pd.read_csv('data/FRED_ipd.csv')

homeValuation = pd.read_csv('data/ZillowHousingValues_state-by-state.csv')

educationSpending = pd.read_csv('data/education-spending_state-by-state.csv')

#### Format gdpDeflator so it can be used with homeValuation and educationSpending. This table will later be merged with each of them to perform the GDP-deflator (inflation) adjustments.

In [2]:
# Give the deflator table the short column names used by the other datasets
gdpDeflator = gdpDeflator.rename(columns={'DATE': 'year', 'USAGDPDEFAISMEI': 'ipd'})

# Parse the date column so it can be compared and merged as datetimes
gdpDeflator['year'] = pd.to_datetime(gdpDeflator['year'])

# Keep only the rows inside the study window (both end dates inclusive)
# and renumber the remaining rows from zero
in_window = (gdpDeflator['year'] >= '2000-01-01') & (gdpDeflator['year'] <= '2016-01-01')
gdpDeflator = gdpDeflator[in_window].reset_index(drop=True)

#### Adjust Base year from 2015 to 2016

In [3]:
# Date of the new reference year for the deflator series
base_year = '2016-01-01'

# Deflator value currently recorded for the base year; dividing it by 100
# gives the factor that rescales that year's value to exactly 100
base_year_ipd = gdpDeflator.loc[gdpDeflator['year'] == base_year, 'ipd'].values[0]
conversion_factor = base_year_ipd / 100

# Rescale every observation so the series is expressed relative to the base year
rebased_ipd = gdpDeflator['ipd'] / conversion_factor
gdpDeflator['ipd'] = rebased_ipd

#### Format educationSpending to be merged with gdpDeflator (for inflation calculations)

In [4]:
# Reshape from wide form (one column per year) to long form
# (one row per state/year pair)
educationSpending = pd.melt(
    educationSpending,
    id_vars=['state'],
    var_name='year',
    value_name='USD',
)

# The former column headers are parsed into datetimes
educationSpending['year'] = pd.to_datetime(educationSpending['year'])

# Order rows by state and, within each state, by date
educationSpending = educationSpending.sort_values(['state', 'year'])

# Keep only the rows inside the study window (both end dates inclusive)
# and renumber the remaining rows from zero
in_window = (educationSpending['year'] >= '2000-01-01') & (educationSpending['year'] <= '2016-01-01')
educationSpending = educationSpending[in_window].reset_index(drop=True)

# Use a descriptive name for the value column
educationSpending = educationSpending.rename(columns={'USD': 'EducationSpend'})

#### Format homeValuation Dataframe

In [5]:
# Columns that are not needed for the analysis
collumnToDrop = ['RegionID', 'SizeRank', 'RegionType', 'StateName']
homeValuation = homeValuation.drop(columns=collumnToDrop)

# Reshape from wide form (one column per date) to long form
# (one row per region/date pair)
homeValuation = pd.melt(
    homeValuation,
    id_vars=['RegionName'],
    var_name='Date',
    value_name='HomeValue',
)

# Parse the dates and roll each one back to a month start.
# NOTE(review): presumably the source dates are month-end stamps, so this maps
# them to the first day of the same month - confirm against the raw file.
parsed_dates = pd.to_datetime(homeValuation['Date'])
homeValuation['Date'] = parsed_dates + pd.tseries.offsets.MonthBegin(-1)

# Order rows by region and, within each region, by date
homeValuation = homeValuation.sort_values(['RegionName', 'Date'])

# Keep only January observations that fall inside the study window
# (both end dates inclusive)
dates = homeValuation['Date']
keep_rows = (dates.dt.month == 1) & (dates <= '2016-01-01') & (dates >= '2000-01-01')
homeValuation = homeValuation[keep_rows]

# Use the same key column names as the other tables so they can be joined later
homeValuation = homeValuation.rename(columns={'Date': 'year', 'RegionName': 'state'})

# Discard rows that contain missing values, then renumber the rows from zero
homeValuation = homeValuation.dropna().reset_index(drop=True)

#### Left join educationSpending and gdpDeflator, to be used in gdp deflation calculations

In [6]:
# Left join: attach the matching year's deflator value to every education row
education_gdpDeflator = pd.merge(educationSpending, gdpDeflator, on='year', how='left')

# Deflator value of the base year. It is looked up by date rather than by a
# hard-coded row position, so it stays correct if the deflator table's row
# count or ordering ever changes.
BaseYear = gdpDeflator.loc[gdpDeflator['year'] == '2016-01-01', 'ipd'].iloc[0]

# Convert nominal spending to base-year dollars:
# nominal value / deflator of that year * deflator of the base year.
# The column is created directly by this assignment; no placeholder is needed.
education_gdpDeflator['adjusted_EducationSpend'] = (
    education_gdpDeflator['EducationSpend'] / education_gdpDeflator['ipd']
) * BaseYear

In [7]:
# Final education table: remove the deflator and the nominal spending column,
# leaving the remaining columns (including the inflation-adjusted figure)
collumn_Drop = ['ipd', 'EducationSpend']
educationSpending_final = education_gdpDeflator.drop(columns=collumn_Drop)

# Preview the first 20 rows
educationSpending_final.head(20)

Unnamed: 0,state,year,adjusted_EducationSpend
0,Alabama,2000-01-01,5430164.89
1,Alabama,2001-01-01,5487005.84
2,Alabama,2002-01-01,5576575.72
3,Alabama,2003-01-01,5669021.11
4,Alabama,2004-01-01,5715963.43
5,Alabama,2005-01-01,5951621.55
6,Alabama,2006-01-01,6421525.28
7,Alabama,2007-01-01,6967551.44
8,Alabama,2008-01-01,7471373.4
9,Alabama,2009-01-01,7376747.43


In [8]:
# Left join: attach the matching year's deflator value to every home-value row
homeValuation_gdpDeflator = pd.merge(homeValuation, gdpDeflator, on='year', how='left')

# Convert nominal home values to base-year dollars:
# nominal value / deflator of that year * deflator of the base year.
# BaseYear is the base-year deflator value assigned in an earlier cell.
# The column is created directly by this assignment; no placeholder is needed.
homeValuation_gdpDeflator['adjusted_homeValuation'] = (
    homeValuation_gdpDeflator['HomeValue'] / homeValuation_gdpDeflator['ipd']
) * BaseYear



In [12]:
# Final home-value table: remove the deflator and the nominal value column,
# leaving the remaining columns (including the inflation-adjusted figure)
collumn_Drop = ['ipd', 'HomeValue']
homeValuation_final = homeValuation_gdpDeflator.drop(columns=collumn_Drop)

# Preview the first rows
homeValuation_final.head()

Unnamed: 0,state,year,adjusted_homeValuation
0,Alabama,2000-01-01,106425.87
1,Alabama,2001-01-01,108112.99
2,Alabama,2002-01-01,110015.34
3,Alabama,2003-01-01,110635.58
4,Alabama,2004-01-01,111159.3


In [13]:
# Combine both adjusted tables into one, matching rows on state and year.
# The left join keeps every education row; a row without a matching home
# value gets NaN in the home-valuation column.
OLS_modelmerge = educationSpending_final.merge(
    homeValuation_final,
    how='left',
    on=['state', 'year'],
)
display(OLS_modelmerge)

Unnamed: 0,state,year,adjusted_EducationSpend,adjusted_homeValuation
0,Alabama,2000-01-01,5430164.89,106425.87
1,Alabama,2001-01-01,5487005.84,108112.99
2,Alabama,2002-01-01,5576575.72,110015.34
3,Alabama,2003-01-01,5669021.11,110635.58
4,Alabama,2004-01-01,5715963.43,111159.30
...,...,...,...,...
862,Wyoming,2012-01-01,1500243.15,194314.90
863,Wyoming,2013-01-01,1481604.84,202100.62
864,Wyoming,2014-01-01,1483239.65,202650.32
865,Wyoming,2015-01-01,1510709.98,208274.54


In [None]:
# Open question from the author: should an intercept term be included?
# NOTE(review): no constant column is added to X in this cell - decide
# before fitting the model.

# Column names of the regressor and of the response in the merged table
predictor_column = 'adjusted_EducationSpend'
response_column = 'adjusted_homeValuation'

# Independent variable: inflation-adjusted education spending
X = OLS_modelmerge[predictor_column]

# Dependent variable: inflation-adjusted home valuation
y = OLS_modelmerge[response_column]