# Educational Spending's Effect on Home Valuations Across the United States of America


In [9]:
# import libraries 

import pandas as pd
import numpy as np
import statsmodels.api as sm

# Read datasets
#
# Paths are written with forward slashes: they resolve on Windows, Linux and
# macOS alike, and they avoid backslash sequences such as '\g', '\Z' and '\e',
# which Python treats as invalid string escapes (deprecated, and reported as a
# SyntaxWarning on Python 3.12+).

gdpDeflator = pd.read_csv('data/gdp_deflator_FRED.csv')

homeValuation = pd.read_csv('data/ZillowHousingValues_state-by-state.csv')

educationSpending = pd.read_csv('data/education-spending_state-by-state.csv')

# Preview the first rows of the raw housing data
homeValuation.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2000-01-31,2000-02-29,2000-03-31,2000-04-30,2000-05-31,...,2022-06-30,2022-07-31,2022-08-31,2022-09-30,2022-10-31,2022-11-30,2022-12-31,2023-01-31,2023-02-28,2023-03-31
0,9,0,California,state,,186276.110907,186903.75446,187750.027242,189586.300199,191719.031375,...,770917.504409,771314.829198,766685.736923,759238.756987,752637.613746,747513.149009,742356.811334,735996.325796,730505.205856,728133.501198
1,54,1,Texas,state,,105214.359426,105270.23952,105296.592,105431.576095,105518.343516,...,296589.890038,299379.177131,300228.906412,299532.992183,298589.121983,297623.444249,296121.316278,294864.993311,294073.120181,294336.348545
2,14,2,Florida,state,,103672.977445,103897.883804,104170.164927,104723.376752,105319.556053,...,380194.359106,385446.114848,388077.606462,388365.446497,387892.617981,387242.033534,385828.827185,384293.206747,383029.022145,383063.088395
3,43,3,New York,state,,123754.505069,124195.869047,124620.230729,125532.440125,126474.471071,...,409537.856474,412732.587736,413850.46872,412716.522689,411295.15767,410125.738516,408268.08773,407584.883808,408242.022283,411304.031953
4,47,4,Pennsylvania,state,,89306.93252,89498.07941,89677.686799,90043.628933,90419.511725,...,240680.874131,241661.576521,241588.251605,240795.850242,240652.061602,240982.617796,241276.57684,241860.227074,242563.191621,243858.898093


Format gdpDeflator so it can be used with homeValuation and educationSpending. This table will eventually be merged with them to perform GDP deflation calculations.

In [10]:
# Parse the DATE column into datetime values so it can be range-filtered
gdpDeflator['DATE'] = pd.to_datetime(gdpDeflator['DATE'])

# Give the FRED series column a shorter, readable name
gdpDeflator = gdpDeflator.rename(columns={'A191RI1A225NBEA': 'GDP'})

# Keep only rows dated 1997-01-01 through 2016-01-01 (both ends inclusive),
# the span chosen to line up with the educationSpending dataframe, then
# renumber the rows from 0 so the index matches the reduced date range
gdpDeflator = gdpDeflator[
    gdpDeflator['DATE'].between('1997-01-01', '2016-01-01')
].reset_index(drop=True)
gdpDeflator.head()


Unnamed: 0,DATE,GDP
0,1997-01-01,1.7
1,1998-01-01,1.1
2,1999-01-01,1.4
3,2000-01-01,2.3
4,2001-01-01,2.3


Format educationSpending to be merged with gdpDeflator (for inflation calculations)

In [11]:
# Reshape from wide form (one column per year) to long form:
# one row per (state, year) pair with the amount stored in 'USD'
educationSpending = educationSpending.melt(id_vars=['state'], var_name='year', value_name='USD')

# Parse the year labels into datetime values
educationSpending['year'] = pd.to_datetime(educationSpending['year'])

# Order by state, then chronologically within each state (easier indexing),
# and renumber the rows from 0 to match the new order
educationSpending = educationSpending.sort_values(['state', 'year']).reset_index(drop=True)

educationSpending.head()

Unnamed: 0,state,year,USD
0,Alabama,1997-01-01,3271969
1,Alabama,1998-01-01,3504764
2,Alabama,1999-01-01,3784422
3,Alabama,2000-01-01,4006894
4,Alabama,2001-01-01,4140053


Format homeValuation DataFrame

In [12]:
# Drop the columns that are not needed for the analysis
collumnToDrop = ['RegionID', 'SizeRank', 'RegionType', 'StateName']
homeValuation = homeValuation.drop(columns=collumnToDrop)

# Reshape from wide form (one column per date) to long form:
# one row per (RegionName, Date) pair with the value stored in 'HomeValue'
homeValuation = homeValuation.melt(id_vars=['RegionName'], var_name='Date', value_name='HomeValue')

# Parse the date labels, then snap each one onto a month start.
# MonthBegin(0) leaves a date that is already the 1st unchanged and rolls any
# other date forward to the 1st of the following month.
# NOTE(review): the column labels are month-end dates, so a December reading
# ends up labelled as 1 January of the next year -- confirm this is intended.
homeValuation['Date'] = pd.to_datetime(homeValuation['Date']) + pd.tseries.offsets.MonthBegin(0)

# Order by state, then chronologically within each state
homeValuation = homeValuation.sort_values(['RegionName', 'Date'])

# Keep only the rows dated in January (one observation per year),
# then renumber the rows from 0
homeValuation = homeValuation.loc[homeValuation['Date'].dt.month == 1].reset_index(drop=True)
homeValuation.head(50)

Unnamed: 0,RegionName,Date,HomeValue
0,Alabama,2001-01-01,81173.266499
1,Alabama,2002-01-01,84187.071396
2,Alabama,2003-01-01,86256.311972
3,Alabama,2004-01-01,88977.110393
4,Alabama,2005-01-01,93046.280821
5,Alabama,2006-01-01,98015.482865
6,Alabama,2007-01-01,105972.413138
7,Alabama,2008-01-01,108259.562541
8,Alabama,2009-01-01,106019.16571
9,Alabama,2010-01-01,101072.374147
