# Import Test Data

In [1]:
import pandas as pd

# Read the test features and discard the leftover 'Unnamed: 0' column from the CSV.
df = pd.read_csv('kc_house_data_test_features.csv').drop(columns='Unnamed: 0')
print(df.shape)
df.head()

(4323, 20)


Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,1974300020,20140827T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
1,1974300020,20150218T000000,4,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,98034,47.7089,-122.241,2020,10918
2,3630020380,20141107T000000,3,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,98029,47.5472,-121.998,1470,1576
3,1771000290,20141203T000000,3,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,98077,47.7427,-122.071,1160,10565
4,5126310470,20150115T000000,4,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,98059,47.4863,-122.14,2830,7916


# Apply all Changes from Train Data onto Test Data

Unlike the training data, the `sqft_living` outliers are not removed here, because a prediction must be provided for every test house.

### Data Cleaning

In [2]:
# Cap sqft_above at 7,000 sqft: larger values are replaced by 7000, the rest are kept.
import numpy as np

df['sqft_above'] = df['sqft_above'].clip(upper=7000)


In [3]:
# Clean up bedroom counts over 20 by overwriting them with 3.
too_many_bedrooms = df['bedrooms'] > 20
df.loc[too_many_bedrooms, 'bedrooms'] = 3


### Feature Engineering

In [6]:
# Exponential of the grade column (e ** grade).
df = df.assign(grade_exp=lambda frame: np.exp(frame['grade']))


In [7]:
# Exponential of the bathrooms column (e ** bathrooms).
df = df.assign(bath_exp=lambda frame: np.exp(frame['bathrooms']))


In [8]:
# Whether or not there is a basement: 1 when sqft_basement is positive, otherwise 0.
has_basement = df['sqft_basement'] > 0
df['basement'] = has_basement.astype(int)


In [9]:
# Outdoor space: lot area minus the living area divided by the number of floors.
living_area_per_floor = df['sqft_living'].div(df['floors'])
df['sqft_outdoor'] = df['sqft_lot'].sub(living_area_per_floor)

In [10]:
# Clean sqft_outdoor by capping it at 600K: larger values are replaced by 600000.
df['sqft_outdoor'] = df['sqft_outdoor'].clip(upper=600000)

In [11]:
#distance to amazon headquarters
def haversine_distance(lat1, lon1, lat2, lon2):
    """Haversine (great-circle) distance between two latitude/longitude points.

    All four coordinates are given in degrees; they are converted with
    np.radians before use. The central angle is scaled by 6371 (Earth's mean
    radius in kilometres), so the result is in kilometres.

    Returns the distance rounded to two decimal places.
    """
    earth_radius = 6371
    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    lat_diff_rad = np.radians(lat2 - lat1)
    lon_diff_rad = np.radians(lon2 - lon1)
    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    hav = np.sin(lat_diff_rad / 2)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(lon_diff_rad / 2)**2
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    distance = earth_radius * central_angle
    return np.round(distance, 2)

# (latitude, longitude) used as the Amazon headquarters reference point.
amazon = (47.623583541669845, -122.33669143795257)

# One haversine distance per house, computed row by row from its lat/long.
dist_amazon = [
    haversine_distance(house_lat, house_long, amazon[0], amazon[1])
    for house_lat, house_long in zip(df['lat'], df['long'])
]

df['dist_amazon'] = dist_amazon

In [12]:
# Distance to Microsoft headquarters.

# (latitude, longitude) used as the Microsoft headquarters reference point.
microsoft = (47.685228949452586, -122.09372667339042)

# One haversine distance per house, computed row by row from its lat/long.
dist_microsoft = [
    haversine_distance(house_lat, house_long, microsoft[0], microsoft[1])
    for house_lat, house_long in zip(df['lat'], df['long'])
]

df['dist_microsoft'] = dist_microsoft

In [13]:
# Clean up dist_microsoft by capping it at 60: larger distances are replaced by 60.
df['dist_microsoft'] = df['dist_microsoft'].clip(upper=60)


In [15]:
#feature for Seattle neighborhood (or 'None' if not in Seattle)
# Every row starts as 'None'; each np.where below overwrites the label for the
# rows whose zipcode equals the right-hand side of that line's comparison.
#
# NOTE(review): in Python `|` binds tighter than `==`, so an expression such as
# `df['zipcode'] == 98133|98177` compares zipcode with the single integer
# (98133 | 98177), a bitwise OR, and not with either zip code. For example
# 98102|98112 evaluates to 98166, so the 'Capitol Hill' line labels the rows
# with zipcode 98166. Lines listing one distinct zip (98103, 98122, 98125)
# behave as written. The intended test was probably
# df['zipcode'].isin([...]).
#
# Left unchanged on purpose: presumably the pickled scaler/model loaded at the
# end of this notebook were fit on features built with these same expressions
# (the feature list only contains the Central / Lake Union / None / North
# dummies) -- TODO confirm against the training notebook, then fix both
# notebooks together and retrain.
df['seattle_neighborhood'] = 'None'
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98133|98177, 'Northwest', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98101|98104|98111|98114|98121|98129|98154|98161|98164|98174|98181|98184|98191, 'Downtown', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98102|98112, 'Capitol Hill', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98103|98103, 'Lake Union', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98105|98115|98145|98185|98195, 'Northeast', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98106|98106|98126 , 'Delridge', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98107|98117, 'Ballard', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98108|98124|98134, 'Duwamish', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98109|98119|98199, 'Queen Anne/Magnolia', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98116|98136|98146, 'Southwest', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98118|98144, 'Southeast', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98122, 'Central', df['seattle_neighborhood'])
df['seattle_neighborhood'] = np.where(df['zipcode'] == 98125, 'North', df['seattle_neighborhood'])



In [16]:
# Parse the sale date once, store it back, and keep the calendar month it falls in.
sale_dates = pd.to_datetime(df['date'])
df['date'] = sale_dates
df['month_sold'] = sale_dates.dt.month

In [17]:
# Season sold, looked up from the month of sale.
# Note that March (3) is grouped with Winter in this mapping.
# NOTE(review): presumably this matches the grouping used when the model was
# trained -- confirm before changing it.
season_by_month = {
    9: 'Fall', 10: 'Fall', 11: 'Fall',
    12: 'Winter', 1: 'Winter', 2: 'Winter', 3: 'Winter',
    4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
}
# A month with no entry in the lookup keeps the 'None' placeholder label.
df['season_sold'] = df['month_sold'].map(season_by_month).fillna('None')


In [18]:
# Remove the month_sold column to avoid multicollinearity with the season dummies.
df = df.drop(columns=['month_sold'])

### Create Dummies

In [19]:
# Widen the display limits so the expanded frame can be inspected.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 110)

# One-hot encode the categorical columns; drop_first=True omits the first level
# of each one.
# NOTE(review): the levels (and therefore the dropped one) come from the values
# present in this file, so a category that never occurs in the test set
# produces no column -- verify every name in `features` exists afterwards.
categorical_columns = ['seattle_neighborhood', 'bedrooms', 'zipcode', 'season_sold']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

#check new columns
df.head()

Unnamed: 0,id,date,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,lat,long,sqft_living15,sqft_lot15,grade_exp,bath_exp,basement,sqft_outdoor,dist_amazon,dist_microsoft,seattle_neighborhood_Central,seattle_neighborhood_Lake Union,seattle_neighborhood_None,seattle_neighborhood_North,bedrooms_1,bedrooms_2,bedrooms_3,bedrooms_4,bedrooms_5,bedrooms_6,bedrooms_7,bedrooms_8,bedrooms_9,bedrooms_10,zipcode_98002,zipcode_98003,zipcode_98004,zipcode_98005,zipcode_98006,zipcode_98007,zipcode_98008,zipcode_98010,zipcode_98011,zipcode_98014,zipcode_98019,zipcode_98022,...,zipcode_98038,zipcode_98039,zipcode_98040,zipcode_98042,zipcode_98045,zipcode_98052,zipcode_98053,zipcode_98055,zipcode_98056,zipcode_98058,zipcode_98059,zipcode_98065,zipcode_98070,zipcode_98072,zipcode_98074,zipcode_98075,zipcode_98077,zipcode_98092,zipcode_98102,zipcode_98103,zipcode_98105,zipcode_98106,zipcode_98107,zipcode_98108,zipcode_98109,zipcode_98112,zipcode_98115,zipcode_98116,zipcode_98117,zipcode_98118,zipcode_98119,zipcode_98122,zipcode_98125,zipcode_98126,zipcode_98133,zipcode_98136,zipcode_98144,zipcode_98146,zipcode_98148,zipcode_98155,zipcode_98166,zipcode_98168,zipcode_98177,zipcode_98178,zipcode_98188,zipcode_98198,zipcode_98199,season_sold_Spring,season_sold_Summer,season_sold_Winter
0,1974300020,2014-08-27,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,2980.957987,12.182494,1,9230.0,11.89,11.33,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,1974300020,2015-02-18,2.5,2270,11500,1.0,0,0,3,8,1540,730,1967,0,47.7089,-122.241,2020,10918,2980.957987,12.182494,1,9230.0,11.89,11.33,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,3630020380,2014-11-07,2.5,1470,1779,2.0,0,0,3,8,1160,310,2005,0,47.5472,-121.998,1470,1576,2980.957987,12.182494,1,1044.0,26.78,16.94,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1771000290,2014-12-03,1.75,1280,16200,1.0,0,0,3,8,1030,250,1976,0,47.7427,-122.071,1160,10565,2980.957987,5.754603,1,14920.0,23.9,6.61,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,5126310470,2015-01-15,2.75,2830,8126,2.0,0,0,3,8,2830,0,2005,0,47.4863,-122.14,2830,7916,2980.957987,15.642632,0,6711.0,21.23,22.39,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


### Create Polynomial Features

In [24]:
# Input columns for the degree-2 polynomial expansion (df[features] is passed
# to PolynomialFeatures below). The order matters: it determines the order and
# the names of the generated interaction columns ('a b', 'a^2').
# Every name must exist in df after the get_dummies step.
features = ['sqft_living','waterfront','sqft_living15',
 'basement',
 'sqft_outdoor',
 'dist_amazon',
 'dist_microsoft',
 'grade_exp',
 'bath_exp',
 'season_sold_Spring',
 'season_sold_Summer',
 'season_sold_Winter',
 'seattle_neighborhood_Central',
 'seattle_neighborhood_Lake Union',
 'seattle_neighborhood_None',
 'seattle_neighborhood_North',
 'bedrooms_1',
 'bedrooms_2',
 'bedrooms_3',
 'bedrooms_4',
 'bedrooms_5',
 'bedrooms_6',
 'bedrooms_7',
 'bedrooms_8',
 'bedrooms_9',
 'bedrooms_10',
 'zipcode_98002',
 'zipcode_98003',
 'zipcode_98004',
 'zipcode_98005',
 'zipcode_98006',
 'zipcode_98007',
 'zipcode_98008',
 'zipcode_98010',
 'zipcode_98011',
 'zipcode_98014',
 'zipcode_98019',
 'zipcode_98022',
 'zipcode_98023',
 'zipcode_98024',
 'zipcode_98027',
 'zipcode_98028',
 'zipcode_98029',
 'zipcode_98030',
 'zipcode_98031',
 'zipcode_98032',
 'zipcode_98033',
 'zipcode_98034',
 'zipcode_98038',
 'zipcode_98039',
 'zipcode_98040',
 'zipcode_98042',
 'zipcode_98045',
 'zipcode_98052',
 'zipcode_98053',
 'zipcode_98055',
 'zipcode_98056',
 'zipcode_98058',
 'zipcode_98059',
 'zipcode_98065',
 'zipcode_98070',
 'zipcode_98072',
 'zipcode_98074',
 'zipcode_98075',
 'zipcode_98077',
 'zipcode_98092',
 'zipcode_98102',
 'zipcode_98103',
 'zipcode_98105',
 'zipcode_98106',
 'zipcode_98107',
 'zipcode_98108',
 'zipcode_98109',
 'zipcode_98112',
 'zipcode_98115',
 'zipcode_98116',
 'zipcode_98117',
 'zipcode_98118',
 'zipcode_98119',
 'zipcode_98122',
 'zipcode_98125',
 'zipcode_98126',
 'zipcode_98133',
 'zipcode_98136',
 'zipcode_98144',
 'zipcode_98146',
 'zipcode_98148',
 'zipcode_98155',
 'zipcode_98166',
 'zipcode_98168',
 'zipcode_98177',
 'zipcode_98178',
 'zipcode_98188',
 'zipcode_98198',
 'zipcode_98199']

In [25]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the selected features with all degree-2 terms (squares and pairwise
# products). include_bias=False leaves out the constant column.
poly = PolynomialFeatures(degree=2, include_bias=False)

poly_data = poly.fit_transform(df[features])

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Use get_feature_names_out() when the installed version provides it and fall
# back to the old method otherwise; both yield the same names
# ('sqft_living^2', 'sqft_living waterfront', ...), which selected_columns
# relies on.
if hasattr(poly, 'get_feature_names_out'):
    poly_columns = list(poly.get_feature_names_out(features))
else:
    poly_columns = poly.get_feature_names(features)

df_poly = pd.DataFrame(poly_data, columns=poly_columns)

#check new columns
df_poly.head()

Unnamed: 0,sqft_living,waterfront,sqft_living15,basement,sqft_outdoor,dist_amazon,dist_microsoft,grade_exp,bath_exp,season_sold_Spring,season_sold_Summer,season_sold_Winter,seattle_neighborhood_Central,seattle_neighborhood_Lake Union,seattle_neighborhood_None,seattle_neighborhood_North,bedrooms_1,bedrooms_2,bedrooms_3,bedrooms_4,bedrooms_5,bedrooms_6,bedrooms_7,bedrooms_8,bedrooms_9,bedrooms_10,zipcode_98002,zipcode_98003,zipcode_98004,zipcode_98005,zipcode_98006,zipcode_98007,zipcode_98008,zipcode_98010,zipcode_98011,zipcode_98014,zipcode_98019,zipcode_98022,zipcode_98023,zipcode_98024,zipcode_98027,zipcode_98028,zipcode_98029,zipcode_98030,zipcode_98031,zipcode_98032,zipcode_98033,zipcode_98034,zipcode_98038,zipcode_98039,...,zipcode_98146 zipcode_98177,zipcode_98146 zipcode_98178,zipcode_98146 zipcode_98188,zipcode_98146 zipcode_98198,zipcode_98146 zipcode_98199,zipcode_98148^2,zipcode_98148 zipcode_98155,zipcode_98148 zipcode_98166,zipcode_98148 zipcode_98168,zipcode_98148 zipcode_98177,zipcode_98148 zipcode_98178,zipcode_98148 zipcode_98188,zipcode_98148 zipcode_98198,zipcode_98148 zipcode_98199,zipcode_98155^2,zipcode_98155 zipcode_98166,zipcode_98155 zipcode_98168,zipcode_98155 zipcode_98177,zipcode_98155 zipcode_98178,zipcode_98155 zipcode_98188,zipcode_98155 zipcode_98198,zipcode_98155 zipcode_98199,zipcode_98166^2,zipcode_98166 zipcode_98168,zipcode_98166 zipcode_98177,zipcode_98166 zipcode_98178,zipcode_98166 zipcode_98188,zipcode_98166 zipcode_98198,zipcode_98166 zipcode_98199,zipcode_98168^2,zipcode_98168 zipcode_98177,zipcode_98168 zipcode_98178,zipcode_98168 zipcode_98188,zipcode_98168 zipcode_98198,zipcode_98168 zipcode_98199,zipcode_98177^2,zipcode_98177 zipcode_98178,zipcode_98177 zipcode_98188,zipcode_98177 zipcode_98198,zipcode_98177 zipcode_98199,zipcode_98178^2,zipcode_98178 zipcode_98188,zipcode_98178 zipcode_98198,zipcode_98178 zipcode_98199,zipcode_98188^2,zipcode_98188 zipcode_98198,zipcode_98188 zipcode_98199,zipcode_98198^2,zipcode_98198 
zipcode_98199,zipcode_98199^2
0,2270.0,0.0,2020.0,1.0,9230.0,11.89,11.33,2980.957987,12.182494,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2270.0,0.0,2020.0,1.0,9230.0,11.89,11.33,2980.957987,12.182494,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1470.0,0.0,1470.0,1.0,1044.0,26.78,16.94,2980.957987,12.182494,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1280.0,0.0,1160.0,1.0,14920.0,23.9,6.61,2980.957987,5.754603,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2830.0,0.0,2830.0,0.0,6711.0,21.23,22.39,2980.957987,15.642632,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Define Selected Columns

In [26]:
# Subset of the polynomial-feature column names that is passed to the scaler
# and then the model (df_poly[selected_columns] below). Names follow the
# PolynomialFeatures convention: 'a^2' for squares, 'a b' for products.
# NOTE(review): presumably this is the feature set chosen during training and
# must match, in content and order, what the pickled scaler was fit on --
# confirm against the training notebook before editing.
selected_columns = ['sqft_living',
 'waterfront',
 'sqft_living15',
 'basement',
 'sqft_outdoor',
 'dist_amazon',
 'dist_microsoft',
 'grade_exp',
 'bath_exp',
 'bedrooms_2',
 'bedrooms_3',
 'bedrooms_4',
 'bedrooms_5',
 'bedrooms_6',
 'zipcode_98004',
 'zipcode_98006',
 'zipcode_98023',
 'zipcode_98033',
 'zipcode_98039',
 'zipcode_98040',
 'zipcode_98042',
 'zipcode_98075',
 'zipcode_98105',
 'zipcode_98112',
 'zipcode_98119',
 'zipcode_98168',
 'sqft_living^2',
 'sqft_living waterfront',
 'sqft_living sqft_living15',
 'sqft_living basement',
 'sqft_living sqft_outdoor',
 'sqft_living dist_amazon',
 'sqft_living dist_microsoft',
 'sqft_living grade_exp',
 'sqft_living bath_exp',
 'sqft_living season_sold_Spring',
 'sqft_living season_sold_Summer',
 'sqft_living season_sold_Winter',
 'sqft_living seattle_neighborhood_None',
 'sqft_living bedrooms_2',
 'sqft_living bedrooms_4',
 'sqft_living bedrooms_5',
 'sqft_living bedrooms_6',
 'sqft_living zipcode_98004',
 'sqft_living zipcode_98006',
 'sqft_living zipcode_98023',
 'sqft_living zipcode_98033',
 'sqft_living zipcode_98039',
 'sqft_living zipcode_98040',
 'sqft_living zipcode_98042',
 'sqft_living zipcode_98074',
 'sqft_living zipcode_98075',
 'sqft_living zipcode_98102',
 'sqft_living zipcode_98105',
 'sqft_living zipcode_98109',
 'sqft_living zipcode_98112',
 'sqft_living zipcode_98119',
 'sqft_living zipcode_98199',
 'waterfront^2',
 'waterfront sqft_living15',
 'waterfront basement',
 'waterfront sqft_outdoor',
 'waterfront dist_amazon',
 'waterfront dist_microsoft',
 'waterfront grade_exp',
 'waterfront bath_exp',
 'waterfront season_sold_Spring',
 'waterfront season_sold_Summer',
 'waterfront season_sold_Winter',
 'waterfront seattle_neighborhood_None',
 'waterfront bedrooms_3',
 'waterfront bedrooms_4',
 'waterfront bedrooms_5',
 'waterfront bedrooms_6',
 'waterfront zipcode_98008',
 'waterfront zipcode_98033',
 'waterfront zipcode_98034',
 'waterfront zipcode_98040',
 'waterfront zipcode_98075',
 'waterfront zipcode_98105',
 'waterfront zipcode_98155',
 'sqft_living15^2',
 'sqft_living15 basement',
 'sqft_living15 sqft_outdoor',
 'sqft_living15 grade_exp',
 'sqft_living15 bath_exp',
 'sqft_living15 season_sold_Spring',
 'sqft_living15 season_sold_Summer',
 'sqft_living15 season_sold_Winter',
 'sqft_living15 seattle_neighborhood_None',
 'sqft_living15 bedrooms_2',
 'sqft_living15 bedrooms_4',
 'sqft_living15 bedrooms_5',
 'sqft_living15 bedrooms_6',
 'sqft_living15 zipcode_98004',
 'sqft_living15 zipcode_98006',
 'sqft_living15 zipcode_98023',
 'sqft_living15 zipcode_98033',
 'sqft_living15 zipcode_98039',
 'sqft_living15 zipcode_98040',
 'sqft_living15 zipcode_98042',
 'sqft_living15 zipcode_98075',
 'sqft_living15 zipcode_98102',
 'sqft_living15 zipcode_98105',
 'sqft_living15 zipcode_98112',
 'sqft_living15 zipcode_98119',
 'sqft_living15 zipcode_98168',
 'sqft_living15 zipcode_98199',
 'basement^2',
 'basement sqft_outdoor',
 'basement grade_exp',
 'basement bath_exp',
 'basement season_sold_Spring',
 'basement season_sold_Summer',
 'basement seattle_neighborhood_None',
 'basement bedrooms_4',
 'basement bedrooms_5',
 'basement bedrooms_6',
 'basement zipcode_98004',
 'basement zipcode_98006',
 'basement zipcode_98033',
 'basement zipcode_98039',
 'basement zipcode_98040',
 'basement zipcode_98105',
 'basement zipcode_98112',
 'basement zipcode_98119',
 'sqft_outdoor grade_exp',
 'sqft_outdoor bath_exp',
 'sqft_outdoor seattle_neighborhood_None',
 'sqft_outdoor bedrooms_4',
 'sqft_outdoor bedrooms_5',
 'sqft_outdoor zipcode_98004',
 'sqft_outdoor zipcode_98006',
 'sqft_outdoor zipcode_98033',
 'sqft_outdoor zipcode_98039',
 'sqft_outdoor zipcode_98040',
 'sqft_outdoor zipcode_98102',
 'sqft_outdoor zipcode_98105',
 'sqft_outdoor zipcode_98112',
 'sqft_outdoor zipcode_98119',
 'sqft_outdoor zipcode_98177',
 'sqft_outdoor zipcode_98199',
 'dist_amazon^2',
 'dist_amazon dist_microsoft',
 'dist_amazon grade_exp',
 'dist_amazon bath_exp',
 'dist_amazon season_sold_Winter',
 'dist_amazon seattle_neighborhood_None',
 'dist_amazon bedrooms_2',
 'dist_amazon bedrooms_3',
 'dist_amazon bedrooms_5',
 'dist_amazon zipcode_98004',
 'dist_amazon zipcode_98006',
 'dist_amazon zipcode_98023',
 'dist_amazon zipcode_98033',
 'dist_amazon zipcode_98039',
 'dist_amazon zipcode_98040',
 'dist_amazon zipcode_98042',
 'dist_amazon zipcode_98075',
 'dist_amazon zipcode_98105',
 'dist_amazon zipcode_98112',
 'dist_amazon zipcode_98168',
 'dist_microsoft^2',
 'dist_microsoft grade_exp',
 'dist_microsoft bath_exp',
 'dist_microsoft season_sold_Summer',
 'dist_microsoft season_sold_Winter',
 'dist_microsoft seattle_neighborhood_None',
 'dist_microsoft bedrooms_2',
 'dist_microsoft bedrooms_3',
 'dist_microsoft bedrooms_5',
 'dist_microsoft zipcode_98004',
 'dist_microsoft zipcode_98006',
 'dist_microsoft zipcode_98023',
 'dist_microsoft zipcode_98033',
 'dist_microsoft zipcode_98039',
 'dist_microsoft zipcode_98040',
 'dist_microsoft zipcode_98042',
 'dist_microsoft zipcode_98075',
 'dist_microsoft zipcode_98112',
 'dist_microsoft zipcode_98119',
 'dist_microsoft zipcode_98168',
 'grade_exp^2',
 'grade_exp bath_exp',
 'grade_exp season_sold_Spring',
 'grade_exp season_sold_Summer',
 'grade_exp season_sold_Winter',
 'grade_exp seattle_neighborhood_None',
 'grade_exp bedrooms_3',
 'grade_exp bedrooms_4',
 'grade_exp bedrooms_5',
 'grade_exp bedrooms_6',
 'grade_exp zipcode_98004',
 'grade_exp zipcode_98006',
 'grade_exp zipcode_98008',
 'grade_exp zipcode_98027',
 'grade_exp zipcode_98033',
 'grade_exp zipcode_98034',
 'grade_exp zipcode_98039',
 'grade_exp zipcode_98040',
 'grade_exp zipcode_98053',
 'grade_exp zipcode_98074',
 'grade_exp zipcode_98075',
 'grade_exp zipcode_98102',
 'grade_exp zipcode_98105',
 'grade_exp zipcode_98109',
 'grade_exp zipcode_98112',
 'grade_exp zipcode_98119',
 'grade_exp zipcode_98144',
 'grade_exp zipcode_98155',
 'grade_exp zipcode_98177',
 'grade_exp zipcode_98199',
 'bath_exp^2',
 'bath_exp season_sold_Spring',
 'bath_exp season_sold_Summer',
 'bath_exp season_sold_Winter',
 'bath_exp seattle_neighborhood_None',
 'bath_exp bedrooms_4',
 'bath_exp bedrooms_5',
 'bath_exp bedrooms_6',
 'bath_exp zipcode_98004',
 'bath_exp zipcode_98006',
 'bath_exp zipcode_98008',
 'bath_exp zipcode_98033',
 'bath_exp zipcode_98039',
 'bath_exp zipcode_98040',
 'bath_exp zipcode_98074',
 'bath_exp zipcode_98075',
 'bath_exp zipcode_98102',
 'bath_exp zipcode_98105',
 'bath_exp zipcode_98109',
 'bath_exp zipcode_98112',
 'bath_exp zipcode_98119',
 'bath_exp zipcode_98199',
 'season_sold_Spring bedrooms_4',
 'season_sold_Spring bedrooms_5',
 'season_sold_Spring zipcode_98004',
 'season_sold_Spring zipcode_98039',
 'season_sold_Spring zipcode_98040',
 'season_sold_Spring zipcode_98112',
 'season_sold_Summer bedrooms_4',
 'season_sold_Summer bedrooms_5',
 'season_sold_Summer zipcode_98004',
 'season_sold_Summer zipcode_98039',
 'season_sold_Summer zipcode_98040',
 'season_sold_Summer zipcode_98112',
 'season_sold_Winter zipcode_98004',
 'season_sold_Winter zipcode_98040',
 'season_sold_Winter zipcode_98112',
 'seattle_neighborhood_None bedrooms_2',
 'seattle_neighborhood_None bedrooms_3',
 'seattle_neighborhood_None bedrooms_4',
 'seattle_neighborhood_None bedrooms_5',
 'seattle_neighborhood_None bedrooms_6',
 'seattle_neighborhood_None zipcode_98004',
 'seattle_neighborhood_None zipcode_98006',
 'seattle_neighborhood_None zipcode_98023',
 'seattle_neighborhood_None zipcode_98033',
 'seattle_neighborhood_None zipcode_98039',
 'seattle_neighborhood_None zipcode_98040',
 'seattle_neighborhood_None zipcode_98042',
 'seattle_neighborhood_None zipcode_98075',
 'seattle_neighborhood_None zipcode_98105',
 'seattle_neighborhood_None zipcode_98112',
 'seattle_neighborhood_None zipcode_98119',
 'seattle_neighborhood_None zipcode_98168',
 'bedrooms_2^2',
 'bedrooms_3^2',
 'bedrooms_3 zipcode_98004',
 'bedrooms_3 zipcode_98023',
 'bedrooms_3 zipcode_98042',
 'bedrooms_4^2',
 'bedrooms_4 zipcode_98004',
 'bedrooms_4 zipcode_98006',
 'bedrooms_4 zipcode_98033',
 'bedrooms_4 zipcode_98039',
 'bedrooms_4 zipcode_98040',
 'bedrooms_4 zipcode_98112',
 'bedrooms_5^2',
 'bedrooms_5 zipcode_98004',
 'bedrooms_5 zipcode_98006',
 'bedrooms_5 zipcode_98033',
 'bedrooms_5 zipcode_98039',
 'bedrooms_5 zipcode_98040',
 'bedrooms_6^2',
 'bedrooms_6 zipcode_98039',
 'bedrooms_6 zipcode_98040',
 'bedrooms_6 zipcode_98102',
 'zipcode_98004^2',
 'zipcode_98006^2',
 'zipcode_98023^2',
 'zipcode_98033^2',
 'zipcode_98039^2',
 'zipcode_98040^2',
 'zipcode_98042^2',
 'zipcode_98075^2',
 'zipcode_98105^2',
 'zipcode_98112^2',
 'zipcode_98119^2',
 'zipcode_98168^2']

# Apply Scaler, Make Predictions

In [27]:
import pickle

# Load the serialized scaler and model objects.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# from a trusted source. The files presumably hold objects fitted in the
# training notebook; confirm they were written with a compatible library version.
scaler, model = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('scaler.pickle', 'model.pickle')
)


In [28]:
from sklearn.preprocessing import StandardScaler

# Apply the loaded scaler to the selected columns. Only transform() is called,
# so the scaler is not re-fitted on the test data.
holdout_features = df_poly[selected_columns]
transformed_holdout = scaler.transform(holdout_features)


In [29]:
# Predict with the loaded model on the scaled hold-out features
# (one row of transformed_holdout per test house).
answers = model.predict(transformed_holdout)


In [30]:
# Write the predictions to CSV. Default to_csv settings are used, so the row
# index and a header row are included in the file.
predictions = pd.DataFrame(answers)
predictions.to_csv('housing_preds_DavidaRosenstrauch.csv')
