In [266]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [267]:
df_train = pd.read_csv('../house-prices-advanced-regression-techniques/train.csv')
df_test = pd.read_csv('../house-prices-advanced-regression-techniques/test.csv')

In [268]:
df_train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [269]:
# Preprocessing - separate features into x variable and
# corresponding labels into y
X_train = df_train.drop('SalePrice', 1)
y_train = df_train['SalePrice']
X_test = df_test
X_train[['LotFrontage','GarageYrBlt', 'GarageCars', 'GarageArea']] = X_train[['LotFrontage',\
                                                                              'GarageYrBlt',\
                                                                              'GarageCars',\
                                                                              'GarageArea']].fillna(0)
X_test[['LotFrontage','GarageYrBlt', 'GarageCars', 'GarageArea']] = X_test[['LotFrontage',\
                                                                            'GarageYrBlt',\
                                                                            'GarageCars',\
                                                                            'GarageArea']].fillna(0)

In [270]:
X_train = pd.get_dummies(X_train, columns=['MSZoning', 'Street', 'Alley',\
                                      'LotShape', 'LandContour', 'Utilities',\
                                      'LotConfig', 'LandSlope', 'Neighborhood', 
                                      'Condition1', 'Condition2',\
                                      'BldgType', 'HouseStyle', 'RoofStyle',\
                                      'RoofMatl', 'Exterior1st', 'Exterior2nd',\
                                      'MasVnrType', 'ExterQual', 'ExterCond',\
                                      'Foundation', 'BsmtQual', 'BsmtCond',\
                                      'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',\
                                      'Heating', 'HeatingQC', 'CentralAir',\
                                      'Electrical', 'KitchenQual', 'Functional',\
                                      'FireplaceQu', 'GarageType', 'GarageFinish',\
                                      'GarageQual', 'GarageCond', 'PavedDrive',\
                                      'PoolQC', 'Fence', 'MiscFeature',\
                                      'SaleType', 'SaleCondition'])

In [271]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

In [272]:
X_test = pd.get_dummies(X_test, columns=['MSZoning', 'Street', 'Alley',\
                                      'LotShape', 'LandContour', 'Utilities',\
                                      'LotConfig', 'LandSlope', 'Neighborhood', 
                                      'Condition1', 'Condition2',\
                                      'BldgType', 'HouseStyle', 'RoofStyle',\
                                      'RoofMatl', 'Exterior1st', 'Exterior2nd',\
                                      'MasVnrType', 'ExterQual', 'ExterCond',\
                                      'Foundation', 'BsmtQual', 'BsmtCond',\
                                      'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',\
                                      'Heating', 'HeatingQC', 'CentralAir',\
                                      'Electrical', 'KitchenQual', 'Functional',\
                                      'FireplaceQu', 'GarageType', 'GarageFinish',\
                                      'GarageQual', 'GarageCond', 'PavedDrive',\
                                      'PoolQC', 'Fence', 'MiscFeature',\
                                      'SaleType', 'SaleCondition'])

In [273]:
list(X_test.columns)

['Id',
 'MSSubClass',
 'LotFrontage',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageYrBlt',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MiscVal',
 'MoSold',
 'YrSold',
 'MSZoning_C (all)',
 'MSZoning_FV',
 'MSZoning_RH',
 'MSZoning_RL',
 'MSZoning_RM',
 'Street_Grvl',
 'Street_Pave',
 'Alley_Grvl',
 'Alley_Pave',
 'LotShape_IR1',
 'LotShape_IR2',
 'LotShape_IR3',
 'LotShape_Reg',
 'LandContour_Bnk',
 'LandContour_HLS',
 'LandContour_Low',
 'LandContour_Lvl',
 'Utilities_AllPub',
 'LotConfig_Corner',
 'LotConfig_CulDSac',
 'LotConfig_FR2',
 'LotConfig_FR3',
 'LotConfig_Inside',
 'LandSlope_Gtl',
 'LandSlope_Mod',
 'Lan

In [274]:
for x in X_test:
    print(x)

Id
MSSubClass
LotFrontage
LotArea
OverallQual
OverallCond
YearBuilt
YearRemodAdd
MasVnrArea
BsmtFinSF1
BsmtFinSF2
BsmtUnfSF
TotalBsmtSF
1stFlrSF
2ndFlrSF
LowQualFinSF
GrLivArea
BsmtFullBath
BsmtHalfBath
FullBath
HalfBath
BedroomAbvGr
KitchenAbvGr
TotRmsAbvGrd
Fireplaces
GarageYrBlt
GarageCars
GarageArea
WoodDeckSF
OpenPorchSF
EnclosedPorch
3SsnPorch
ScreenPorch
PoolArea
MiscVal
MoSold
YrSold
MSZoning_C (all)
MSZoning_FV
MSZoning_RH
MSZoning_RL
MSZoning_RM
Street_Grvl
Street_Pave
Alley_Grvl
Alley_Pave
LotShape_IR1
LotShape_IR2
LotShape_IR3
LotShape_Reg
LandContour_Bnk
LandContour_HLS
LandContour_Low
LandContour_Lvl
Utilities_AllPub
LotConfig_Corner
LotConfig_CulDSac
LotConfig_FR2
LotConfig_FR3
LotConfig_Inside
LandSlope_Gtl
LandSlope_Mod
LandSlope_Sev
Neighborhood_Blmngtn
Neighborhood_Blueste
Neighborhood_BrDale
Neighborhood_BrkSide
Neighborhood_ClearCr
Neighborhood_CollgCr
Neighborhood_Crawfor
Neighborhood_Edwards
Neighborhood_Gilbert
Neighborhood_IDOTRR
Neighborhood_MeadowV
Neighborho

In [275]:
X_test = sc.fit_transform(X_test)

In [276]:
pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').