# Project 2: Ames Housing Data and Kaggle Challenge

## Cleaning Test Data

### Import Libraries

In [44]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics

%matplotlib inline

### Data Import

In [45]:
# Widen the display limit first so the preview below shows every column.
pd.set_option('display.max_columns', 500)
df = pd.read_csv('datasets/test.csv')
df.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,Land Slope,Neighborhood,Condition 1,Condition 2,Bldg Type,House Style,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Mas Vnr Type,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating,Heating QC,Central Air,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Type,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,GasA,Gd,N,FuseP,908,1020,0,1928,0,0,2,0,4,2,Fa,9,Typ,0,,Detchd,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,GasA,TA,Y,SBrkr,1967,0,0,1967,0,0,2,0,6,2,TA,10,Typ,0,,Attchd,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,GasA,Ex,Y,SBrkr,664,832,0,1496,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,968,968,GasA,TA,Y,SBrkr,968,0,0,968,0,0,1,0,2,1,TA,5,Typ,0,,Detchd,1935.0,Unf,2,480,Fa,TA,N,0,0,184,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,TA,TA,CBlock,Gd,TA,No,BLQ,609,Unf,0,785,1394,GasA,Gd,Y,SBrkr,1394,0,0,1394,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,TA,TA,Y,0,76,0,0,185,0,,,,0,7,2009,WD


### Data Cleaning

#### Check for missing data / null values

In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 878 entries, 0 to 877
Data columns (total 80 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Id               878 non-null    int64  
 1   PID              878 non-null    int64  
 2   MS SubClass      878 non-null    int64  
 3   MS Zoning        878 non-null    object 
 4   Lot Frontage     718 non-null    float64
 5   Lot Area         878 non-null    int64  
 6   Street           878 non-null    object 
 7   Alley            58 non-null     object 
 8   Lot Shape        878 non-null    object 
 9   Land Contour     878 non-null    object 
 10  Utilities        878 non-null    object 
 11  Lot Config       878 non-null    object 
 12  Land Slope       878 non-null    object 
 13  Neighborhood     878 non-null    object 
 14  Condition 1      878 non-null    object 
 15  Condition 2      878 non-null    object 
 16  Bldg Type        878 non-null    object 
 17  House Style     

In [47]:
df.shape

(878, 80)

In [48]:
# List every column that contains nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Pool QC           874
Misc Feature      837
Alley             820
Fence             706
Fireplace Qu      422
Lot Frontage      160
Garage Yr Blt      45
Garage Finish      45
Garage Qual        45
Garage Cond        45
Garage Type        44
BsmtFin Type 1     25
Bsmt Qual          25
Bsmt Cond          25
Bsmt Exposure      25
BsmtFin Type 2     25
Electrical          1
Mas Vnr Type        1
Mas Vnr Area        1
dtype: int64


In [49]:
# Express the null counts as a percentage of all rows.
percent_null_data = null_values.mul(100).div(len(df))
print(percent_null_data.loc[percent_null_data.gt(0)])

Pool QC           99.544419
Misc Feature      95.330296
Alley             93.394077
Fence             80.410023
Fireplace Qu      48.063781
Lot Frontage      18.223235
Garage Yr Blt      5.125285
Garage Finish      5.125285
Garage Qual        5.125285
Garage Cond        5.125285
Garage Type        5.011390
BsmtFin Type 1     2.847380
Bsmt Qual          2.847380
Bsmt Cond          2.847380
Bsmt Exposure      2.847380
BsmtFin Type 2     2.847380
Electrical         0.113895
Mas Vnr Type       0.113895
Mas Vnr Area       0.113895
dtype: float64


#### Pool QC, Misc Feature, Alley and Fence are each missing more than 80% of their values, so these columns are dropped.

In [50]:
# Columns removed from the test frame:
#   - Pool QC, Misc Feature, Alley, Fence: majority of the values are missing
#   - PID: identifier column that is not required
columns_to_drop = ['Pool QC', 'Misc Feature', 'Alley', 'Fence', 'PID']
df.drop(columns=columns_to_drop, inplace=True)

df.head()

Unnamed: 0,Id,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Lot Shape,Land Contour,Utilities,Lot Config,Land Slope,Neighborhood,Condition 1,Condition 2,Bldg Type,House Style,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Mas Vnr Type,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating,Heating QC,Central Air,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Type,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,190,RM,69.0,9142,Pave,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,GasA,Gd,N,FuseP,908,1020,0,1928,0,0,2,0,4,2,Fa,9,Typ,0,,Detchd,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,0,0,0,4,2006,WD
1,2718,90,RL,,9662,Pave,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,GasA,TA,Y,SBrkr,1967,0,0,1967,0,0,2,0,6,2,TA,10,Typ,0,,Attchd,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,0,0,0,8,2006,WD
2,2414,60,RL,58.0,17104,Pave,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,GasA,Ex,Y,SBrkr,664,832,0,1496,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,0,0,0,9,2006,New
3,1989,30,RM,60.0,8520,Pave,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,968,968,GasA,TA,Y,SBrkr,968,0,0,968,0,0,1,0,2,1,TA,5,Typ,0,,Detchd,1935.0,Unf,2,480,Fa,TA,N,0,0,184,0,0,0,0,7,2007,WD
4,625,20,RL,,9500,Pave,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,TA,TA,CBlock,Gd,TA,No,BLQ,609,Unf,0,785,1394,GasA,Gd,Y,SBrkr,1394,0,0,1394,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,TA,TA,Y,0,76,0,0,185,0,0,7,2009,WD


In [51]:
df.shape

(878, 75)

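#### For Bsmt Half Bath, Bsmt Full Bath, BsmtFin SF 1, Garage Area, Garage Cars, BsmtFin SF 2, Bsmt Unf SF and Total Bsmt SF, null values would amount to about 1% of the data or less, so rows with nulls in these columns are dropped. (None of these columns appears in the null summary above, so no rows are removed from this test set.)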

In [52]:
# Drop any row that has a null in one of these basement / garage columns.
# NOTE(review): any row removed here would have no prediction in a submission
# built from this frame -- confirm that dropping (rather than imputing) is
# intended for test data.
required_columns = [
    'Bsmt Half Bath', 'Bsmt Full Bath', 'BsmtFin SF 1', 'Garage Area',
    'Garage Cars', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
]
df.dropna(subset=required_columns, inplace=True)

In [53]:
# confirm the shape after the row-wise dropna above
# (the earlier column drop removed 5 columns: Pool QC, Misc Feature, Alley, Fence, PID)
df.shape

(878, 75)

In [54]:
df['Mas Vnr Type'].value_counts()

None       534
BrkFace    250
Stone       80
BrkCmn      12
CBlock       1
Name: Mas Vnr Type, dtype: int64

In [55]:
df['Mas Vnr Area'].value_counts()

0.0      532
216.0      7
80.0       5
420.0      5
196.0      5
        ... 
233.0      1
189.0      1
34.0       1
771.0      1
415.0      1
Name: Mas Vnr Area, Length: 232, dtype: int64

#### Replace null Mas Vnr Type values with the most frequent type, and null Mas Vnr Area values with 0

In [56]:
# Fill the missing veneer type with the most frequent category.
df['Mas Vnr Type'] = df['Mas Vnr Type'].fillna(df['Mas Vnr Type'].mode()[0])
# Fill the missing veneer area with the number 0 (not the string '0'), so the
# column stays numeric instead of becoming a mixed float/str object column.
df['Mas Vnr Area'] = df['Mas Vnr Area'].fillna(0)

In [57]:
# inspect non-null counts and dtypes after the Mas Vnr imputation above
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 878 entries, 0 to 877
Data columns (total 75 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Id               878 non-null    int64  
 1   MS SubClass      878 non-null    int64  
 2   MS Zoning        878 non-null    object 
 3   Lot Frontage     718 non-null    float64
 4   Lot Area         878 non-null    int64  
 5   Street           878 non-null    object 
 6   Lot Shape        878 non-null    object 
 7   Land Contour     878 non-null    object 
 8   Utilities        878 non-null    object 
 9   Lot Config       878 non-null    object 
 10  Land Slope       878 non-null    object 
 11  Neighborhood     878 non-null    object 
 12  Condition 1      878 non-null    object 
 13  Condition 2      878 non-null    object 
 14  Bldg Type        878 non-null    object 
 15  House Style      878 non-null    object 
 16  Overall Qual     878 non-null    int64  
 17  Overall Cond    

In [58]:
# List the columns that still contain nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Fireplace Qu      422
Lot Frontage      160
Garage Yr Blt      45
Garage Finish      45
Garage Qual        45
Garage Cond        45
Garage Type        44
Bsmt Exposure      25
Bsmt Qual          25
Bsmt Cond          25
BsmtFin Type 1     25
BsmtFin Type 2     25
Electrical          1
dtype: int64


In [59]:
# Express the remaining null counts as a percentage of all rows.
percent_null_data = null_values.mul(100).div(len(df))
print(percent_null_data.loc[percent_null_data.gt(0)])

Fireplace Qu      48.063781
Lot Frontage      18.223235
Garage Yr Blt      5.125285
Garage Finish      5.125285
Garage Qual        5.125285
Garage Cond        5.125285
Garage Type        5.011390
Bsmt Exposure      2.847380
Bsmt Qual          2.847380
Bsmt Cond          2.847380
BsmtFin Type 1     2.847380
BsmtFin Type 2     2.847380
Electrical         0.113895
dtype: float64


#### Replace Fireplace Qu null values with 'No' based on data cleaning from train data

In [60]:
# A missing fireplace quality is recorded as the explicit category 'No'.
df['Fireplace Qu'] = df['Fireplace Qu'].fillna('No')
df['Fireplace Qu'].value_counts()

No    422
Gd    220
TA    193
Fa     16
Po     15
Ex     12
Name: Fireplace Qu, dtype: int64

In [61]:
# List the columns that still contain nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Lot Frontage      160
Garage Yr Blt      45
Garage Finish      45
Garage Cond        45
Garage Qual        45
Garage Type        44
Bsmt Cond          25
BsmtFin Type 2     25
BsmtFin Type 1     25
Bsmt Exposure      25
Bsmt Qual          25
Electrical          1
dtype: int64


#### Replace null values for Bsmt Exposure, BsmtFin Type 2, BsmtFin Type 1, Bsmt Cond, Bsmt Qual with mode values based on data cleaning in train data

In [62]:
# Fill each basement category column with its own most frequent value.
basement_category_columns = [
    'Bsmt Exposure', 'BsmtFin Type 2', 'BsmtFin Type 1', 'Bsmt Cond', 'Bsmt Qual',
]
for basement_column in basement_category_columns:
    most_frequent = df[basement_column].mode()[0]
    df[basement_column] = df[basement_column].fillna(most_frequent)

In [63]:
# List the columns that still contain nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Lot Frontage     160
Garage Yr Blt     45
Garage Finish     45
Garage Qual       45
Garage Cond       45
Garage Type       44
Electrical         1
dtype: int64


#### Assume that these rows have no garage and fill the garage columns accordingly, following the data cleaning applied to the train data

In [64]:
# Rows without garage details are treated as having no garage.
# Numeric columns get the number 0 -- the string '0' would turn them into
# object columns and make later numeric comparisons (e.g. `x > 0`) fail.
for garage_number_col in ['Garage Yr Blt', 'Garage Area']:
    df[garage_number_col] = df[garage_number_col].fillna(0)
# Categorical columns get the explicit 'No' category.
for garage_category_col in ['Garage Qual', 'Garage Cond', 'Garage Finish', 'Garage Type']:
    df[garage_category_col] = df[garage_category_col].fillna('No')

In [65]:
# List the columns that still contain nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Lot Frontage    160
Electrical        1
dtype: int64


#### Replace the null Electrical value with the most frequent category

In [66]:
df['Electrical'].value_counts()

SBrkr    813
FuseA     48
FuseF     15
FuseP      1
Name: Electrical, dtype: int64

In [67]:
# Fill the single missing Electrical entry with the most frequent category.
most_common_electrical = df['Electrical'].mode()[0]
df['Electrical'] = df['Electrical'].fillna(most_common_electrical)

In [68]:
# List the columns that still contain nulls, largest count first.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Lot Frontage    160
dtype: int64


#### Replace null Lot Frontage values with a linear-regression estimate from Lot Area (using the fitted coefficient and intercept)

In [69]:
# Fit a simple linear model of Lot Frontage on Lot Area, using only the rows
# where the frontage is known. Chaining .dropna() yields a new frame, so no
# in-place edit is made on a slice of `df` (which raises SettingWithCopyWarning).
df2 = df[['Lot Frontage', 'Lot Area']].dropna()
X = df2[['Lot Area']]
y = df2['Lot Frontage']
lr = LinearRegression()
lr.fit(X, y)

LinearRegression()

In [70]:
# Keep the fitted parameters under their own names (the imputation cell reads
# them) and print the intercept followed by the coefficient array.
intercept, coef = lr.intercept_, lr.coef_
for fitted_value in (intercept, coef):
    print(fitted_value)

58.546880864074744
[0.00111376]


In [71]:
# Estimate the missing frontage from the lot area using the fitted line.
# `coef` is a one-element array, so index it to get a scalar slope; calling
# int() on an array expression is deprecated in recent NumPy releases.
# astype(int) truncates toward zero, exactly like the int() call it replaces.
estimated_frontage = (df['Lot Area'] * coef[0] + intercept).astype(int)
# Only null entries are replaced; existing frontage values are left untouched.
df['Lot Frontage'] = df['Lot Frontage'].fillna(estimated_frontage)

In [72]:
# Final check: list any column that still contains nulls.
null_values = df.isna().sum().sort_values(ascending=False)
print(null_values.loc[null_values.gt(0)])

Series([], dtype: int64)


In [73]:
# Age of the house at the time of sale: sale year minus construction year.
df["House Age"] = df["Yr Sold"].sub(df["Year Built"])

In [74]:
df.shape

(878, 76)

In [75]:
# Each indicator column is 1 when its source measurement is positive, else 0.
indicator_sources = {
    'has 2nd floor': '2nd Flr SF',
    'has basement': 'Total Bsmt SF',
    'has fireplace': 'Fireplaces',
    'has garage': 'Garage Area',
    'has pool': 'Pool Area',
}

for indicator_name, source_name in indicator_sources.items():
    df[indicator_name] = df[source_name].gt(0).astype('int64')

# Keep just the indicator column names, as a list.
variables = list(indicator_sources)

In [76]:
df.shape

(878, 81)

### Encoding ordinal features

In [77]:
# Integer codes for each categorical column, keyed by column name.
# These columns are one-hot encoded afterwards, so the codes act as labels.
# NOTE(review): the codes presumably have to match the ones used when cleaning
# the train data -- confirm before changing any value.
category_codes = {
    'Lot Shape': {'Reg': 0, 'IR1': 1, 'IR2': 2, 'IR3': 3},
    'Utilities': {'AllPub': 0, 'NoSewr': 1, 'NoSeWa': 2, 'ELO': 3},
    'Land Slope': {'Gtl': 0, 'Mod': 1, 'Sev': 2},
    'Bsmt Qual': {'Gd': 0, 'TA': 1, 'Ex': 2, 'No': 3, 'None': 3, 'NA': 3, 'Fa': 4, 'Po': 5},
    'Bsmt Cond': {'TA': 0, 'Gd': 1, 'No': 2, 'None': 2, 'NA': 2, 'Fa': 3, 'Po': 4, 'Ex': 5},
    'Exter Qual': {'Gd': 0, 'TA': 1, 'Ex': 2, 'Fa': 3, 'Po': 4},
    'Exter Cond': {'TA': 0, 'Gd': 1, 'Fa': 2, 'Po': 3, 'Ex': 4},
    # The original dict listed 'No' twice (0 and 4). Python keeps the last
    # value for a repeated key, so 'No' has always been encoded as 4 and code 0
    # was never produced; the dead 'No': 0 entry is removed here.
    'Bsmt Exposure': {'Gd': 1, 'Mn': 2, 'Av': 3, 'No': 4, 'None': 4, 'NA': 4},
    'BsmtFin Type 1': {'GLQ': 0, 'ALQ': 1, 'Unf': 2, 'Rec': 3, 'BLQ': 4, 'No': 5, 'None': 5, 'NA': 5, 'LwQ': 6},
    'BsmtFin Type 2': {'Unf': 0, 'BLQ': 1, 'No': 2, 'None': 2, 'NA': 2, 'ALQ': 3, 'Rec': 4, 'LwQ': 5, 'GLQ': 6},
    'Heating QC': {'Ex': 0, 'Gd': 1, 'TA': 2, 'Fa': 3, 'Po': 4},
    'Electrical': {'SBrkr': 0, 'FuseF': 1, 'FuseA': 2, 'FuseP': 3, 'Mix': 4},
    'Kitchen Qual': {'Gd': 0, 'TA': 1, 'Ex': 2, 'Fa': 3, 'Po': 4},
    'Functional': {'Typ': 0, 'Min1': 1, 'Maj1': 2, 'Min2': 3, 'Mod': 4, 'Maj2': 5, 'Sev': 6, 'Sal': 7},
    'Fireplace Qu': {'No': 0, 'None': 0, 'NA': 0, 'TA': 1, 'Gd': 2, 'Fa': 3, 'Ex': 4, 'Po': 5},
    'Garage Finish': {'RFn': 0, 'Unf': 1, 'Fin': 2, 'None': 3, 'NA': 3, 'No': 3},
    'Garage Qual': {'TA': 0, 'Fa': 1, 'Gd': 2, 'No': 3, 'Ex': 4, 'Po': 5},
    'Garage Cond': {'TA': 0, 'Fa': 1, 'No': 2, 'None': 2, 'NA': 2, 'Gd': 3, 'Po': 4, 'Ex': 5},
    'Paved Drive': {'Y': 0, 'N': 1, 'P': 2},
}

# Apply every mapping; values that are not listed in a mapping are left unchanged.
for coded_column, code_map in category_codes.items():
    df[coded_column] = df[coded_column].replace(code_map)

### One-Hot Encoding

In [78]:
# Columns expanded into 0/1 dummy columns. The order is kept as-is because it
# determines the order of the generated columns.
# NOTE(review): drop_first=True drops the first category *present in this
# frame*; confirm that this is the same baseline category as in the train data.
one_hot_columns = [
    "MS Zoning", "Lot Shape", "Utilities", "Land Slope", "Overall Qual",
    "Overall Cond", "Exter Qual", "Exter Cond", "Bsmt Qual", "Bsmt Cond",
    "Bsmt Exposure", "BsmtFin Type 1", "BsmtFin Type 2", "Heating QC",
    "Electrical", "Full Bath", "Half Bath", "Bedroom AbvGr", "Kitchen AbvGr",
    "Kitchen Qual", "Functional", "Fireplaces", "Fireplace Qu", "Garage Finish",
    "Garage Qual", "Garage Cond", "Paved Drive", "Street", "Land Contour",
    "Lot Config", "Neighborhood", "Condition 1", "Condition 2", "Bldg Type",
    "House Style", "Roof Style", "Roof Matl", "Exterior 1st", "Exterior 2nd",
    "Mas Vnr Type", "Foundation", "Heating", "Central Air", "Garage Type",
    "Sale Type",
]
df = pd.get_dummies(df, columns=one_hot_columns, drop_first=True)

In [79]:
df.shape

(878, 260)

In [80]:
df.head()

Unnamed: 0,Id,MS SubClass,Lot Frontage,Lot Area,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,TotRms AbvGrd,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,House Age,has 2nd floor,has basement,has fireplace,has garage,has pool,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Lot Shape_1,Lot Shape_2,Lot Shape_3,Utilities_1,Land Slope_1,Land Slope_2,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,Exter Qual_1,Exter Qual_2,Exter Qual_3,Exter Cond_1,Exter Cond_2,Exter Cond_3,Exter Cond_4,Bsmt Qual_1,Bsmt Qual_2,Bsmt Qual_4,Bsmt Qual_5,Bsmt Cond_1,Bsmt Cond_3,Bsmt Exposure_2,Bsmt Exposure_3,Bsmt Exposure_4,BsmtFin Type 1_1,BsmtFin Type 1_2,BsmtFin Type 1_3,BsmtFin Type 1_4,BsmtFin Type 1_6,BsmtFin Type 2_1,BsmtFin Type 2_3,BsmtFin Type 2_4,BsmtFin Type 2_5,BsmtFin Type 2_6,Heating QC_1,Heating QC_2,Heating QC_3,Electrical_1,Electrical_2,Electrical_3,Full Bath_1,Full Bath_2,Full Bath_3,Full Bath_4,Half Bath_1,Half Bath_2,Bedroom AbvGr_1,Bedroom AbvGr_2,Bedroom AbvGr_3,Bedroom AbvGr_4,Bedroom AbvGr_5,Bedroom AbvGr_6,Kitchen AbvGr_1,Kitchen AbvGr_2,Kitchen AbvGr_3,Kitchen Qual_1,Kitchen Qual_2,Kitchen Qual_3,Kitchen Qual_4,Functional_1,Functional_2,Functional_3,Functional_4,Functional_5,Fireplaces_1,Fireplaces_2,Fireplaces_3,Fireplace Qu_1,Fireplace Qu_2,Fireplace Qu_3,Fireplace Qu_4,Fireplace Qu_5,Garage Finish_1,Garage Finish_2,Garage Finish_3,Garage Qual_1,Garage Qual_2,Garage Qual_3,Garage Qual_5,Garage Cond_1,Garage Cond_2,Garage Cond_3,Garage Cond_4,Garage Cond_5,Paved Drive_1,Paved Drive_2,Street_Pave,Land Contour_HLS,Land Contour_Low,Land 
Contour_Lvl,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_None,Mas Vnr Type_Stone,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage 
Type_Detchd,Garage Type_No,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD
0,2658,190,69.0,9142,1910,1950,0.0,0,0,1020,1020,908,1020,0,1928,0,0,9,1910.0,1,440,0,60,112,0,0,0,0,4,2006,96,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
1,2718,90,69.0,9662,1977,1977,0.0,0,0,1967,1967,1967,0,0,1967,0,0,10,1977.0,2,580,170,0,0,0,0,0,0,8,2006,29,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,2414,60,58.0,17104,2006,2006,0.0,554,0,100,654,664,832,0,1496,1,0,7,2006.0,2,426,100,24,0,0,0,0,0,9,2006,0,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
3,1989,30,60.0,8520,1923,2006,0.0,0,0,968,968,968,0,0,968,0,0,5,1935.0,2,480,0,0,184,0,0,0,0,7,2007,84,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
4,625,20,69.0,9500,1963,1963,247.0,609,0,785,1394,1394,0,0,1394,1,0,6,1963.0,2,514,0,76,0,0,185,0,0,7,2009,46,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1


### Adding columns that are missing from this dataset but are used as model features

In [81]:
# The encoded test frame has no 'Overall Qual_2' dummy column; add it as all zeros.
df = df.assign(**{'Overall Qual_2': 0})

In [82]:
# The encoded test frame has no 'Fireplaces_4' dummy column; add it as all zeros.
df = df.assign(**{'Fireplaces_4': 0})

In [83]:
df.head()

Unnamed: 0,Id,MS SubClass,Lot Frontage,Lot Area,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,TotRms AbvGrd,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,House Age,has 2nd floor,has basement,has fireplace,has garage,has pool,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Lot Shape_1,Lot Shape_2,Lot Shape_3,Utilities_1,Land Slope_1,Land Slope_2,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,Exter Qual_1,Exter Qual_2,Exter Qual_3,Exter Cond_1,Exter Cond_2,Exter Cond_3,Exter Cond_4,Bsmt Qual_1,Bsmt Qual_2,Bsmt Qual_4,Bsmt Qual_5,Bsmt Cond_1,Bsmt Cond_3,Bsmt Exposure_2,Bsmt Exposure_3,Bsmt Exposure_4,BsmtFin Type 1_1,BsmtFin Type 1_2,BsmtFin Type 1_3,BsmtFin Type 1_4,BsmtFin Type 1_6,BsmtFin Type 2_1,BsmtFin Type 2_3,BsmtFin Type 2_4,BsmtFin Type 2_5,BsmtFin Type 2_6,Heating QC_1,Heating QC_2,Heating QC_3,Electrical_1,Electrical_2,Electrical_3,Full Bath_1,Full Bath_2,Full Bath_3,Full Bath_4,Half Bath_1,Half Bath_2,Bedroom AbvGr_1,Bedroom AbvGr_2,Bedroom AbvGr_3,Bedroom AbvGr_4,Bedroom AbvGr_5,Bedroom AbvGr_6,Kitchen AbvGr_1,Kitchen AbvGr_2,Kitchen AbvGr_3,Kitchen Qual_1,Kitchen Qual_2,Kitchen Qual_3,Kitchen Qual_4,Functional_1,Functional_2,Functional_3,Functional_4,Functional_5,Fireplaces_1,Fireplaces_2,Fireplaces_3,Fireplace Qu_1,Fireplace Qu_2,Fireplace Qu_3,Fireplace Qu_4,Fireplace Qu_5,Garage Finish_1,Garage Finish_2,Garage Finish_3,Garage Qual_1,Garage Qual_2,Garage Qual_3,Garage Qual_5,Garage Cond_1,Garage Cond_2,Garage Cond_3,Garage Cond_4,Garage Cond_5,Paved Drive_1,Paved Drive_2,Street_Pave,Land Contour_HLS,Land Contour_Low,Land 
Contour_Lvl,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_None,Mas Vnr Type_Stone,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage 
Type_Detchd,Garage Type_No,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Overall Qual_2,Fireplaces_4
0,2658,190,69.0,9142,1910,1950,0.0,0,0,1020,1020,908,1020,0,1928,0,0,9,1910.0,1,440,0,60,112,0,0,0,0,4,2006,96,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
1,2718,90,69.0,9662,1977,1977,0.0,0,0,1967,1967,1967,0,0,1967,0,0,10,1977.0,2,580,170,0,0,0,0,0,0,8,2006,29,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,2414,60,58.0,17104,2006,2006,0.0,554,0,100,654,664,832,0,1496,1,0,7,2006.0,2,426,100,24,0,0,0,0,0,9,2006,0,1,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,1989,30,60.0,8520,1923,2006,0.0,0,0,968,968,968,0,0,968,0,0,5,1935.0,2,480,0,0,184,0,0,0,0,7,2007,84,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
4,625,20,69.0,9500,1963,1963,247.0,609,0,785,1394,1394,0,0,1394,1,0,6,1963.0,2,514,0,76,0,0,185,0,0,7,2009,46,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [84]:
df.shape

(878, 262)

### Saving CSV to be used for submission

In [85]:
# Write the cleaned test features to disk without the row index.
cleaned_path = 'datasets/cleaned_test_feature.csv'
df.to_csv(cleaned_path, index=False)