In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [30]:
# Remove pandas' display truncation so every row and column is rendered
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [31]:
# Import the raw housing data
housing_df = pd.read_csv("Resources/train.csv")
print("Input file has been read in")

# Preview the first rows. This must be the cell's LAST expression: Jupyter only
# renders the value of the final expression, so a bare `head()` placed before
# the print above would be computed and silently discarded.
housing_df.head()

Input file has been read in


In [32]:
# Report the (rows, columns) dimensions of the raw housing dataframe
housing_df.shape

(1460, 81)

In [33]:
# Summarize each column's non-null count and dtype to spot columns with missing data
housing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [34]:
# Print the frequency table of every housing_df column (NaN counted as its own value),
# framed by START/END banners so each column's table is easy to find in the output
for column_name in housing_df.columns:
    frequency_table = housing_df[column_name].value_counts(dropna=False)
    print(f'-------------------- {column_name} START --------------------')
    print(frequency_table)
    print(f'-------------------- {column_name} END ----------------------')
    print('\n')

print("Value counts for all housing_df columns Complete")

-------------------- Id START --------------------
Id
1       1
982     1
980     1
979     1
978     1
977     1
976     1
975     1
974     1
973     1
972     1
971     1
970     1
969     1
968     1
967     1
966     1
965     1
964     1
963     1
962     1
981     1
983     1
960     1
984     1
1003    1
1002    1
1001    1
1000    1
999     1
998     1
997     1
996     1
995     1
994     1
993     1
992     1
991     1
990     1
989     1
988     1
987     1
986     1
985     1
961     1
959     1
914     1
936     1
934     1
933     1
932     1
931     1
930     1
929     1
928     1
927     1
926     1
925     1
924     1
923     1
922     1
921     1
920     1
919     1
918     1
917     1
916     1
935     1
937     1
958     1
938     1
957     1
956     1
955     1
954     1
953     1
952     1
951     1
950     1
949     1
948     1
947     1
946     1
945     1
944     1
943     1
942     1
941     1
940     1
939     1
1004    1
1005    1
1006    1
1073    1
1071  

BsmtExposure
No     953
Av     221
Gd     134
Mn     114
NaN     38
Name: count, dtype: int64
-------------------- BsmtExposure END ----------------------


-------------------- BsmtFinType1 START --------------------
BsmtFinType1
Unf    430
GLQ    418
ALQ    220
BLQ    148
Rec    133
LwQ     74
NaN     37
Name: count, dtype: int64
-------------------- BsmtFinType1 END ----------------------


-------------------- BsmtFinSF1 START --------------------
BsmtFinSF1
0       467
24       12
16        9
686       5
662       5
20        5
936       5
616       5
560       4
553       4
312       4
400       4
641       4
384       4
360       4
442       4
588       4
600       4
300       4
495       4
1200      4
697       4
655       4
547       4
504       4
428       4
1274      3
685       3
674       3
680       3
624       3
507       3
1300      3
767       3
695       3
625       3
660       3
450       3
120       3
210       3
570       3
375       3
250       3
700       3
816  

EnclosedPorch
0      1252
112      15
96        6
192       5
144       5
120       5
216       5
156       4
116       4
252       4
184       3
126       3
150       3
164       3
128       3
102       3
176       3
228       3
40        3
80        2
236       2
34        2
160       2
32        2
84        2
115       2
158       2
137       2
240       2
100       2
154       2
185       2
264       2
190       2
168       2
108       2
36        2
202       2
234       2
77        2
244       2
180       2
224       2
205       2
39        2
114       2
268       1
19        1
174       1
226       1
138       1
94        1
318       1
48        1
220       1
194       1
68        1
254       1
230       1
275       1
172       1
170       1
87        1
214       1
162       1
50        1
212       1
221       1
198       1
301       1
20        1
286       1
123       1
280       1
42        1
81        1
259       1
145       1
208       1
330       1
134       1
196       1
14

In [35]:
# List the column labels of the raw housing dataframe
housing_df.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [36]:
# Columns of housing_df that will be removed before analysis
# (same names, same order; laid out five per line for readability)
cols_to_delete = [
    'Id', 'Street', 'Alley', 'LandContour', 'Utilities',
    'Condition2', 'RoofMatl', 'MasVnrType', 'MasVnrArea', 'BsmtFinType2',
    'BsmtFinSF2', 'Heating', 'CentralAir', 'Electrical', 'LowQualFinSF',
    'BsmtHalfBath', 'KitchenAbvGr', 'PavedDrive', '3SsnPorch', 'ScreenPorch',
    'PoolArea', 'PoolQC', 'Fence', 'MiscFeature', 'MiscVal',
]

In [37]:
# Restore point: work on an independent (deep) copy so the raw housing_df
# stays untouched by the column deletions that follow
housing_df_revised = housing_df.copy(deep=True)
print("housing_df -- Restore point created")

housing_df -- Restore point created


In [38]:
# Drop the non-value-add columns listed in cols_to_delete.
# The result is rebuilt from the untouched raw frame (housing_df) rather than
# dropped in place: an in-place drop raises KeyError when this cell is run a
# second time, because the columns are already gone. DataFrame.drop returns a
# new frame, so housing_df itself is never modified.
# Note: re-running this cell resets housing_df_revised to its pre-imputation
# state, so the cells below it must be re-run as well.
housing_df_revised = housing_df.drop(columns=cols_to_delete)
print("Dropped Non-value add columns from housing_df_revised dataframe")

Dropped Non-value add columns from housing_df_revised dataframe


In [39]:
# Report the (rows, columns) dimensions of housing_df_revised after the column drop
housing_df_revised.shape

(1460, 56)

In [40]:
# List the column labels that remain in the revised housing dataframe
housing_df_revised.columns

Index(['MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'LotShape',
       'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', 'HeatingQC', '1stFlrSF',
       '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'FullBath', 'HalfBath',
       'BedroomAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional',
       'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt',
       'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond',
       'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', 'MoSold', 'YrSold',
       'SaleType', 'SaleCondition', 'SalePrice'],
      dtype='object')

In [41]:
# Count the missing values in each column of the revised housing dataframe
housing_df_revised.isna().sum()

MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
LotShape           0
LotConfig          0
LandSlope          0
Neighborhood       0
Condition1         0
BldgType           0
HouseStyle         0
OverallQual        0
OverallCond        0
YearBuilt          0
YearRemodAdd       0
RoofStyle          0
Exterior1st        0
Exterior2nd        0
ExterQual          0
ExterCond          0
Foundation         0
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinSF1         0
BsmtUnfSF          0
TotalBsmtSF        0
HeatingQC          0
1stFlrSF           0
2ndFlrSF           0
GrLivArea          0
BsmtFullBath       0
FullBath           0
HalfBath           0
BedroomAbvGr       0
KitchenQual        0
TotRmsAbvGrd       0
Functional         0
Fireplaces         0
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageCars         0
GarageArea         0
GarageQual        81
GarageCond   

In [42]:
# Show every row whose 'LotFrontage' value is missing
lot_frontage_missing = housing_df_revised['LotFrontage'].isna()
print(housing_df_revised[lot_frontage_missing])

      MSSubClass MSZoning  LotFrontage  LotArea LotShape LotConfig LandSlope  \
7             60       RL          NaN    10382      IR1    Corner       Gtl   
12            20       RL          NaN    12968      IR2    Inside       Gtl   
14            20       RL          NaN    10920      IR1    Corner       Gtl   
16            20       RL          NaN    11241      IR1   CulDSac       Gtl   
24            20       RL          NaN     8246      IR1    Inside       Gtl   
31            20       RL          NaN     8544      IR1   CulDSac       Gtl   
42            85       RL          NaN     9180      IR1   CulDSac       Gtl   
43            20       RL          NaN     9200      IR1   CulDSac       Gtl   
50            60       RL          NaN    13869      IR2    Corner       Gtl   
64            60       RL          NaN     9375      Reg    Inside       Gtl   
66            20       RL          NaN    19900      Reg    Inside       Gtl   
76            20       RL          NaN  

In [43]:
# After analysis:
#  Homes with null LotFrontage - 259

# Impute the missing 'LotFrontage' values with the column mean,
# rounded to a whole number, then preview the first ten rows
lot_frontage_fill = round(housing_df_revised['LotFrontage'].mean())
housing_df_revised['LotFrontage'] = housing_df_revised['LotFrontage'].fillna(lot_frontage_fill)
housing_df_revised.iloc[:10]


Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,LotShape,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,Exterior1st,Exterior2nd,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,HeatingQC,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,BedroomAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Reg,Inside,Gtl,CollgCr,Norm,1Fam,2Story,7,5,2003,2003,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,No,GLQ,706,150,856,Ex,856,854,1710,1,2,1,3,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,0,61,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Reg,FR2,Gtl,Veenker,Feedr,1Fam,1Story,6,8,1976,1976,Gable,MetalSd,MetalSd,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,284,1262,Ex,1262,0,1262,0,2,0,3,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,298,0,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,IR1,Inside,Gtl,CollgCr,Norm,1Fam,2Story,7,5,2001,2002,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,434,920,Ex,920,866,1786,1,2,1,3,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,0,42,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,IR1,Corner,Gtl,Crawfor,Norm,1Fam,2Story,7,5,1915,1970,Gable,Wd Sdng,Wd Shng,TA,TA,BrkTil,TA,Gd,No,ALQ,216,540,756,Gd,961,756,1717,1,1,0,3,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,0,35,272,2,2006,WD,Abnorml,140000
4,60,RL,84.0,14260,IR1,FR2,Gtl,NoRidge,Norm,1Fam,2Story,8,5,2000,2000,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,Av,GLQ,655,490,1145,Ex,1145,1053,2198,1,2,1,4,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,192,84,0,12,2008,WD,Normal,250000
5,50,RL,85.0,14115,IR1,Inside,Gtl,Mitchel,Norm,1Fam,1.5Fin,5,5,1993,1995,Gable,VinylSd,VinylSd,TA,TA,Wood,Gd,TA,No,GLQ,732,64,796,Ex,796,566,1362,1,1,1,1,TA,5,Typ,0,,Attchd,1993.0,Unf,2,480,TA,TA,40,30,0,10,2009,WD,Normal,143000
6,20,RL,75.0,10084,Reg,Inside,Gtl,Somerst,Norm,1Fam,1Story,8,5,2004,2005,Gable,VinylSd,VinylSd,Gd,TA,PConc,Ex,TA,Av,GLQ,1369,317,1686,Ex,1694,0,1694,1,2,0,3,Gd,7,Typ,1,Gd,Attchd,2004.0,RFn,2,636,TA,TA,255,57,0,8,2007,WD,Normal,307000
7,60,RL,70.0,10382,IR1,Corner,Gtl,NWAmes,PosN,1Fam,2Story,7,6,1973,1973,Gable,HdBoard,HdBoard,TA,TA,CBlock,Gd,TA,Mn,ALQ,859,216,1107,Ex,1107,983,2090,1,2,1,3,TA,7,Typ,2,TA,Attchd,1973.0,RFn,2,484,TA,TA,235,204,228,11,2009,WD,Normal,200000
8,50,RM,51.0,6120,Reg,Inside,Gtl,OldTown,Artery,1Fam,1.5Fin,7,5,1931,1950,Gable,BrkFace,Wd Shng,TA,TA,BrkTil,TA,TA,No,Unf,0,952,952,Gd,1022,752,1774,0,2,0,2,TA,8,Min1,2,TA,Detchd,1931.0,Unf,2,468,Fa,TA,90,0,205,4,2008,WD,Abnorml,129900
9,190,RL,50.0,7420,Reg,Corner,Gtl,BrkSide,Artery,2fmCon,1.5Unf,5,6,1939,1950,Gable,MetalSd,MetalSd,TA,TA,BrkTil,TA,TA,No,GLQ,851,140,991,Ex,1077,0,1077,1,1,0,2,TA,5,Typ,2,TA,Attchd,1939.0,RFn,1,205,Gd,TA,0,4,0,1,2008,WD,Normal,118000


In [44]:
# Show every row whose 'BsmtExposure' value is missing
bsmt_exposure_missing = housing_df_revised['BsmtExposure'].isna()
print(housing_df_revised[bsmt_exposure_missing])

      MSSubClass MSZoning  LotFrontage  LotArea LotShape LotConfig LandSlope  \
17            90       RL         72.0    10791      Reg    Inside       Gtl   
39            90       RL         65.0     6040      Reg    Inside       Gtl   
90            20       RL         60.0     7200      Reg    Inside       Gtl   
102           90       RL         64.0     7018      Reg    Inside       Gtl   
156           20       RL         60.0     7200      Reg    Inside       Gtl   
182           20       RL         60.0     9060      Reg    Inside       Gtl   
259           20       RM         70.0    12702      Reg    Inside       Gtl   
342           90       RL         70.0     8544      Reg    Inside       Gtl   
362           85       RL         64.0     7301      Reg    Corner       Gtl   
371           50       RL         80.0    17120      Reg    Inside       Gtl   
392           20       RL         70.0     8339      IR1    Inside       Gtl   
520          190       RL         60.0  

In [45]:
# After Analysis:
# 'BsmtExposure' has 38 nulls, one more than the 37 nulls in the other basement
# columns (those 37 mark homes with no basement).
# Inspect the extra row, at position 948, to decide what 'BsmtExposure' should be.
print(housing_df_revised.iloc[948])

MSSubClass            60
MSZoning              RL
LotFrontage         65.0
LotArea            14006
LotShape             IR1
LotConfig         Inside
LandSlope            Gtl
Neighborhood     CollgCr
Condition1          Norm
BldgType            1Fam
HouseStyle        2Story
OverallQual            7
OverallCond            5
YearBuilt           2002
YearRemodAdd        2002
RoofStyle          Gable
Exterior1st      VinylSd
Exterior2nd      VinylSd
ExterQual             Gd
ExterCond             TA
Foundation         PConc
BsmtQual              Gd
BsmtCond              TA
BsmtExposure         NaN
BsmtFinType1         Unf
BsmtFinSF1             0
BsmtUnfSF            936
TotalBsmtSF          936
HeatingQC             Ex
1stFlrSF             936
2ndFlrSF             840
GrLivArea           1776
BsmtFullBath           0
FullBath               2
HalfBath               1
BedroomAbvGr           3
KitchenQual           Gd
TotRmsAbvGrd           7
Functional           Typ
Fireplaces             1


In [46]:
# After further investigation, set the null 'BsmtExposure' of row 948 to 'No'
# (No Exposure), which is the value most homes have.
# Both the write and the read-back are label-based, so they are guaranteed to
# address the same row. Mixing a label-based write with a positional read only
# lines up while the frame keeps its default 0..n-1 index.
row_label = 948
housing_df_revised.at[row_label, 'BsmtExposure'] = 'No'
print('Dataframe row 948 after BsmtExposure column updated to No')
print(housing_df_revised.loc[row_label])

Dataframe row 948 after BsmtExposure column updated to No
MSSubClass            60
MSZoning              RL
LotFrontage         65.0
LotArea            14006
LotShape             IR1
LotConfig         Inside
LandSlope            Gtl
Neighborhood     CollgCr
Condition1          Norm
BldgType            1Fam
HouseStyle        2Story
OverallQual            7
OverallCond            5
YearBuilt           2002
YearRemodAdd        2002
RoofStyle          Gable
Exterior1st      VinylSd
Exterior2nd      VinylSd
ExterQual             Gd
ExterCond             TA
Foundation         PConc
BsmtQual              Gd
BsmtCond              TA
BsmtExposure          No
BsmtFinType1         Unf
BsmtFinSF1             0
BsmtUnfSF            936
TotalBsmtSF          936
HeatingQC             Ex
1stFlrSF             936
2ndFlrSF             840
GrLivArea           1776
BsmtFullBath           0
FullBath               2
HalfBath               1
BedroomAbvGr           3
KitchenQual           Gd
TotRmsAbvGrd     

In [47]:
# After analysis:
#  Homes with no Basement - 37
#  Homes with no Fireplace - 690
#  Homes with no Garage - 81

# In the columns below a null means the feature is absent (No Basement,
# No Fireplace, or No Garage), so replace the nulls with the string 'None'.
# NOTE(review): recent pandas versions list "None" among read_csv's default NaN
# markers, so these values may be parsed back to NaN when the saved CSV is
# reloaded unless the reader passes keep_default_na=False. Verify downstream.

convert_null_values_cols = ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'FireplaceQu',
                            'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']

# Fill all listed columns in one vectorized call instead of looping per column
housing_df_revised[convert_null_values_cols] = housing_df_revised[convert_null_values_cols].fillna('None')

# The message names the value actually written ('None'), not 'NA'
print("Converted column null values to 'None' to indicate that home has No Basement, No Fireplace, or No Garage")

Converted column null values to NA to indicate that home has No Basement, No Fireplace, or No Garage


In [48]:
# Spot-check the row at position 17 after the null-value conversion
print(housing_df_revised.iloc[17])

MSSubClass            90
MSZoning              RL
LotFrontage         72.0
LotArea            10791
LotShape             Reg
LotConfig         Inside
LandSlope            Gtl
Neighborhood      Sawyer
Condition1          Norm
BldgType          Duplex
HouseStyle        1Story
OverallQual            4
OverallCond            5
YearBuilt           1967
YearRemodAdd        1967
RoofStyle          Gable
Exterior1st      MetalSd
Exterior2nd      MetalSd
ExterQual             TA
ExterCond             TA
Foundation          Slab
BsmtQual            None
BsmtCond            None
BsmtExposure        None
BsmtFinType1        None
BsmtFinSF1             0
BsmtUnfSF              0
TotalBsmtSF            0
HeatingQC             TA
1stFlrSF            1296
2ndFlrSF               0
GrLivArea           1296
BsmtFullBath           0
FullBath               2
HalfBath               0
BedroomAbvGr           2
KitchenQual           TA
TotRmsAbvGrd           6
Functional           Typ
Fireplaces             0


In [49]:
# Show every row whose 'GarageYrBlt' value is missing
garage_year_missing = housing_df_revised['GarageYrBlt'].isna()
print(housing_df_revised[garage_year_missing])

      MSSubClass MSZoning  LotFrontage  LotArea LotShape LotConfig LandSlope  \
39            90       RL         65.0     6040      Reg    Inside       Gtl   
48           190       RM         33.0     4456      Reg    Inside       Gtl   
78            90       RL         72.0    10778      Reg    Inside       Gtl   
88            50  C (all)        105.0     8470      IR1    Corner       Gtl   
89            20       RL         60.0     8070      Reg    Inside       Gtl   
99            20       RL         77.0     9320      IR1    Inside       Gtl   
108           50       RM         85.0     8500      Reg    Corner       Gtl   
125          190       RM         60.0     6780      Reg    Inside       Gtl   
127           45       RM         55.0     4388      IR1    Inside       Gtl   
140           20       RL         70.0    10500      Reg       FR2       Gtl   
148           20       RL         63.0     7500      Reg    Inside       Gtl   
155           50       RL         60.0  

In [50]:
# Homes with No Garage have a null 'GarageYrBlt'; substitute the year the
# house itself was built ('YearBuilt') on those rows
year_built = housing_df_revised['YearBuilt']
housing_df_revised['GarageYrBlt'] = housing_df_revised['GarageYrBlt'].fillna(value=year_built)
print('For homes with No Garage, set null GarageYrBlt values to the year the house was built, YearBuilt')

For homes with No Garage, set null GarageYrBlt values to the year the house was built, YearBuilt


In [51]:
# Preview the first ten rows of housing_df_revised after the null value updates
housing_df_revised.iloc[:10]

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,LotShape,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,Exterior1st,Exterior2nd,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,HeatingQC,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,BedroomAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Reg,Inside,Gtl,CollgCr,Norm,1Fam,2Story,7,5,2003,2003,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,No,GLQ,706,150,856,Ex,856,854,1710,1,2,1,3,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,0,61,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Reg,FR2,Gtl,Veenker,Feedr,1Fam,1Story,6,8,1976,1976,Gable,MetalSd,MetalSd,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,284,1262,Ex,1262,0,1262,0,2,0,3,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,298,0,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,IR1,Inside,Gtl,CollgCr,Norm,1Fam,2Story,7,5,2001,2002,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,434,920,Ex,920,866,1786,1,2,1,3,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,0,42,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,IR1,Corner,Gtl,Crawfor,Norm,1Fam,2Story,7,5,1915,1970,Gable,Wd Sdng,Wd Shng,TA,TA,BrkTil,TA,Gd,No,ALQ,216,540,756,Gd,961,756,1717,1,1,0,3,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,0,35,272,2,2006,WD,Abnorml,140000
4,60,RL,84.0,14260,IR1,FR2,Gtl,NoRidge,Norm,1Fam,2Story,8,5,2000,2000,Gable,VinylSd,VinylSd,Gd,TA,PConc,Gd,TA,Av,GLQ,655,490,1145,Ex,1145,1053,2198,1,2,1,4,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,192,84,0,12,2008,WD,Normal,250000
5,50,RL,85.0,14115,IR1,Inside,Gtl,Mitchel,Norm,1Fam,1.5Fin,5,5,1993,1995,Gable,VinylSd,VinylSd,TA,TA,Wood,Gd,TA,No,GLQ,732,64,796,Ex,796,566,1362,1,1,1,1,TA,5,Typ,0,,Attchd,1993.0,Unf,2,480,TA,TA,40,30,0,10,2009,WD,Normal,143000
6,20,RL,75.0,10084,Reg,Inside,Gtl,Somerst,Norm,1Fam,1Story,8,5,2004,2005,Gable,VinylSd,VinylSd,Gd,TA,PConc,Ex,TA,Av,GLQ,1369,317,1686,Ex,1694,0,1694,1,2,0,3,Gd,7,Typ,1,Gd,Attchd,2004.0,RFn,2,636,TA,TA,255,57,0,8,2007,WD,Normal,307000
7,60,RL,70.0,10382,IR1,Corner,Gtl,NWAmes,PosN,1Fam,2Story,7,6,1973,1973,Gable,HdBoard,HdBoard,TA,TA,CBlock,Gd,TA,Mn,ALQ,859,216,1107,Ex,1107,983,2090,1,2,1,3,TA,7,Typ,2,TA,Attchd,1973.0,RFn,2,484,TA,TA,235,204,228,11,2009,WD,Normal,200000
8,50,RM,51.0,6120,Reg,Inside,Gtl,OldTown,Artery,1Fam,1.5Fin,7,5,1931,1950,Gable,BrkFace,Wd Shng,TA,TA,BrkTil,TA,TA,No,Unf,0,952,952,Gd,1022,752,1774,0,2,0,2,TA,8,Min1,2,TA,Detchd,1931.0,Unf,2,468,Fa,TA,90,0,205,4,2008,WD,Abnorml,129900
9,190,RL,50.0,7420,Reg,Corner,Gtl,BrkSide,Artery,2fmCon,1.5Unf,5,6,1939,1950,Gable,MetalSd,MetalSd,TA,TA,BrkTil,TA,TA,No,GLQ,851,140,991,Ex,1077,0,1077,1,1,0,2,TA,5,Typ,2,TA,Attchd,1939.0,RFn,1,205,Gd,TA,0,4,0,1,2008,WD,Normal,118000


In [52]:
# After the null updates, recount missing values per column to confirm none remain
housing_df_revised.isna().sum()
   

MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
LotShape         0
LotConfig        0
LandSlope        0
Neighborhood     0
Condition1       0
BldgType         0
HouseStyle       0
OverallQual      0
OverallCond      0
YearBuilt        0
YearRemodAdd     0
RoofStyle        0
Exterior1st      0
Exterior2nd      0
ExterQual        0
ExterCond        0
Foundation       0
BsmtQual         0
BsmtCond         0
BsmtExposure     0
BsmtFinType1     0
BsmtFinSF1       0
BsmtUnfSF        0
TotalBsmtSF      0
HeatingQC        0
1stFlrSF         0
2ndFlrSF         0
GrLivArea        0
BsmtFullBath     0
FullBath         0
HalfBath         0
BedroomAbvGr     0
KitchenQual      0
TotRmsAbvGrd     0
Functional       0
Fireplaces       0
FireplaceQu      0
GarageType       0
GarageYrBlt      0
GarageFinish     0
GarageCars       0
GarageArea       0
GarageQual       0
GarageCond       0
WoodDeckSF       0
OpenPorchSF      0
EnclosedPorch    0
MoSold           0
YrSold      

In [53]:

# Persist the revised housing dataframe (without the index column) for analysis
clean_csv_path = 'Resources/housing_df_clean.csv'
housing_df_revised.to_csv(clean_csv_path, index=False)
print("housing_df_clean.csv file saved")

housing_df_clean.csv file saved
