#### Import Statements

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker #For changing ticker location and formatting
import seaborn as sns

In [3]:
# Read the raw test split and preview its first five rows.
test_csv_path = './datasets/test.csv'
df = pd.read_csv(test_csv_path)
df.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,...,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,...,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,...,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,...,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,...,0,185,0,,,,0,7,2009,WD


#### Columns with Missing Data

In [5]:
# Count missing values per column and show only the columns that have any,
# largest count first. Naming the count column in reset_index replaces the
# earlier rename() call, which was a no-op: its result was discarded and it
# looked up the string '0' while the column label was the integer 0.
missing_data = (
    df.isnull()
    .sum()
    .reset_index(name = 'Count')
    .sort_values('Count', ascending = False)
)
missing_data[missing_data['Count'] > 0]

Unnamed: 0,index,0
73,Pool QC,874
75,Misc Feature,837
7,Alley,820
74,Fence,706
26,Mas Vnr Type,535
58,Fireplace Qu,422
4,Lot Frontage,160
60,Garage Yr Blt,45
61,Garage Finish,45
64,Garage Qual,45


In [7]:
# (rows, columns) of the frame at this point in the notebook.
df.shape

(878, 80)

In [9]:
# Normalise the headers to lower snake_case (spaces -> underscores) so the
# columns are easier to reference in code.
df.columns = df.columns.str.replace(' ', '_', regex = False).str.lower()
df

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,...,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,...,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,...,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,...,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,...,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,...,0,185,0,,,,0,7,2009,WD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873,1662,527377110,60,RL,80.0,8000,Pave,,Reg,Lvl,...,0,0,0,,,,0,11,2007,WD
874,1234,535126140,60,RL,90.0,14670,Pave,,Reg,Lvl,...,0,0,0,,MnPrv,,0,8,2008,WD
875,1373,904100040,20,RL,55.0,8250,Pave,,Reg,Lvl,...,0,0,0,,,,0,8,2008,WD
876,1672,527425140,20,RL,60.0,9000,Pave,,Reg,Lvl,...,0,0,0,,GdWo,,0,5,2007,WD


In [10]:
# Frequency of each construction year (NaN included), ordered by year.
year_built_counts = df['year_built'].value_counts(dropna = False).reset_index()
year_built_counts.sort_values('year_built')

Unnamed: 0,year_built,count
83,1880,2
99,1882,1
105,1885,1
88,1890,2
97,1892,1
...,...,...
1,2006,39
2,2007,30
16,2008,13
30,2009,11


#### Creating variables for the age of the property at sale, both since it was built and since it was last remodeled

In [11]:
# Years between the sale and (a) original construction, (b) the most recent
# remodel/addition. The intermediate bare expressions that used to sit between
# these assignments were removed: only a cell's last expression is rendered,
# so they computed frames that were never shown.
df['age_at_sale'] = df['yr_sold'] - df['year_built']
df['age_since_remod'] = df['yr_sold'] - df['year_remod/add']

df[['age_at_sale', 'age_since_remod']]


Unnamed: 0,age_at_sale,age_since_remod
0,96,56
1,29,29
2,0,0
3,84,1
4,46,46
...,...,...
873,33,33
874,42,9
875,40,40
876,36,36


### Creating Finished Basement Square Feet (bsmnt_fin_sf): total basement square feet minus unfinished basement square feet

In [14]:
# Finished basement area = total basement area minus the unfinished part.
df['bsmnt_fin_sf'] = df['total_bsmt_sf'].sub(df['bsmt_unf_sf'])
df.loc[:, ['total_bsmt_sf', 'bsmt_unf_sf', 'bsmnt_fin_sf']]

Unnamed: 0,total_bsmt_sf,bsmt_unf_sf,bsmnt_fin_sf
0,1020,1020,0
1,1967,1967,0
2,654,100,554
3,968,968,0
4,1394,785,609
...,...,...,...
873,1084,0,1084
874,1104,529,575
875,952,210,742
876,864,248,616


In [19]:
# Flag sales from 2008 onward (1) versus earlier years (0), i.e. whether the
# house sold before or after the start of the housing crisis.
df['sold_in_crisis'] = (df['yr_sold'] >= 2008).astype(int)

# Cross-check the flag against the sale year.
crisis_check = df.groupby(by = ['yr_sold', 'sold_in_crisis']).size()
crisis_check.reset_index()

Unnamed: 0,yr_sold,sold_in_crisis,0
0,2006,0,187
1,2007,0,195
2,2008,1,187
3,2009,1,202
4,2010,1,107


In [23]:
# Ordinal codes for lot shape: 'Reg' gets the highest code, 'IR3' the lowest.
lot_shape_map = {'IR3' : 1,
                 'IR2' : 2,
                 'IR1' : 3,
                 'Reg' : 4 }

# Assign the result back to the column. Calling replace(..., inplace=True) on
# the df['lot_shape'] selection is a chained in-place update, which pandas
# warns about and which leaves df unchanged under Copy-on-Write.
df['lot_shape'] = df['lot_shape'].replace(lot_shape_map)

df['lot_shape'].value_counts()

lot_shape
4    564
3    286
2     21
1      7
Name: count, dtype: int64

In [26]:
# Label distribution before recoding.
print(df['land_slope'].value_counts() )

# Ordinal codes for land slope: 'Gtl' -> 3, 'Mod' -> 2, 'Sev' -> 1.
land_slope_map = {'Gtl' : 3, 'Mod' : 2, 'Sev' : 1}
# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['land_slope'] = df['land_slope'].replace(land_slope_map)

df['land_slope'].value_counts()

land_slope
Gtl    835
Mod     37
Sev      6
Name: count, dtype: int64


land_slope
3    835
2     37
1      6
Name: count, dtype: int64

In [283]:
# Summarise masonry veneer type (NaN kept as its own group): row count and
# share, plus mean/median sale price and their z-scores across groups.
# NOTE(review): the saved output for this cell shows 2049 rows, not the 878
# loaded from test.csv, so it looks carried over from the training notebook.
# The test file presumably has no `saleprice` column; in that case fall back
# to counts only instead of raising a KeyError on Restart & Run All.
has_saleprice = 'saleprice' in df.columns

if has_saleprice:
    grouped_df = (
        df.groupby(by = ['mas_vnr_type'], dropna = False)['saleprice']
        .agg(['count', 'mean', 'median'])
        .reset_index()
        .sort_values('count', ascending = False)
    )
else:
    grouped_df = (
        df.groupby(by = ['mas_vnr_type'], dropna = False)
        .size()
        .reset_index(name = 'count')
        .sort_values('count', ascending = False)
    )

grouped_df['count_percentage'] = ((grouped_df['count'] / grouped_df['count'].sum()) * 100).round(2)

if has_saleprice:
    # Standardise each price statistic across the veneer-type groups.
    for col in ['mean', 'median']:
        col_mean = grouped_df[col].mean()
        col_std = grouped_df[col].std()
        grouped_df[f'{col}_zscore'] = (grouped_df[col] - col_mean) / col_std

grouped_df

Unnamed: 0,mas_vnr_type,count,mean,median,count_percentage,mean_zscore,median_zscore
3,,1240,155120.870161,143950.0,60.52,-0.700857,-0.709926
1,BrkFace,630,212161.436508,188700.0,30.75,0.323314,0.169717
2,Stone,166,264820.60241,248614.0,8.1,1.268817,1.347436
0,BrkCmn,13,144515.692308,139000.0,0.63,-0.891275,-0.807227


In [28]:
# Fill missing mas_vnr_type with the label "Unknown".
# Assign back rather than using a chained inplace fillna, which does not
# update df under pandas Copy-on-Write.
df['mas_vnr_type'] = df['mas_vnr_type'].fillna("Unknown")

df['mas_vnr_type'].value_counts()

mas_vnr_type
Unknown    535
BrkFace    250
Stone       80
BrkCmn      12
CBlock       1
Name: count, dtype: int64


mas_vnr_type
Unknown    535
BrkFace    250
Stone       80
BrkCmn      12
CBlock       1
Name: count, dtype: int64

In [29]:
# Distribution before filling (NaN included).
print ( df['mas_vnr_area'].value_counts(dropna = False) )

# Treat a missing veneer area as 0. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['mas_vnr_area'] = df['mas_vnr_area'].fillna(0)

df['mas_vnr_area'].value_counts(dropna = False) 

mas_vnr_area
0.0      532
216.0      7
80.0       5
420.0      5
196.0      5
        ... 
281.0      1
95.0       1
481.0      1
459.0      1
410.0      1
Name: count, Length: 233, dtype: int64


mas_vnr_area
0.0      533
216.0      7
80.0       5
420.0      5
196.0      5
        ... 
281.0      1
95.0       1
481.0      1
459.0      1
410.0      1
Name: count, Length: 232, dtype: int64

In [31]:
# Raw label distribution before recoding.
print( df['exter_qual'].value_counts(dropna = False) )

# Ordinal codes for the exterior quality labels: 'Ex' -> 5 down to 'Po' -> 1.
exter_qual_map = dict(zip(['Ex', 'Gd', 'TA', 'Fa', 'Po'], [5, 4, 3, 2, 1]))

exter_qual_coded = df['exter_qual'].replace(exter_qual_map)
df['exter_qual'] = exter_qual_coded

df['exter_qual'].value_counts(dropna = False)

exter_qual
TA    552
Gd    292
Ex     25
Fa      9
Name: count, dtype: int64


exter_qual
3    552
4    292
5     25
2      9
Name: count, dtype: int64

In [32]:
# Raw label distribution before recoding.
print( df['exter_cond'].value_counts(dropna = False) )

# Ordinal codes for the exterior condition labels: 'Ex' -> 5 down to 'Po' -> 1.
exter_cond_map = dict(zip(['Ex', 'Gd', 'TA', 'Fa', 'Po'], [5, 4, 3, 2, 1]))

exter_cond_coded = df['exter_cond'].replace(exter_cond_map)
df['exter_cond'] = exter_cond_coded

# Distribution after recoding.
print( df['exter_cond'].value_counts(dropna = False) )

exter_cond
TA    770
Gd     84
Fa     18
Ex      5
Po      1
Name: count, dtype: int64
exter_cond
3    770
4     84
2     18
5      5
1      1
Name: count, dtype: int64


In [33]:
# Raw label distribution before recoding (NaN included).
print( df['bsmt_qual'].value_counts(dropna = False) )

#### Going to change Basement Qual to ordinal value since NA / Unknown is No Basement
bsmt_qual_map = {'Ex' : 5,
            'Gd' : 4,
            'TA' : 3,
            'Fa' : 2,
            'Po' : 1,
            'Unknown' : 0 }

# Label missing values "Unknown", then recode; "Unknown" maps to 0.
# The fill is assigned back instead of using a chained inplace fillna, which
# does not update df under pandas Copy-on-Write.
df['bsmt_qual'] = df['bsmt_qual'].fillna("Unknown").replace(bsmt_qual_map)

# Distribution after recoding.
print( df['bsmt_qual'].value_counts(dropna = False) )

bsmt_qual
TA     396
Gd     355
Ex      73
Fa      28
NaN     25
Po       1
Name: count, dtype: int64
bsmt_qual
3    396
4    355
5     73
2     28
0     25
1      1
Name: count, dtype: int64


In [34]:
# Raw label distribution before recoding (NaN included).
print( df['bsmt_cond'].value_counts(dropna = False) )

bsmt_cond_map = {'Ex' : 5,
            'Gd' : 4,
            'TA' : 3,
            'Fa' : 2,
            'Po' : 1,
            'Unknown' : 0 }

# Label missing values "Unknown" so the map's 'Unknown' -> 0 entry is actually
# used (same pattern as bsmt_qual); missing values still end up as 0.
# The fill is assigned back instead of using a chained inplace fillna, which
# does not update df under pandas Copy-on-Write.
df['bsmt_cond'] = df['bsmt_cond'].fillna("Unknown").replace(bsmt_cond_map)

# Distribution after recoding.
print( df['bsmt_cond'].value_counts(dropna = False) )

bsmt_cond
TA     781
Fa      39
Gd      33
NaN     25
Name: count, dtype: int64
bsmt_cond
3    781
2     39
4     33
0     25
Name: count, dtype: int64


In [35]:
# Raw label distribution before recoding (NaN included).
print( df['bsmt_exposure'].value_counts(dropna = False) )

# Ordinal codes for basement exposure: 'Gd' -> 4 down to 'No' -> 1.
bsmt_exp_map = {'Gd' : 4,
                'Av' : 3,
                'Mn' : 2,
                'No' : 1,
}

# Missing values become 0, then the labels are recoded. Assigned back instead
# of using a chained inplace fillna, which does not update df under pandas
# Copy-on-Write.
df['bsmt_exposure'] = df['bsmt_exposure'].fillna(0).replace(bsmt_exp_map)

# Distribution after recoding.
print( df['bsmt_exposure'].value_counts(dropna = False) )

bsmt_exposure
No     567
Av     130
Gd      80
Mn      76
NaN     25
Name: count, dtype: int64
bsmt_exposure
1    567
3    130
4     80
2     76
0     25
Name: count, dtype: int64


In [36]:
# Raw label distribution before recoding (NaN included).
print( df['bsmtfin_type_1'].value_counts(dropna = False) )

#Replace Null as 0, and Change Basement Fin Type to Ordinal based on ranking in data dictionary
bsmt_fin_type_map = {'GLQ' : 6,
            'ALQ' : 5,
            'BLQ' : 4,
            'Rec' : 3,
            'LwQ' : 2,
            'Unf' : 1}

# Assigned back instead of using a chained inplace fillna, which does not
# update df under pandas Copy-on-Write.
df['bsmtfin_type_1'] = df['bsmtfin_type_1'].fillna(0).replace(bsmt_fin_type_map)

# Distribution after recoding.
print( df['bsmtfin_type_1'].value_counts(dropna = False) )

bsmtfin_type_1
Unf    248
GLQ    243
ALQ    136
Rec    105
BLQ     69
LwQ     52
NaN     25
Name: count, dtype: int64
bsmtfin_type_1
1    248
6    243
5    136
3    105
4     69
2     52
0     25
Name: count, dtype: int64


In [37]:
# Raw label distribution before recoding (NaN included).
print( df['bsmtfin_type_2'].value_counts(dropna = False) )

# Missing values become 0, then the labels are recoded with the same
# bsmt_fin_type_map used for bsmtfin_type_1. Assigned back instead of using a
# chained inplace fillna, which does not update df under pandas Copy-on-Write.
df['bsmtfin_type_2'] = df['bsmtfin_type_2'].fillna(0).replace(bsmt_fin_type_map)

# Distribution after recoding.
print( df['bsmtfin_type_2'].value_counts(dropna = False) )

bsmtfin_type_2
Unf    749
LwQ     29
Rec     26
NaN     25
BLQ     20
ALQ     18
GLQ     11
Name: count, dtype: int64
bsmtfin_type_2
1    749
2     29
3     26
0     25
4     20
5     18
6     11
Name: count, dtype: int64


In [40]:
# Fill any remaining missing basement values with 0. Each column is assigned
# back instead of using a chained inplace fillna, which does not update df
# under pandas Copy-on-Write.
bsmt_fill_cols = ['bsmtfin_sf_1', 'bsmtfin_type_2', 'bsmtfin_sf_2',
                  'bsmt_unf_sf', 'total_bsmt_sf', 'bsmnt_fin_sf']

for col in bsmt_fill_cols:
    df[col] = df[col].fillna(0)

In [41]:
# Raw label distribution before recoding.
print( df['heating_qc'].value_counts(dropna = False) )

# Ordinal codes for heating quality: 'Ex' -> 5 down to 'Po' -> 1.
heating_qc_map = {'Ex' : 5,
           'Gd' : 4,
           'TA' : 3,
           'Fa' : 2,
           'Po' : 1
}

# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['heating_qc'] = df['heating_qc'].replace(heating_qc_map)

# Distribution after recoding.
print( df['heating_qc'].value_counts(dropna = False) )

heating_qc
Ex    429
TA    267
Gd    157
Fa     25
Name: count, dtype: int64
heating_qc
5    429
3    267
4    157
2     25
Name: count, dtype: int64


In [42]:
# Raw label distribution before recoding.
print( df['central_air'].value_counts(dropna = False) )

#Map central air = even though over 90% has central air, will include in the model since it can be coded as a 1 or 0
air_map = {'Y' : 1, 'N' : 0 }

# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['central_air'] = df['central_air'].replace(air_map)

# Distribution after recoding.
print( df['central_air'].value_counts(dropna = False) )

central_air
Y    823
N     55
Name: count, dtype: int64
central_air
1    823
0     55
Name: count, dtype: int64


In [44]:
# Raw label distribution before recoding (NaN included).
print( df['electrical'].value_counts(dropna = False) )

# Ordinal codes for the electrical system: 'SBrkr' -> 5 down to 'Mix' -> 1.
electrical_map = { 'SBrkr' : 5,
                  'FuseA' : 4,
                  'FuseF' : 3,
                  'FuseP' : 2,
                  'Mix' : 1
}

# Recode first, then fill missing values with 0 (same order as before).
# Assigned back instead of using chained inplace calls, which do not update
# df under pandas Copy-on-Write.
# NOTE(review): 0 places a missing value below 'Mix' on this scale - confirm
# that is the intended treatment.
df['electrical'] = df['electrical'].replace(electrical_map).fillna(0)

# Distribution after recoding.
print( df['electrical'].value_counts(dropna = False) )

electrical
5.0    813
4.0     48
3.0     15
2.0      1
NaN      1
Name: count, dtype: int64
electrical
5.0    813
4.0     48
3.0     15
2.0      1
0.0      1
Name: count, dtype: int64


In [45]:
# Distribution before filling (NaN included).
print( df['bsmt_full_bath'].value_counts(dropna = False) )

# Treat a missing count as 0 baths. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['bsmt_full_bath'] = df['bsmt_full_bath'].fillna(0)

# Distribution after filling.
print( df['bsmt_full_bath'].value_counts(dropna = False) )

bsmt_full_bath
0    507
1    356
2     15
Name: count, dtype: int64
bsmt_full_bath
0    507
1    356
2     15
Name: count, dtype: int64


In [46]:
# Distribution before filling (NaN included).
print( df['bsmt_half_bath'].value_counts(dropna = False) )

# Treat a missing count as 0 baths. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['bsmt_half_bath'] = df['bsmt_half_bath'].fillna(0)

# Distribution after filling.
print( df['bsmt_half_bath'].value_counts(dropna = False) )

bsmt_half_bath
0    829
1     49
Name: count, dtype: int64
bsmt_half_bath
0    829
1     49
Name: count, dtype: int64


In [47]:
# Raw label distribution before recoding.
print( df['kitchen_qual'].value_counts(dropna = False) )

# Ordinal codes for kitchen quality: 'Ex' -> 5 down to 'Po' -> 1.
kitchen_qual_map = {'Ex' : 5,
               'Gd' : 4,
               'TA' : 3,
               'Fa' : 2,
               'Po' : 1 }

# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['kitchen_qual'] = df['kitchen_qual'].replace(kitchen_qual_map)

# Distribution after recoding.
print( df['kitchen_qual'].value_counts(dropna = False) )

kitchen_qual
TA    447
Gd    354
Ex     53
Fa     23
Po      1
Name: count, dtype: int64
kitchen_qual
3    447
4    354
5     53
2     23
1      1
Name: count, dtype: int64


In [48]:
# Raw label distribution before recoding.
print( df['functional'].value_counts(dropna = False) )

# Ordinal codes for home functionality: 'Sal' -> 1 up to 'Typ' -> 8.
functional_map = {'Sal' : 1,
            'Sev' : 2,
            'Maj2' : 3,
            'Maj1' : 4,
            'Mod' : 5,
            'Min2' : 6,
            'Min1' : 7,
            'Typ' : 8 }

# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['functional'] = df['functional'].replace(functional_map)

# Distribution after recoding.
print( df['functional'].value_counts(dropna = False) )

functional
Typ     812
Min2     28
Min1     23
Maj1      7
Mod       6
Maj2      2
Name: count, dtype: int64
functional
8    812
6     28
7     23
4      7
5      6
3      2
Name: count, dtype: int64


In [49]:
# Raw label distribution before recoding (NaN included).
print( df['fireplace_qu'].value_counts(dropna = False) )

# Ordinal codes for fireplace quality: 'Po' -> 1 up to 'Ex' -> 5.
fireplace_qu_map = {'Po' : 1,
                    'Fa' : 2,
                    'TA' : 3,
                    'Gd' : 4,
                    'Ex' : 5}

# Missing values become 0, then the labels are recoded. Assigned back instead
# of using chained inplace calls, which do not update df under pandas
# Copy-on-Write.
df['fireplace_qu'] = df['fireplace_qu'].fillna(0).replace(fireplace_qu_map)

# Distribution after recoding.
print( df['fireplace_qu'].value_counts(dropna = False) )

fireplace_qu
NaN    422
Gd     220
TA     193
Fa      16
Po      15
Ex      12
Name: count, dtype: int64
fireplace_qu
0    422
4    220
3    193
2     16
1     15
5     12
Name: count, dtype: int64


In [51]:
# Distribution before filling (NaN included).
print( df['garage_type'].value_counts(dropna = False) )

# Label missing garage types with the string 'None'. Assign back rather than
# using a chained inplace fillna, which does not update df under pandas
# Copy-on-Write.
# NOTE(review): pd.read_csv (pandas >= 2.0) treats the literal string 'None'
# as missing by default, so these rows will read back as NaN from the CSV
# written at the end of this notebook unless the reader overrides na_values.
df['garage_type'] = df['garage_type'].fillna('None')

# Distribution after filling.
print( df['garage_type'].value_counts(dropna = False) )

garage_type
Attchd     518
Detchd     246
BuiltIn     53
NaN         44
Basment      9
2Types       4
CarPort      4
Name: count, dtype: int64
garage_type
Attchd     518
Detchd     246
BuiltIn     53
None        44
Basment      9
2Types       4
CarPort      4
Name: count, dtype: int64


In [53]:
# Raw label distribution before recoding (NaN included).
print( df['garage_finish'].value_counts(dropna = False) )

# Ordinal codes for garage interior finish: 'Unf' -> 1, 'RFn' -> 2, 'Fin' -> 3.
garage_finish_map = {'Unf' : 1,
                     'RFn' : 2,
                     'Fin' : 3}

# Missing values become 0, then the labels are recoded. Assigned back instead
# of using chained inplace calls, which do not update df under pandas
# Copy-on-Write.
df['garage_finish'] = df['garage_finish'].fillna(0).replace(garage_finish_map)

# Distribution after recoding.
print( df['garage_finish'].value_counts(dropna = False) )


garage_finish
Unf    382
RFn    233
Fin    218
NaN     45
Name: count, dtype: int64
garage_finish
1    382
2    233
3    218
0     45
Name: count, dtype: int64


In [55]:
# Distribution before filling (NaN included).
print( df['garage_cars'].value_counts(dropna = False) )

# Treat a missing car capacity as 0. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['garage_cars'] = df['garage_cars'].fillna(0)

# Distribution after filling.
print( df['garage_cars'].value_counts(dropna = False) )

garage_cars
2    467
1    254
3    110
0     44
4      3
Name: count, dtype: int64
garage_cars
2    467
1    254
3    110
0     44
4      3
Name: count, dtype: int64


In [56]:
# Distribution before filling (NaN included).
print( df['garage_area'].value_counts(dropna = False) )

# Treat a missing garage area as 0. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['garage_area'] = df['garage_area'].fillna(0)

# Distribution after filling.
print( df['garage_area'].value_counts(dropna = False) )

garage_area
0       44
576     28
440     26
240     24
484     24
        ..
1200     1
753      1
843      1
1092     1
488      1
Name: count, Length: 357, dtype: int64
garage_area
0       44
576     28
440     26
240     24
484     24
        ..
1200     1
753      1
843      1
1092     1
488      1
Name: count, Length: 357, dtype: int64


In [57]:
# Raw label distributions before recoding (NaN included).
print( df['garage_qual'].value_counts(dropna = False) )
print( df['garage_cond'].value_counts(dropna = False) )

# Shared ordinal codes for garage quality and condition: 'Po' -> 1 up to 'Ex' -> 5.
garage_map = {'Po' : 1,
                   'Fa' : 2,
                   'TA' : 3,
                   'Gd' : 4,
                  'Ex' : 5 }

# Missing values become 0, then the labels are recoded. Assigned back instead
# of using chained inplace calls, which do not update df under pandas
# Copy-on-Write.
for col in ['garage_qual', 'garage_cond']:
    df[col] = df[col].fillna(0).replace(garage_map)

# Distributions after recoding.
print( df['garage_qual'].value_counts(dropna = False) )
print( df['garage_cond'].value_counts(dropna = False) )

garage_qual
TA     782
NaN     45
Fa      42
Gd       6
Po       3
Name: count, dtype: int64
garage_cond
TA     796
NaN     45
Fa      27
Po       6
Gd       3
Ex       1
Name: count, dtype: int64
garage_qual
3    782
0     45
2     42
4      6
1      3
Name: count, dtype: int64
garage_cond
3    796
0     45
2     27
1      6
4      3
5      1
Name: count, dtype: int64


In [58]:
# Raw label distribution before recoding.
print( df['paved_drive'].value_counts(dropna = False) )

# Ordinal codes for driveway paving: 'N' -> 1, 'P' -> 2, 'Y' -> 3.
paved_drive_map = {'N' : 1,
                   'P' : 2,
                   'Y' : 3}

# Assign back rather than using a chained inplace replace, which does not
# update df under pandas Copy-on-Write.
df['paved_drive'] = df['paved_drive'].replace(paved_drive_map)

# Distribution after recoding.
print( df['paved_drive'].value_counts(dropna = False) )

paved_drive
Y    790
N     65
P     23
Name: count, dtype: int64
paved_drive
3    790
1     65
2     23
Name: count, dtype: int64


In [59]:
# 1 when the property has any wood deck area, otherwise 0.
df['has_wood_deck'] = df['wood_deck_sf'].gt(0).astype(int)

In [61]:
# 1 when the property has any pool area, otherwise 0.
pool_present = df['pool_area'].gt(0)
df['has_pool'] = pool_present.astype(int)

print( df['has_pool'].value_counts(dropna = False) )

has_pool
0    874
1      4
Name: count, dtype: int64


In [62]:
# Raw label distribution before recoding (NaN included).
print( df['fence'].value_counts(dropna = False) )

# Ordinal codes for fence quality: 'MnWw' -> 1 up to 'GdPrv' -> 4.
fence_map = {'MnWw' : 1,
             'GdWo' : 2,
             'MnPrv' : 3,
             'GdPrv' : 4 }

# Missing values become 0, then the labels are recoded. Assigned back instead
# of using chained inplace calls, which do not update df under pandas
# Copy-on-Write.
df['fence'] = df['fence'].fillna(0).replace(fence_map)

# Distribution after recoding.
print( df['fence'].value_counts(dropna = False) )

fence
NaN      706
MnPrv    103
GdPrv     35
GdWo      32
MnWw       2
Name: count, dtype: int64
fence
0    706
3    103
4     35
2     32
1      2
Name: count, dtype: int64


In [63]:
# Store month sold as a string - presumably so it is treated as a category
# rather than a number downstream (TODO confirm).
df['mo_sold'] = df['mo_sold'].astype(str)

In [64]:
# Column dtypes and non-null counts at this point in the notebook.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 878 entries, 0 to 877
Data columns (total 86 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               878 non-null    int64  
 1   pid              878 non-null    int64  
 2   ms_subclass      878 non-null    int64  
 3   ms_zoning        878 non-null    object 
 4   lot_frontage     718 non-null    float64
 5   lot_area         878 non-null    int64  
 6   street           878 non-null    object 
 7   alley            58 non-null     object 
 8   lot_shape        878 non-null    int64  
 9   land_contour     878 non-null    object 
 10  utilities        878 non-null    object 
 11  lot_config       878 non-null    object 
 12  land_slope       878 non-null    int64  
 13  neighborhood     878 non-null    object 
 14  condition_1      878 non-null    object 
 15  condition_2      878 non-null    object 
 16  bldg_type        878 non-null    object 
 17  house_style     

#### Looking at remaining null values

In [65]:
# Names of the columns that still contain at least one missing value.
null_flags = df.isnull().any()
null_flags[null_flags].index.tolist()

['lot_frontage', 'alley', 'garage_yr_blt', 'pool_qc', 'misc_feature']

In [66]:
# Fill missing lot frontage with 0. Assign back rather than using a chained
# inplace fillna, which does not update df under pandas Copy-on-Write.
df['lot_frontage'] = df['lot_frontage'].fillna(0)

In [403]:
# Column dtypes and non-null counts after the lot_frontage fill.
# NOTE(review): the saved output of this cell reports 2049 rows and 87
# columns, which does not match the 878-row frame loaded in this notebook;
# it appears to come from a different session. Re-run to refresh it.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2049 entries, 0 to 2050
Data columns (total 87 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               2049 non-null   int64  
 1   pid              2049 non-null   int64  
 2   ms_subclass      2049 non-null   int64  
 3   ms_zoning        2049 non-null   object 
 4   lot_frontage     2049 non-null   float64
 5   lot_area         2049 non-null   int64  
 6   street           2049 non-null   object 
 7   alley            140 non-null    object 
 8   lot_shape        2049 non-null   int64  
 9   land_contour     2049 non-null   object 
 10  utilities        2049 non-null   object 
 11  lot_config       2049 non-null   object 
 12  land_slope       2049 non-null   int64  
 13  neighborhood     2049 non-null   object 
 14  condition_1      2049 non-null   object 
 15  condition_2      2049 non-null   object 
 16  bldg_type        2049 non-null   object 
 17  house_style      20

In [68]:
# Write the cleaned, recoded test set to disk without the row index.
clean_test_path = './datasets/clean_recoded_test_ames.csv'
df.to_csv(clean_test_path, index = False)

In [69]:
# garage_type has no nulls here because its NaNs were filled with the string 'None' earlier in this notebook.
# It shows nulls again in the data set imported in Feature Selection, most likely because pd.read_csv
# (pandas >= 2.0) treats the literal string 'None' as a missing value by default, so the filled rows are
# read back from the CSV as NaN.
# TODO: confirm, then either fill with a different label or read the file with explicit na_values / keep_default_na.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 878 entries, 0 to 877
Data columns (total 86 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               878 non-null    int64  
 1   pid              878 non-null    int64  
 2   ms_subclass      878 non-null    int64  
 3   ms_zoning        878 non-null    object 
 4   lot_frontage     878 non-null    float64
 5   lot_area         878 non-null    int64  
 6   street           878 non-null    object 
 7   alley            58 non-null     object 
 8   lot_shape        878 non-null    int64  
 9   land_contour     878 non-null    object 
 10  utilities        878 non-null    object 
 11  lot_config       878 non-null    object 
 12  land_slope       878 non-null    int64  
 13  neighborhood     878 non-null    object 
 14  condition_1      878 non-null    object 
 15  condition_2      878 non-null    object 
 16  bldg_type        878 non-null    object 
 17  house_style     