Initial attempt at categorizing variables

In [66]:
# Quantitative
# quanflt = housing[[‘LotFrontage’,‘MasVnrArea’, ‘BsmtFinSF1’, ‘BsmtFinSF2’, ‘BsmtUnfSF’, 
#                    ‘TotalBsmtSF’, ‘GarageYrBlt’, ‘GarageArea’ ]]

# Ordinal Categorical
# Ordflt = housing[[‘BsmtFullBath’, ‘BsmtHalfBath’, ‘GarageCars’]]

# ------------------------------
# ‘’'Quantitative’’'
# quanint = housing[[‘GrLivArea’,‘SalePrice’,‘LotArea’, ‘YearBuilt’, ‘YearRemodAdd’,‘1stFlrSF’,‘2ndFlrSF’,
#                 ‘LowQualFinSF’, ‘WoodDeckSF’, ‘OpenPorchSF’,‘EnclosedPorch’, ‘3SsnPorch’, ‘ScreenPorch’,
#                 ‘PoolArea’, ‘MiscVal’, ‘MoSold’ , ‘YrSold’]]
# ‘’'Ordinal Categorical’’'
# Ordint = housing[[‘MSSubClass’, ‘OverallQual’,‘OverallCond’, ‘FullBath’, ‘HalfBath’, ‘BedroomAbvGr’, ‘KitchenAbvGr’, ‘TotRmsAbvGrd’, ‘Fireplaces’ ]]
# ‘’'Nominal Categorical’’'
# ----------------------------

Import necessary packages

In [67]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#increase pandas default max_rows 
pd.options.display.max_rows = 250
pd.options.display.max_columns = 250

Read the cleaned CSV files

In [68]:
# DO THIS EVERY TIME YOU ARE LOADING THE CSV FILE INTO THE IDE OR NOTEBOOK

ames = pd.read_csv('Data/Clean_Ames_Housing_Price.csv', index_col=0)

# Apply mask/filter to examine only null values.
lot_nulls = ames[ames['LotFrontage'].isna()] #lots with no linear feet of street connected to property

# Group lots by configuration
grouped_lots = ames.groupby(['LotConfig']).agg({
   'LotFrontage' : 'mean', 
})

# Replace np.nan with average for lot_config of that type
ames.loc[(ames['LotFrontage'].isna()) & (ames['LotConfig'] == 'Corner'), 'LotFrontage'] = 81.468023
ames.loc[(ames['LotFrontage'].isna()) & (ames['LotConfig'] == 'CulDSac'), 'LotFrontage'] = 56.456790        
ames.loc[(ames['LotFrontage'].isna()) & (ames['LotConfig'] == 'FR2'), 'LotFrontage'] = 59.413793        
ames.loc[(ames['LotFrontage'].isna()) & (ames['LotConfig'] == 'FR3'), 'LotFrontage'] = 79.3       
ames.loc[(ames['LotFrontage'].isna()) & (ames['LotConfig'] == 'Inside'), 'LotFrontage'] = 66.633846

# Clean NaN values with proper coding
ames['Alley'] = ames['Alley'].replace(np.nan, 'NA')
ames['BsmtQual'] = ames['BsmtQual'].replace(np.nan, 'NA')
ames['BsmtCond'] = ames['BsmtCond'].replace(np.nan, 'NA')
ames['BsmtExposure'] = ames['BsmtExposure'].replace(np.nan, 'NA')
ames['BsmtFinType1'] = ames['BsmtFinType1'].replace(np.nan, 'NA')
ames['BsmtFinSF1'] = ames['BsmtFinSF1'].replace(np.nan, 0)
ames['BsmtFinType2'] = ames['BsmtFinType2'].replace(np.nan, 'NA')
ames['BsmtFinSF2'] = ames['BsmtFinSF2'].replace(np.nan, 0)
ames['BsmtUnfSF'] = ames['BsmtUnfSF'].replace(np.nan, 0)
ames['TotalBsmtSF'] = ames['TotalBsmtSF'].replace(np.nan, 'NA')
ames['BsmtFullBath'] = ames['BsmtFullBath'].replace(np.nan, 0)
ames['BsmtHalfBath'] = ames['BsmtHalfBath'].replace(np.nan, 0)
ames['Electrical'] = ames['Electrical'].replace(np.nan, 'SBrkr')
ames['FireplaceQu'] = ames['FireplaceQu'].replace(np.nan, 'NA')
ames['GarageType'] = ames['GarageType'].replace(np.nan, 'NA')
ames['GarageYrBlt'] = ames['GarageYrBlt'].replace(np.nan, 'NA')
ames['GarageFinish'] = ames['GarageFinish'].replace(np.nan, 'NA')
ames['GarageCars'] = ames['GarageCars'].replace(np.nan, 0)
ames['GarageArea'] = ames['GarageArea'].replace(np.nan, 0)
ames['GarageQual'] = ames['GarageQual'].replace(np.nan, 'NA')
ames['GarageCond'] = ames['GarageCond'].replace(np.nan, 'NA')
ames['PoolQC'] = ames['PoolQC'].replace(np.nan, 'NA')
ames['Fence'] = ames['Fence'].replace(np.nan, 'NA')
ames['MiscFeature'] = ames['MiscFeature'].replace(np.nan, 'NA')

# Inspect the property records to see front façades
# https://beacon.schneidercorp.com/Application.aspx?AppID=165&LayerID=2145&PageTypeID=2&PageID=1104
ames['MasVnrType'].loc[176] = "Stone"
ames['MasVnrType'].loc[359] = "None"
ames['MasVnrType'].loc[754] = "None"
ames['MasVnrType'].loc[788] = "None"
ames['MasVnrType'].loc[872] = "None"
ames['MasVnrType'].loc[892] = "None"
ames['MasVnrType'].loc[952] = "Stone"
ames['MasVnrType'].loc[1042] = "None"
ames['MasVnrType'].loc[1162] = "None"
ames['MasVnrType'].loc[1356] = "None"
ames['MasVnrType'].loc[1554] = "None"
ames['MasVnrType'].loc[2034] = "None"
ames['MasVnrType'].loc[2478] = "None"
ames['MasVnrType'].loc[2537] = "None"

# For the properties with partial height stone façade, assume 4 ft height,
# and for full height, assume 8 ft height
# Assume façade length is the 1.618 length of a house built in the footprint of a golden ration
# sqrt(1.618 * 1stFlrSF)
ames['MasVnrArea'].loc[176] = 4 * (1.618 * ames.loc[176]['1stFlrSF']) ** 0.5
ames['MasVnrArea'].loc[359] = 0
ames['MasVnrArea'].loc[754] = 0
ames['MasVnrArea'].loc[788] = 0
ames['MasVnrArea'].loc[872] = 0
ames['MasVnrArea'].loc[892] = 0
ames['MasVnrArea'].loc[952] = 8 * (1.618 * ames.loc[176]['1stFlrSF']) ** 0.5
ames['MasVnrArea'].loc[1042] = 0
ames['MasVnrArea'].loc[1162] = 0
ames['MasVnrArea'].loc[1356] = 0
ames['MasVnrArea'].loc[1554] = 0
ames['MasVnrArea'].loc[2034] = 0
ames['MasVnrArea'].loc[2478] = 0
ames['MasVnrArea'].loc[2537] = 0

# ames.to_csv('Data/Clean_Ames_HousePrice.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [69]:
# Verify that no null values are present
ames.isnull().sum()[0:90]

PID              0
GrLivArea        0
SalePrice        0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
Street           0
Alley            0
LotShape         0
LandContour      0
Utilities        0
LotConfig        0
LandSlope        0
Neighborhood     0
Condition1       0
Condition2       0
BldgType         0
HouseStyle       0
OverallQual      0
OverallCond      0
YearBuilt        0
YearRemodAdd     0
RoofStyle        0
RoofMatl         0
Exterior1st      0
Exterior2nd      0
MasVnrType       0
MasVnrArea       0
ExterQual        0
ExterCond        0
Foundation       0
BsmtQual         0
BsmtCond         0
BsmtExposure     0
BsmtFinType1     0
BsmtFinSF1       0
BsmtFinType2     0
BsmtFinSF2       0
BsmtUnfSF        0
TotalBsmtSF      0
Heating          0
HeatingQC        0
CentralAir       0
Electrical       0
1stFlrSF         0
2ndFlrSF         0
LowQualFinSF     0
BsmtFullBath     0
BsmtHalfBath     0
FullBath         0
HalfBath         0
BedroomAbvGr

In [70]:
# Ordinal Categorical
# Change to a series of numbers
# DO NOT USE ZERO
Ordstr = ['ExterQual','ExterCond','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1', \
              'BsmtFinType2','HeatingQC','KitchenQual','FireplaceQu','GarageQual', \
              'GarageCond','PoolQC','Fence','MasVnrType','LotShape','Foundation','GarageFinish','PoolArea']

In [71]:
# Decide how to map string values to numerical values for ordinal variables
ord2dummies01 = {'Ex':4, 'Gd':3, 'TA':2, 'Fa':1, 'Po':0}
ord_vars01 = ['ExterQual','ExterCond','HeatingQC','KitchenQual']

ord2dummies02 = {'Ex':5, 'Gd':4, 'TA':3, 'Fa':2, 'Po':1, "NA":0}
ord_vars02 = ['BsmtQual','BsmtCond','FireplaceQu','GarageQual','GarageCond']

ord2dummies03 = {'Gd':4, 'Av':3, 'Mn':2, 'No':1, 'NA':0}
ord_vars03 = ['BsmtExposure']

ord2dummies04 = {'GLQ':6, 'ALQ':5, 'BLQ':4, 'Rec':3, 'LwQ':2, 'Unf':1, 'NA':0}
ord_vars04 = ['BsmtFinType1','BsmtFinType2']

ord2dummies05 = {'Ex':4, 'Gd':3, 'TA':2, 'Fa':1, 'NA':0}
ord_vars05 = ['PoolQC']

ord2dummies06 = {'GdPrv':4, 'MnPrv':3, 'GdWo':2, 'MnWw':1, 'NA':0}
ord_vars06 = ['Fence']

ord2dummies07 = {'Stone':4,'BrkCmn':3,'BrkFace':2,'CBlock':1,'None':0}
ord_vars07 = ['MasVnrType']

ord2dummies08 = {'Reg':3,'IR1':2,'IR2':1,'IR3':0}
ord_vars08 = ['LotShape']

ord2dummies09 = {'PConc':5,'Slab':4,'CBlock':3,'BrkTil':2,'Stone':1,'Wood':0}
ord_vars09 = ['Foundation']

ord2dummies10 = {'Fin':3,'RFn':2,'Unf':1,'NA':0}
ord_vars10 = ['GarageFinish']

In [73]:
# Map the values in the dataframe
for i in ord_vars01:
    ames[i].replace(ord2dummies01, inplace=True)

for i in ord_vars02:
    ames[i].replace(ord2dummies02, inplace=True)

for i in ord_vars03:
    ames[i].replace(ord2dummies03, inplace=True)

for i in ord_vars04:
    ames[i].replace(ord2dummies04, inplace=True)

for i in ord_vars05:
    ames[i].replace(ord2dummies05, inplace=True)

for i in ord_vars06:
    ames[i].replace(ord2dummies06, inplace=True)
    
for i in ord_vars07:
    ames[i].replace(ord2dummies07, inplace=True)

for i in ord_vars08:
    ames[i].replace(ord2dummies08, inplace=True)

for i in ord_vars09:
    ames[i].replace(ord2dummies09, inplace=True)
    
for i in ord_vars10:
    ames[i].replace(ord2dummies10, inplace=True)
    
# Assign a binary value for whether or not a house has a pool
ames.loc[ames['PoolArea'] > 0] = 1

In [74]:
ames[Ordstr]

Unnamed: 0,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,HeatingQC,KitchenQual,FireplaceQu,GarageQual,GarageCond,PoolQC,Fence,MasVnrType,LotShape,Foundation,GarageFinish,PoolArea
1,2,2,3,3,1,3,1,2,2,4,3,3,0,0,0,3,3,1,0
2,3,2,4,3,2,6,5,2,3,0,3,3,0,0,2,3,3,3,0
3,3,2,3,3,1,5,1,4,3,0,3,1,0,0,0,3,2,1,0
4,3,3,2,3,1,1,1,3,2,0,3,3,0,0,0,3,2,1,0
5,3,2,4,3,1,6,1,4,3,0,3,3,0,0,0,3,5,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2576,2,2,3,3,1,1,1,1,1,4,2,1,0,0,0,3,2,1,0
2577,2,2,0,0,0,0,0,4,2,4,3,3,0,0,0,2,4,1,0
2578,2,2,3,3,1,4,1,2,2,0,3,3,0,0,0,3,3,1,0
2579,3,2,4,3,1,6,1,4,3,3,3,3,0,0,2,3,5,3,0


In [57]:
ames.columns[ames.isin(['NA']).any()]

Index(['Alley', 'TotalBsmtSF', 'GarageType', 'GarageYrBlt', 'MiscFeature'], dtype='object')

In [58]:
# Nominal Categorical
Nomstr = ['MSZoning','Street','Alley','LandContour','Utilities', \
               'LotConfig','LandSlope','Neighborhood','Condition1','Condition2', \
               'BldgType','HouseStyle','RoofStyle','RoofMatl','Exterior1st', \
               'Exterior2nd','Heating', \
               'CentralAir','Electrical','Functional','GarageType', \
               'PavedDrive','MiscFeature','SaleType','SaleCondition']

In [62]:
# THIS IS THE LOOPED CODE THAT IS USED TO DUMMIFY VARIABLES
for i in Nomstr:   
    dummy = pd.get_dummies(ames[i], prefix=i, prefix_sep='__')
    dummy = dummy.drop(i+'__'+str(ames[i].mode()[0]), axis=1)
    ames = pd.concat([ames.drop(i, axis=1), dummy], axis=1)

In [63]:
ames.head()

Unnamed: 0,PID,GrLivArea,SalePrice,MSSubClass,LotFrontage,LotArea,LotShape,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,MSZoning__A (agr),MSZoning__C (all),MSZoning__FV,MSZoning__I (all),MSZoning__RH,MSZoning__RM,Street__Grvl,Alley__Grvl,Alley__Pave,LandContour__Bnk,LandContour__HLS,LandContour__Low,Utilities__NoSewr,LotConfig__Corner,LotConfig__CulDSac,LotConfig__FR2,LotConfig__FR3,LandSlope__Mod,LandSlope__Sev,Neighborhood__Blmngtn,Neighborhood__Blueste,Neighborhood__BrDale,Neighborhood__BrkSide,Neighborhood__ClearCr,Neighborhood__CollgCr,Neighborhood__Crawfor,Neighborhood__Edwards,Neighborhood__Gilbert,Neighborhood__Greens,Neighborhood__GrnHill,Neighborhood__IDOTRR,Neighborhood__Landmrk,Neighborhood__MeadowV,Neighborhood__Mitchel,Neighborhood__NPkVill,Neighborhood__NWAmes,Neighborhood__NoRidge,Neighborhood__NridgHt,Neighborhood__OldTown,Neighborhood__SWISU,Neighborhood__Sawyer,Neighborhood__SawyerW,Neighborhood__Somerst,Neighborhood__StoneBr,Neighborhood__Timber,Neighborhood__Veenker,Condition1__Artery,Condition1__Feedr,Condition1__PosA,Condition1__PosN,Condition1__RRAe,Condition1__RRAn,Condition1__RRNe,Condition1__RRNn,Condition2__Artery,Condition2__Feedr,Condition2__PosA,Condition2__PosN,Condition2__RRAe,Condition2__RRAn,Condition2__RRNn,BldgType__2fmCon,BldgType__Duplex,BldgType__Twnhs,BldgType__TwnhsE,HouseStyle__1.5Fin,HouseStyle__1.5Unf,HouseStyle__2.5Fin,HouseStyle__2.5Unf,HouseStyle__2Story,HouseStyle__SFoyer,HouseStyle__SLvl,RoofStyle__Flat,RoofStyle__Gambrel,RoofStyle__Hip,RoofStyle__Mansard,RoofStyle__Shed,RoofMatl__Membran,RoofMatl__Metal,RoofMatl__Roll,RoofMatl__Tar&Grv,RoofMatl__WdShake,RoofMatl__WdShngl,Exterior1st__AsbShng,Exterior1st__AsphShn,Exterior1st__BrkComm,Exterior1st__BrkFace,Exterior1st__CBlock,Exterior1st__CemntBd,Exterior1st__HdBoard,Exterior1st__ImStucc,Exterior1st__MetalSd,Exterior1st__Plywood,Exterior1st__PreCast,Exterior1st__Stucco,Exterior1st__Wd Sdng,Exterior1st__WdShing,Exterior2nd__AsbShng,Exterior2nd__AsphShn,Exterior2nd__Brk Cmn,Exterior2nd__BrkFace,Exterior2nd__CBlock,Exterior2nd__CmentBd,Exterior2nd__HdBoard,Exterior2nd__ImStucc,Exterior2nd__MetalSd,Exterior2nd__Plywood,Exterior2nd__PreCast,Exterior2nd__Stone,Exterior2nd__Stucco,Exterior2nd__Wd Sdng,Exterior2nd__Wd Shng,Heating__Floor,Heating__GasW,Heating__Grav,Heating__OthW,Heating__Wall,HeatingQC__0,HeatingQC__1,HeatingQC__2,HeatingQC__3,CentralAir__N,Electrical__FuseA,Electrical__FuseF,Electrical__FuseP,Functional__Maj1,Functional__Maj2,Functional__Min1,Functional__Min2,Functional__Mod,Functional__Sal,GarageType__2Types,GarageType__Basment,GarageType__BuiltIn,GarageType__CarPort,GarageType__Detchd,GarageType__NA,PavedDrive__N,PavedDrive__P,MiscFeature__Gar2,MiscFeature__Othr,MiscFeature__Shed,MiscFeature__TenC,SaleType__COD,SaleType__CWD,SaleType__Con,SaleType__ConLD,SaleType__ConLI,SaleType__ConLw,SaleType__New,SaleType__Oth,SaleType__VWD,SaleCondition__Abnorml,SaleCondition__AdjLand,SaleCondition__Alloca,SaleCondition__Family,SaleCondition__Partial
1,909176150,856,126000,30,81.468023,7890,3,6,6,1939,1950,0,0.0,2,2,3,3,3,1,3,238.0,1,0.0,618.0,856.0,856,0,0,1.0,0.0,1,0,2,1,2,4,1,4,1939.0,1,2.0,399.0,3,3,0,0,0,0,166,0,0,0,0,3,2010,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,905476230,1049,139500,120,42.0,4235,3,5,5,1984,1984,2,149.0,3,2,3,4,3,2,6,552.0,5,393.0,104.0,1049.0,1049,0,0,1.0,0.0,2,0,2,1,3,5,0,0,1984.0,3,1.0,266.0,3,3,0,105,0,0,0,0,0,0,0,2,2009,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,911128020,1001,124900,30,60.0,6060,3,5,9,1930,2007,0,0.0,3,2,2,3,3,1,5,737.0,1,0.0,100.0,837.0,1001,0,0,0.0,0.0,1,0,2,1,3,5,0,0,1930.0,1,1.0,216.0,3,1,154,0,42,86,0,0,0,0,0,11,2007,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,535377150,1039,114000,70,80.0,8146,3,4,8,1900,2003,0,0.0,3,3,2,2,3,1,1,0.0,1,0.0,405.0,405.0,717,322,0,0.0,0.0,1,0,2,1,2,6,0,0,1940.0,1,1.0,281.0,3,3,0,0,168,0,111,0,0,0,0,5,2009,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,534177230,1665,227000,60,70.0,8400,3,8,6,2001,2001,0,0.0,3,2,5,4,3,1,6,643.0,1,0.0,167.0,810.0,810,855,0,1.0,0.0,2,1,3,1,3,6,0,0,2001.0,3,2.0,528.0,3,3,0,45,0,0,0,0,0,0,0,11,2009,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
# Quantitative:
# LotArea, GrLivArea, YearBuilt,BedroomAbvGr,

# Nominal Categorical:
    
    #BldgType :1Fam,2FmCon,TwnhsE,Twnhs,Duplex
    #OverallQual
    #OverallCond
    #LandSlope: Gtl,Mod,Sev
    
# Condition:
    #PavedDrive,MSZoning
    
multi_linear_var=data_House_price[['SalePrice','LotArea', 'GrLivArea', 'BedroomAbvGr','LandSlope','YearBuilt','MSZoning','BldgType','OverallQual','OverallCond','PavedDrive']]    

multi_linear_var

enc = OrdinalEncoder()
enc.fit(multi_linear_var[["OverallQual","BldgType","OverallCond","LandSlope"]])
multi_linear_var[["OverallQual","BldgType","OverallCond","LandSlope"]] = enc.transform(multi_linear_var[["OverallQual","BldgType","OverallCond","LandSlope"]])

# onehot_encoder = OneHotEncoder(sparse=False, drop="first")
# print(onehot_encoder.fit_transform(multi_linear_var[["PavedDrive"]]))
multi_linear_var = pd.get_dummies(multi_linear_var, columns = ['PavedDrive'],drop_first = True)
multi_linear_var = pd.get_dummies(multi_linear_var, columns = ['MSZoning'],drop_first = True)


#x2=multi_linear_var[['LotArea', 'GrLivArea', 'BedroomAbvGr','LandSlope','YearBuilt','MSZoning','BldgType','OverallQual','OverallCond','PavedDrive']]
x2=multi_linear_var.iloc[: , 1:]
y2=multi_linear_var['SalePrice']

#print(multi_linear_var.iloc[: , 1:])

ols2 = linear_model.LinearRegression()
lm_test2=ols2.fit(x2, y2)

print(lm_test2.score(x2, y2))


################### Summary

features2 = sm.add_constant(x2)
ols_sm2   = OLS(y2,features2)
lm_test2_1    = ols_sm2.fit()

print(lm_test2_1.summary())

# print(data_House_price['LandSlope'].value_counts())
# np.sum(data_House_price['OverallCond'].isnull())