In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model, metrics
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
#load the dataset
df = pd.read_csv("train.csv")
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [3]:
#Find out columns which are having null values
df.isna().any()[lambda x: x]

LotFrontage     True
Alley           True
MasVnrType      True
MasVnrArea      True
BsmtQual        True
BsmtCond        True
BsmtExposure    True
BsmtFinType1    True
BsmtFinType2    True
Electrical      True
FireplaceQu     True
GarageType      True
GarageYrBlt     True
GarageFinish    True
GarageQual      True
GarageCond      True
PoolQC          True
Fence           True
MiscFeature     True
dtype: bool

In [4]:
#impute null values with meaningfully applicable data
df['LotFrontage'] = df['LotFrontage'].apply(lambda x:float(str(x).replace('nan','0.0')))
df['MasVnrArea'] = df['MasVnrArea'].apply(lambda x:float(str(x).replace('nan','0.0')))
avgYear = int(df['GarageYrBlt'].sum()/(len(df.index)-df['GarageYrBlt'].isnull().sum()))
df['GarageYrBlt'] = df['GarageYrBlt'].apply(lambda x:float(str(x).replace('nan','0.0')))
df['GarageYrBlt'] = df['GarageYrBlt'].astype(int)

In [5]:
df['Electrical'] = df['Electrical'].apply(lambda x:str(x).replace('na','None'))
df['Alley'] = df['Alley'].apply(lambda x:str(x).replace('na','None'))
df['MasVnrType'] = df['MasVnrType'].apply(lambda x:str(x).replace('na','None'))
df['BsmtQual'] = df['BsmtQual'].apply(lambda x:str(x).replace('na','None'))
df['BsmtCond'] = df['BsmtCond'].apply(lambda x:str(x).replace('na','None'))
df['BsmtExposure'] = df['BsmtExposure'].apply(lambda x:str(x).replace('na','None'))
df['BsmtFinType1'] = df['BsmtFinType1'].apply(lambda x:str(x).replace('na','None'))
df['BsmtFinType2'] = df['BsmtFinType2'].apply(lambda x:str(x).replace('na','None'))
df['FireplaceQu'] = df['FireplaceQu'].apply(lambda x:str(x).replace('na','None'))
df['GarageType'] = df['GarageType'].apply(lambda x:str(x).replace('na','None'))
df['GarageFinish'] = df['GarageFinish'].apply(lambda x:str(x).replace('na','None'))
df['GarageQual'] = df['GarageQual'].apply(lambda x:str(x).replace('na','None'))
df['GarageCond'] = df['GarageCond'].apply(lambda x:str(x).replace('na','None'))
df['PoolQC'] = df['PoolQC'].apply(lambda x:str(x).replace('na','None'))
df['Fence'] = df['Fence'].apply(lambda x:str(x).replace('na','None'))
df['MiscFeature'] = df['MiscFeature'].apply(lambda x:str(x).replace('na','None'))

In [6]:
df['Alley'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1460 entries, 0 to 1459
Series name: Alley
Non-Null Count  Dtype 
--------------  ----- 
1460 non-null   object
dtypes: object(1)
memory usage: 11.5+ KB


In [7]:
df.isna().any()[lambda x: x]

Series([], dtype: bool)

In [8]:
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,Nonen,Reg,Lvl,AllPub,...,0,Nonen,Nonen,Nonen,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,Nonen,Reg,Lvl,AllPub,...,0,Nonen,Nonen,Nonen,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,Nonen,IR1,Lvl,AllPub,...,0,Nonen,Nonen,Nonen,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,Nonen,IR1,Lvl,AllPub,...,0,Nonen,Nonen,Nonen,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,Nonen,IR1,Lvl,AllPub,...,0,Nonen,Nonen,Nonen,0,12,2008,WD,Normal,250000


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1460 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          1460 non-null   object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [10]:
df=df.drop('Id',axis=1)

#Add the dummies
dummy_columns = ['MSZoning','Street','LotShape','LandContour','Utilities','LotConfig','LandSlope','Neighborhood','Condition1','Condition2',
               'BldgType','HouseStyle','RoofStyle','RoofMatl','Exterior1st','Exterior2nd','MasVnrType','ExterQual','ExterCond','Foundation',
               'BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','Heating','HeatingQC','CentralAir','Electrical','KitchenQual',
               'Functional','FireplaceQu','GarageType','GarageFinish','GarageQual','GarageCond','PavedDrive','PoolQC','Fence','MiscFeature',
               'SaleType','SaleCondition','Alley']

dummy = pd.get_dummies(df[dummy_columns], drop_first=True)
df = pd.concat([df,dummy],axis=1)
df = df.drop(dummy_columns,axis=1)
df.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,Alley_Nonen,Alley_Pave
0,60,65.0,8450,7,5,2003,2003,196.0,706,0,...,0,0,1,0,0,0,1,0,1,0
1,20,80.0,9600,6,8,1976,1976,0.0,978,0,...,0,0,1,0,0,0,1,0,1,0
2,60,68.0,11250,7,5,2001,2002,162.0,486,0,...,0,0,1,0,0,0,1,0,1,0
3,70,60.0,9550,7,5,1915,1970,0.0,216,0,...,0,0,1,0,0,0,0,0,1,0
4,60,84.0,14260,8,5,2000,2000,350.0,655,0,...,0,0,1,0,0,0,1,0,1,0


In [11]:
df.info(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 262 columns):
 #    Column                 Dtype  
---   ------                 -----  
 0    MSSubClass             int64  
 1    LotFrontage            float64
 2    LotArea                int64  
 3    OverallQual            int64  
 4    OverallCond            int64  
 5    YearBuilt              int64  
 6    YearRemodAdd           int64  
 7    MasVnrArea             float64
 8    BsmtFinSF1             int64  
 9    BsmtFinSF2             int64  
 10   BsmtUnfSF              int64  
 11   TotalBsmtSF            int64  
 12   1stFlrSF               int64  
 13   2ndFlrSF               int64  
 14   LowQualFinSF           int64  
 15   GrLivArea              int64  
 16   BsmtFullBath           int64  
 17   BsmtHalfBath           int64  
 18   FullBath               int64  
 19   HalfBath               int64  
 20   BedroomAbvGr           int64  
 21   KitchenAbvGr           int64  
 22 

In [12]:
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler
scaler = MinMaxScaler()
col_to_scale = ['MSSubClass','OverallQual','LotArea','OverallCond','KitchenAbvGr','TotRmsAbvGrd','MasVnrArea','BsmtFinSF1']
df[df.columns] = scaler.fit_transform(df[df.columns])
df.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,Alley_Nonen,Alley_Pave
0,0.235294,0.207668,0.03342,0.666667,0.5,0.949275,0.883333,0.1225,0.125089,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,0.0,0.255591,0.038795,0.555556,0.875,0.753623,0.433333,0.0,0.173281,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,0.235294,0.217252,0.046507,0.666667,0.5,0.934783,0.866667,0.10125,0.086109,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,0.294118,0.191693,0.038561,0.666667,0.5,0.311594,0.333333,0.0,0.038271,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.235294,0.268371,0.060576,0.777778,0.5,0.927536,0.833333,0.21875,0.116052,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [13]:
y = df.pop('SalePrice')
X = df
y.head()

0    0.241078
1    0.203583
2    0.261908
3    0.145952
4    0.298709
Name: SalePrice, dtype: float64

In [14]:
#Split the data into train & test sets.
X_tr,X_te,y_tr,y_te = train_test_split(X,y, train_size=0.7,random_state=100)

In [15]:
X_tr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1021 entries, 318 to 792
Columns: 261 entries, MSSubClass to Alley_Pave
dtypes: float64(261)
memory usage: 2.0 MB


In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
lr = LinearRegression()
rfe = RFE(lr, step=30)
rfe = rfe.fit(X_tr, y_tr)

In [17]:
list(zip(X_tr.columns, rfe.support_, rfe.ranking_))

[('MSSubClass', False, 5),
 ('LotFrontage', False, 4),
 ('LotArea', True, 1),
 ('OverallQual', True, 1),
 ('OverallCond', True, 1),
 ('YearBuilt', True, 1),
 ('YearRemodAdd', True, 1),
 ('MasVnrArea', True, 1),
 ('BsmtFinSF1', True, 1),
 ('BsmtFinSF2', True, 1),
 ('BsmtUnfSF', True, 1),
 ('TotalBsmtSF', True, 1),
 ('1stFlrSF', True, 1),
 ('2ndFlrSF', True, 1),
 ('LowQualFinSF', True, 1),
 ('GrLivArea', True, 1),
 ('BsmtFullBath', True, 1),
 ('BsmtHalfBath', False, 6),
 ('FullBath', False, 5),
 ('HalfBath', True, 1),
 ('BedroomAbvGr', True, 1),
 ('KitchenAbvGr', True, 1),
 ('TotRmsAbvGrd', False, 6),
 ('Fireplaces', False, 4),
 ('GarageYrBlt', True, 1),
 ('GarageCars', True, 1),
 ('GarageArea', True, 1),
 ('WoodDeckSF', True, 1),
 ('OpenPorchSF', False, 4),
 ('EnclosedPorch', True, 1),
 ('3SsnPorch', False, 4),
 ('ScreenPorch', False, 2),
 ('PoolArea', True, 1),
 ('MiscVal', True, 1),
 ('MoSold', False, 4),
 ('YrSold', False, 4),
 ('MSZoning_FV', True, 1),
 ('MSZoning_RH', True, 1),
 ('

In [18]:
#Selecting columns suggested by rfe. 
col = X_tr.columns[rfe.support_]
col

Index(['LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       ...
       'PoolQC_Gd', 'PoolQC_Nonen', 'MiscFeature_Nonen', 'MiscFeature_Othr',
       'MiscFeature_Shed', 'SaleType_Con', 'SaleType_New',
       'SaleCondition_AdjLand', 'SaleCondition_Alloca',
       'SaleCondition_Partial'],
      dtype='object', length=130)

In [19]:
X_tr = X_tr[col]
X_te = X_te[col]
y_train = y_tr
y_test = y_te

In [20]:
X_tr.head()

Unnamed: 0,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,PoolQC_Gd,PoolQC_Nonen,MiscFeature_Nonen,MiscFeature_Othr,MiscFeature_Shed,SaleType_Con,SaleType_New,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Partial
318,0.040197,0.666667,0.5,0.876812,0.716667,0.16,0.174876,0.0,0.15411,0.220458,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
239,0.03478,0.555556,0.375,0.528986,0.0,0.0,0.016655,0.0,0.274401,0.120295,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
986,0.018743,0.555556,0.875,0.275362,0.883333,0.0,0.0,0.0,0.20762,0.079378,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1416,0.046928,0.333333,0.625,0.094203,0.0,0.0,0.0,0.0,0.33262,0.127169,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
390,0.033209,0.444444,0.875,0.202899,0.0,0.0,0.0427,0.265265,0.098031,0.140917,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# LM value for this data is 
# R2 - Training : 0.9456798856254166
# R2 - Test     : 1.2594062741158006e+23
# Clearly model overfitting is a problem.

lm = LinearRegression()

# Fit a line
lm.fit(X_tr, y_tr)

In [22]:
from sklearn.metrics import r2_score, mean_squared_error
y_pred_train = lm.predict(X_tr)
y_pred_test = lm.predict(X_te)

metric = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric.append(mse_test_lr**0.5)

0.9456798856254166
-1.2594062741158006e+23
0.6685318285275208
6.862939117710612e+23
0.0006547814187341046
1.563311871915857e+21


In [23]:

from sklearn.model_selection import GridSearchCV

params = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 
 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 3.0, 
 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20, 50, 100, 500, 1000 ]}

ridge = Ridge()

# cross validation
folds = 10
model_cv = GridSearchCV(estimator = ridge, 
                        param_grid = params, 
                        scoring= 'neg_mean_absolute_error',  
                        cv = folds, 
                        return_train_score=True,
                        verbose = 1)            
model_cv.fit(X_tr, y_tr)

print(model_cv.best_params_)


Fitting 10 folds for each of 28 candidates, totalling 280 fits
{'alpha': 0.3}


In [24]:

alpha = 0.2
ridge = Ridge(alpha=alpha)

ridge.fit(X_tr, y_tr)
print(ridge.coef_)

[ 1.22945129e-01  9.08148118e-02  5.77855212e-02  7.54739863e-02
  1.29318666e-02  3.57822937e-02  1.23719768e-01  2.27000306e-02
  1.85700563e-02  1.26859880e-01  1.85539933e-01  1.13664161e-01
 -8.70634121e-03  1.95614072e-01  2.12797671e-02 -1.31344717e-02
 -4.92728604e-02 -1.02414818e-01  9.01706006e-03  3.77786547e-02
  1.28403102e-02  1.14875762e-02 -3.45418417e-03  8.26138643e-02
  5.85205783e-03  1.77049147e-02  2.62813617e-02  2.41097439e-02
  1.91848915e-02  4.58991585e-02 -1.48994930e-03 -3.13526303e-02
  1.30756016e-02 -8.59857681e-03  1.12581440e-02  1.07034095e-02
  3.90953289e-02 -1.68472363e-02  2.28724194e-02  5.53421686e-02
  3.08464244e-02  2.88874954e-02  3.84175375e-02  1.56749445e-02
  1.71285952e-02  8.04177133e-02 -4.66005972e-01 -2.90735602e-02
 -1.14555700e-02  1.08566522e-02 -3.55232060e-02 -2.42777114e-02
  1.17312585e-02 -2.55179540e-02 -9.75943622e-03  1.18406431e-02
  1.95674488e-02  4.10427585e-02  7.70298288e-02  1.24904357e-01
  7.70110208e-02  6.12042

In [25]:
# Lets calculate some metrics such as R2 score, RSS and RMSE
y_test = y_te
y_train = y_tr
y_pred_train = ridge.predict(X_tr)
y_pred_test = ridge.predict(X_te)

metric2 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric2.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric2.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric2.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric2.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric2.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric2.append(mse_test_lr**0.5)

0.9364716044406161
0.6940980311428007
0.7818605490014722
1.666965324377708
0.0007657791860935085
0.0037971875270562828


In [26]:
import statsmodels.api as sm
X_tr_sm = sm.add_constant(X_tr) #adds const=1 column .. useful in intercept calc
X_te_sm = sm.add_constant(X_te)
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,138.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:09,Log-Likelihood:,2295.0
No. Observations:,1021,AIC:,-4360.0
Df Residuals:,906,BIC:,-3793.0
Df Model:,114,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0361,0.484,-8.341,0.000,-4.986,-3.086
LotArea,0.2032,0.028,7.259,0.000,0.148,0.258
OverallQual,0.0826,0.012,7.158,0.000,0.060,0.105
OverallCond,0.0560,0.009,6.264,0.000,0.038,0.074
YearBuilt,0.0725,0.011,6.311,0.000,0.050,0.095
YearRemodAdd,0.0151,0.004,3.437,0.001,0.006,0.024
MasVnrArea,0.0435,0.009,4.636,0.000,0.025,0.062
BsmtFinSF1,0.1744,0.018,9.907,0.000,0.140,0.209
BsmtFinSF2,0.0283,0.009,3.152,0.002,0.011,0.046

0,1,2,3
Omnibus:,300.188,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4321.817
Skew:,0.936,Prob(JB):,0.0
Kurtosis:,12.904,Cond. No.,1.05e+16


In [27]:
lasso = Lasso()

# cross validation
model_cv = GridSearchCV(estimator = lasso, 
                        param_grid = params, 
                        scoring= 'neg_mean_absolute_error', 
                        cv = 10, 
                        return_train_score=True,
                        verbose = 1)            

model_cv.fit(X_tr, y_tr) 
print(model_cv.best_params_)


Fitting 10 folds for each of 28 candidates, totalling 280 fits
{'alpha': 0.0001}


In [28]:
alpha = 0.0001
lasso = Lasso(alpha=alpha)

lasso.fit(X_tr, y_tr)
print(lasso.coef_)

[ 6.88057377e-02  1.11860060e-01  5.29178702e-02  6.20699515e-02
  1.53483027e-02  2.60897091e-02  7.92230452e-02  0.00000000e+00
 -0.00000000e+00  1.84087889e-01  0.00000000e+00  1.42018792e-02
 -2.44427161e-02  4.03741733e-01  2.26725437e-02 -0.00000000e+00
 -2.97762145e-02 -8.51187290e-02 -0.00000000e+00  3.02328944e-02
  1.61153473e-02  1.28121022e-02 -0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  7.41193354e-03
 -5.10625987e-05  4.29777713e-03  1.31932736e-03 -0.00000000e+00
  1.30094620e-02  0.00000000e+00  0.00000000e+00  9.29026667e-03
  3.33357193e-02 -1.15148822e-02  0.00000000e+00  5.56885326e-02
  2.91568528e-02  2.55855770e-02  2.56502714e-02  6.64333107e-03
  1.39818659e-02  0.00000000e+00 -4.42493551e-01 -0.00000000e+00
 -0.00000000e+00  0.00000000e+00 -3.00162484e-02 -1.67350048e-02
  3.21254979e-03 -0.00000000e+00 -0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000

In [29]:


y_pred_train = lasso.predict(X_tr)
y_pred_test = lasso.predict(X_te)

metric3 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric3.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric3.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric3.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric3.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric3.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric3.append(mse_test_lr**0.5)

0.9279119121372996
0.6405771601229744
0.887206916789068
1.9586190082485389
0.0008689587823595182
0.004461546715828107


In [30]:
betas = pd.DataFrame(index=X_tr.columns)
betas.rows = X_tr.columns

betas['Linear'] = lm.coef_
betas['Ridge'] = ridge.coef_
betas['Lasso'] = lasso.coef_

pd.set_option('display.max_rows', None)
betas.head(68)

Unnamed: 0,Linear,Ridge,Lasso
LotArea,0.2031913,0.122945,0.068806
OverallQual,0.08026972,0.090815,0.11186
OverallCond,0.0525758,0.057786,0.052918
YearBuilt,0.0698019,0.075474,0.06207
YearRemodAdd,0.01541679,0.012932,0.015348
MasVnrArea,0.04311697,0.035782,0.02609
BsmtFinSF1,764313400000.0,0.12372,0.079223
BsmtFinSF2,199609800000.0,0.0227,0.0
BsmtUnfSF,316342300000.0,0.01857,-0.0
TotalBsmtSF,-827419400000.0,0.12686,0.184088


In [31]:
lr_table = {'Metric': ['R2 Score (Train)','R2 Score (Test)','RSS (Train)','RSS (Test)',
                       'MSE (Train)','MSE (Test)'], 
        'Linear Regression': metric
        }

lr_metric = pd.DataFrame(lr_table ,columns = ['Metric', 'Linear Regression'] )

rg_metric = pd.Series(metric2, name = 'Ridge Regression')
ls_metric = pd.Series(metric3, name = 'Lasso Regression')

final_metric = pd.concat([lr_metric, rg_metric, ls_metric], axis = 1)

final_metric

Unnamed: 0,Metric,Linear Regression,Ridge Regression,Lasso Regression
0,R2 Score (Train),0.9456799,0.936472,0.927912
1,R2 Score (Test),-1.259406e+23,0.694098,0.640577
2,RSS (Train),0.6685318,0.781861,0.887207
3,RSS (Test),6.862939e+23,1.666965,1.958619
4,MSE (Train),0.0255887,0.027673,0.029478
5,MSE (Test),39538740000.0,0.061621,0.066795


In [32]:
#Looking at above R2 values for Linear, Ridge & Lasso Regression , could say model is overfit. 

In [33]:
#Remove the columns to address multicollinearity .
#find the columns whose P value under 0.05 & having low VIF value. After repeating steps for mulitple time
#Finally left with 30 columns in execution step number 147 of this notebook

In [34]:
import statsmodels.api as sm
X_tr_sm = sm.add_constant(X_tr) #adds const=1 column .. useful in intercept calc
X_te_sm = sm.add_constant(X_te)
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,138.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:14,Log-Likelihood:,2295.0
No. Observations:,1021,AIC:,-4360.0
Df Residuals:,906,BIC:,-3793.0
Df Model:,114,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0361,0.484,-8.341,0.000,-4.986,-3.086
LotArea,0.2032,0.028,7.259,0.000,0.148,0.258
OverallQual,0.0826,0.012,7.158,0.000,0.060,0.105
OverallCond,0.0560,0.009,6.264,0.000,0.038,0.074
YearBuilt,0.0725,0.011,6.311,0.000,0.050,0.095
YearRemodAdd,0.0151,0.004,3.437,0.001,0.006,0.024
MasVnrArea,0.0435,0.009,4.636,0.000,0.025,0.062
BsmtFinSF1,0.1744,0.018,9.907,0.000,0.140,0.209
BsmtFinSF2,0.0283,0.009,3.152,0.002,0.011,0.046

0,1,2,3
Omnibus:,300.188,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4321.817
Skew:,0.936,Prob(JB):,0.0
Kurtosis:,12.904,Cond. No.,1.05e+16


In [35]:
X_tr_sm.pop('GarageCond_Po')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,138.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:14,Log-Likelihood:,2295.0
No. Observations:,1021,AIC:,-4360.0
Df Residuals:,906,BIC:,-3793.0
Df Model:,114,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0361,0.484,-8.341,0.000,-4.986,-3.086
LotArea,0.2032,0.028,7.259,0.000,0.148,0.258
OverallQual,0.0826,0.012,7.158,0.000,0.060,0.105
OverallCond,0.0560,0.009,6.264,0.000,0.038,0.074
YearBuilt,0.0725,0.011,6.311,0.000,0.050,0.095
YearRemodAdd,0.0151,0.004,3.437,0.001,0.006,0.024
MasVnrArea,0.0435,0.009,4.636,0.000,0.025,0.062
BsmtFinSF1,0.1744,0.018,9.907,0.000,0.140,0.209
BsmtFinSF2,0.0283,0.009,3.152,0.002,0.011,0.046

0,1,2,3
Omnibus:,300.188,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4321.817
Skew:,0.936,Prob(JB):,0.0
Kurtosis:,12.904,Cond. No.,1.05e+16


In [36]:
X_tr_sm.pop('GarageQual_Gd')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,140.0
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:15,Log-Likelihood:,2294.9
No. Observations:,1021,AIC:,-4362.0
Df Residuals:,907,BIC:,-3800.0
Df Model:,113,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0280,0.483,-8.333,0.000,-4.977,-3.079
LotArea,0.2031,0.028,7.258,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.149,0.000,0.060,0.105
OverallCond,0.0561,0.009,6.269,0.000,0.039,0.074
YearBuilt,0.0726,0.011,6.322,0.000,0.050,0.095
YearRemodAdd,0.0152,0.004,3.458,0.001,0.007,0.024
MasVnrArea,0.0435,0.009,4.633,0.000,0.025,0.062
BsmtFinSF1,0.1740,0.018,9.902,0.000,0.140,0.209
BsmtFinSF2,0.0282,0.009,3.151,0.002,0.011,0.046

0,1,2,3
Omnibus:,299.839,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4311.251
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,12.892,Cond. No.,1.05e+16


In [37]:

X_tr_sm.pop('Condition2_RRNn')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,141.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:15,Log-Likelihood:,2294.8
No. Observations:,1021,AIC:,-4364.0
Df Residuals:,908,BIC:,-3807.0
Df Model:,112,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0270,0.483,-8.335,0.000,-4.975,-3.079
LotArea,0.2031,0.028,7.263,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.154,0.000,0.060,0.105
OverallCond,0.0561,0.009,6.276,0.000,0.039,0.074
YearBuilt,0.0728,0.011,6.345,0.000,0.050,0.095
YearRemodAdd,0.0153,0.004,3.490,0.001,0.007,0.024
MasVnrArea,0.0434,0.009,4.633,0.000,0.025,0.062
BsmtFinSF1,0.1739,0.018,9.899,0.000,0.139,0.208
BsmtFinSF2,0.0288,0.009,3.232,0.001,0.011,0.046

0,1,2,3
Omnibus:,299.806,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4313.465
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.895,Cond. No.,1.05e+16


In [38]:

X_tr_sm.pop('GarageFinish_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,141.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:15,Log-Likelihood:,2294.8
No. Observations:,1021,AIC:,-4364.0
Df Residuals:,908,BIC:,-3807.0
Df Model:,112,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0270,0.483,-8.335,0.000,-4.975,-3.079
LotArea,0.2031,0.028,7.263,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.154,0.000,0.060,0.105
OverallCond,0.0561,0.009,6.276,0.000,0.039,0.074
YearBuilt,0.0728,0.011,6.345,0.000,0.050,0.095
YearRemodAdd,0.0153,0.004,3.490,0.001,0.007,0.024
MasVnrArea,0.0434,0.009,4.633,0.000,0.025,0.062
BsmtFinSF1,0.1739,0.018,9.899,0.000,0.139,0.208
BsmtFinSF2,0.0288,0.009,3.232,0.001,0.011,0.046

0,1,2,3
Omnibus:,299.806,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4313.465
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.895,Cond. No.,1.05e+16


In [39]:


X_tr_sm.pop('GarageType_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,141.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:16,Log-Likelihood:,2294.8
No. Observations:,1021,AIC:,-4364.0
Df Residuals:,908,BIC:,-3807.0
Df Model:,112,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0270,0.483,-8.335,0.000,-4.975,-3.079
LotArea,0.2031,0.028,7.263,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.154,0.000,0.060,0.105
OverallCond,0.0561,0.009,6.276,0.000,0.039,0.074
YearBuilt,0.0728,0.011,6.345,0.000,0.050,0.095
YearRemodAdd,0.0153,0.004,3.490,0.001,0.007,0.024
MasVnrArea,0.0434,0.009,4.633,0.000,0.025,0.062
BsmtFinSF1,0.1739,0.018,9.899,0.000,0.139,0.208
BsmtFinSF2,0.0288,0.009,3.232,0.001,0.011,0.046

0,1,2,3
Omnibus:,299.806,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4313.465
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.895,Cond. No.,1.05e+16


In [40]:

X_tr_sm.pop('GarageCond_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,141.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:16,Log-Likelihood:,2294.8
No. Observations:,1021,AIC:,-4364.0
Df Residuals:,908,BIC:,-3807.0
Df Model:,112,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0270,0.483,-8.335,0.000,-4.975,-3.079
LotArea,0.2031,0.028,7.263,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.154,0.000,0.060,0.105
OverallCond,0.0561,0.009,6.276,0.000,0.039,0.074
YearBuilt,0.0728,0.011,6.345,0.000,0.050,0.095
YearRemodAdd,0.0153,0.004,3.490,0.001,0.007,0.024
MasVnrArea,0.0434,0.009,4.633,0.000,0.025,0.062
BsmtFinSF1,0.1739,0.018,9.899,0.000,0.139,0.208
BsmtFinSF2,0.0288,0.009,3.232,0.001,0.011,0.046

0,1,2,3
Omnibus:,299.806,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4313.465
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.895,Cond. No.,1.05e+16


In [41]:

X_tr_sm.pop('GarageQual_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,142.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:16,Log-Likelihood:,2294.6
No. Observations:,1021,AIC:,-4365.0
Df Residuals:,909,BIC:,-3813.0
Df Model:,111,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9870,0.476,-8.370,0.000,-4.922,-3.052
LotArea,0.2028,0.028,7.256,0.000,0.148,0.258
OverallQual,0.0824,0.012,7.155,0.000,0.060,0.105
OverallCond,0.0557,0.009,6.259,0.000,0.038,0.073
YearBuilt,0.0746,0.011,6.870,0.000,0.053,0.096
YearRemodAdd,0.0158,0.004,3.717,0.000,0.007,0.024
MasVnrArea,0.0431,0.009,4.612,0.000,0.025,0.061
BsmtFinSF1,0.1734,0.018,9.891,0.000,0.139,0.208
BsmtFinSF2,0.0285,0.009,3.213,0.001,0.011,0.046

0,1,2,3
Omnibus:,300.166,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4325.462
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,12.908,Cond. No.,1.05e+16


In [42]:

X_tr_sm.pop('Condition2_RRAn')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,144.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:17,Log-Likelihood:,2294.5
No. Observations:,1021,AIC:,-4367.0
Df Residuals:,910,BIC:,-3820.0
Df Model:,110,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9867,0.476,-8.373,0.000,-4.921,-3.052
LotArea,0.2027,0.028,7.257,0.000,0.148,0.258
OverallQual,0.0825,0.012,7.173,0.000,0.060,0.105
OverallCond,0.0554,0.009,6.242,0.000,0.038,0.073
YearBuilt,0.0744,0.011,6.857,0.000,0.053,0.096
YearRemodAdd,0.0158,0.004,3.727,0.000,0.007,0.024
MasVnrArea,0.0431,0.009,4.612,0.000,0.025,0.061
BsmtFinSF1,0.1733,0.018,9.890,0.000,0.139,0.208
BsmtFinSF2,0.0285,0.009,3.209,0.001,0.011,0.046

0,1,2,3
Omnibus:,300.326,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4320.028
Skew:,0.937,Prob(JB):,0.0
Kurtosis:,12.901,Cond. No.,1.05e+16


In [43]:

X_tr_sm.pop('Neighborhood_Blueste')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,145.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:17,Log-Likelihood:,2294.3
No. Observations:,1021,AIC:,-4369.0
Df Residuals:,911,BIC:,-3826.0
Df Model:,109,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9848,0.476,-8.373,0.000,-4.919,-3.051
LotArea,0.2026,0.028,7.254,0.000,0.148,0.257
OverallQual,0.0825,0.011,7.174,0.000,0.060,0.105
OverallCond,0.0556,0.009,6.262,0.000,0.038,0.073
YearBuilt,0.0744,0.011,6.863,0.000,0.053,0.096
YearRemodAdd,0.0158,0.004,3.719,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.598,0.000,0.025,0.061
BsmtFinSF1,0.1731,0.018,9.886,0.000,0.139,0.207
BsmtFinSF2,0.0290,0.009,3.293,0.001,0.012,0.046

0,1,2,3
Omnibus:,300.134,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4312.959
Skew:,0.936,Prob(JB):,0.0
Kurtosis:,12.893,Cond. No.,1.05e+16


In [44]:

X_tr_sm.pop('BsmtExposure_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,147.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:17,Log-Likelihood:,2294.1
No. Observations:,1021,AIC:,-4370.0
Df Residuals:,912,BIC:,-3833.0
Df Model:,108,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9837,0.476,-8.373,0.000,-4.917,-3.050
LotArea,0.2022,0.028,7.245,0.000,0.147,0.257
OverallQual,0.0825,0.011,7.175,0.000,0.060,0.105
OverallCond,0.0556,0.009,6.272,0.000,0.038,0.073
YearBuilt,0.0744,0.011,6.861,0.000,0.053,0.096
YearRemodAdd,0.0158,0.004,3.717,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.592,0.000,0.025,0.061
BsmtFinSF1,0.1732,0.018,9.893,0.000,0.139,0.208
BsmtFinSF2,0.0290,0.009,3.294,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.895,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4304.711
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,12.884,Cond. No.,1.05e+16


In [45]:

X_tr_sm.pop('Exterior1st_AsphShn')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,147.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:18,Log-Likelihood:,2294.1
No. Observations:,1021,AIC:,-4370.0
Df Residuals:,912,BIC:,-3833.0
Df Model:,108,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9837,0.476,-8.373,0.000,-4.917,-3.050
LotArea,0.2022,0.028,7.245,0.000,0.147,0.257
OverallQual,0.0825,0.011,7.175,0.000,0.060,0.105
OverallCond,0.0556,0.009,6.272,0.000,0.038,0.073
YearBuilt,0.0744,0.011,6.861,0.000,0.053,0.096
YearRemodAdd,0.0158,0.004,3.717,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.592,0.000,0.025,0.061
BsmtFinSF1,0.1732,0.018,9.893,0.000,0.139,0.208
BsmtFinSF2,0.0290,0.009,3.294,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.895,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4304.711
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,12.884,Cond. No.,1.05e+16


In [46]:

X_tr_sm.pop('GarageCond_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,148.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:18,Log-Likelihood:,2293.9
No. Observations:,1021,AIC:,-4372.0
Df Residuals:,913,BIC:,-3839.0
Df Model:,107,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9830,0.476,-8.375,0.000,-4.916,-3.050
LotArea,0.2021,0.028,7.245,0.000,0.147,0.257
OverallQual,0.0826,0.011,7.186,0.000,0.060,0.105
OverallCond,0.0556,0.009,6.272,0.000,0.038,0.073
YearBuilt,0.0749,0.011,6.939,0.000,0.054,0.096
YearRemodAdd,0.0157,0.004,3.697,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.596,0.000,0.025,0.061
BsmtFinSF1,0.1728,0.017,9.882,0.000,0.139,0.207
BsmtFinSF2,0.0290,0.009,3.287,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.599,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4292.794
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,12.87,Cond. No.,1.05e+16


In [47]:


X_tr_sm.pop('GarageYrBlt')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,150.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:18,Log-Likelihood:,2293.8
No. Observations:,1021,AIC:,-4374.0
Df Residuals:,914,BIC:,-3846.0
Df Model:,106,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9807,0.475,-8.376,0.000,-4.913,-3.048
LotArea,0.2020,0.028,7.245,0.000,0.147,0.257
OverallQual,0.0827,0.011,7.209,0.000,0.060,0.105
OverallCond,0.0558,0.009,6.310,0.000,0.038,0.073
YearBuilt,0.0750,0.011,6.951,0.000,0.054,0.096
YearRemodAdd,0.0157,0.004,3.697,0.000,0.007,0.024
MasVnrArea,0.0427,0.009,4.590,0.000,0.024,0.061
BsmtFinSF1,0.1729,0.017,9.890,0.000,0.139,0.207
BsmtFinSF2,0.0290,0.009,3.291,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.144,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4288.638
Skew:,0.932,Prob(JB):,0.0
Kurtosis:,12.866,Cond. No.,1.05e+16


In [48]:

X_tr_sm.pop('GarageCond_TA')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,151.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:19,Log-Likelihood:,2293.7
No. Observations:,1021,AIC:,-4375.0
Df Residuals:,915,BIC:,-3853.0
Df Model:,105,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9826,0.475,-8.383,0.000,-4.915,-3.050
LotArea,0.2018,0.028,7.242,0.000,0.147,0.256
OverallQual,0.0824,0.011,7.191,0.000,0.060,0.105
OverallCond,0.0552,0.009,6.287,0.000,0.038,0.072
YearBuilt,0.0740,0.011,6.949,0.000,0.053,0.095
YearRemodAdd,0.0158,0.004,3.728,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.608,0.000,0.025,0.061
BsmtFinSF1,0.1732,0.017,9.920,0.000,0.139,0.208
BsmtFinSF2,0.0292,0.009,3.324,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.333,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4284.134
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.86,Cond. No.,1.05e+16


In [49]:

X_tr_sm.pop('Exterior2nd_AsphShn')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,153.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:19,Log-Likelihood:,2293.4
No. Observations:,1021,AIC:,-4377.0
Df Residuals:,916,BIC:,-3859.0
Df Model:,104,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9858,0.475,-8.393,0.000,-4.918,-3.054
LotArea,0.2017,0.028,7.241,0.000,0.147,0.256
OverallQual,0.0820,0.011,7.173,0.000,0.060,0.104
OverallCond,0.0552,0.009,6.287,0.000,0.038,0.072
YearBuilt,0.0737,0.011,6.928,0.000,0.053,0.095
YearRemodAdd,0.0159,0.004,3.755,0.000,0.008,0.024
MasVnrArea,0.0430,0.009,4.622,0.000,0.025,0.061
BsmtFinSF1,0.1743,0.017,10.038,0.000,0.140,0.208
BsmtFinSF2,0.0295,0.009,3.353,0.001,0.012,0.047

0,1,2,3
Omnibus:,299.202,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4272.539
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.846,Cond. No.,1.05e+16


In [50]:
X_tr_sm.pop('BsmtFinType1_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,153.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:19,Log-Likelihood:,2293.4
No. Observations:,1021,AIC:,-4377.0
Df Residuals:,916,BIC:,-3859.0
Df Model:,104,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9858,0.475,-8.393,0.000,-4.918,-3.054
LotArea,0.2017,0.028,7.241,0.000,0.147,0.256
OverallQual,0.0820,0.011,7.173,0.000,0.060,0.104
OverallCond,0.0552,0.009,6.287,0.000,0.038,0.072
YearBuilt,0.0737,0.011,6.928,0.000,0.053,0.095
YearRemodAdd,0.0159,0.004,3.755,0.000,0.008,0.024
MasVnrArea,0.0430,0.009,4.622,0.000,0.025,0.061
BsmtFinSF1,0.1743,0.017,10.038,0.000,0.140,0.208
BsmtFinSF2,0.0295,0.009,3.353,0.001,0.012,0.047

0,1,2,3
Omnibus:,299.202,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4272.539
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.846,Cond. No.,1.05e+16


In [51]:
X_tr_sm.pop('BsmtCond_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,153.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:19,Log-Likelihood:,2293.4
No. Observations:,1021,AIC:,-4377.0
Df Residuals:,916,BIC:,-3859.0
Df Model:,104,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9858,0.475,-8.393,0.000,-4.918,-3.054
LotArea,0.2017,0.028,7.241,0.000,0.147,0.256
OverallQual,0.0820,0.011,7.173,0.000,0.060,0.104
OverallCond,0.0552,0.009,6.287,0.000,0.038,0.072
YearBuilt,0.0737,0.011,6.928,0.000,0.053,0.095
YearRemodAdd,0.0159,0.004,3.755,0.000,0.008,0.024
MasVnrArea,0.0430,0.009,4.622,0.000,0.025,0.061
BsmtFinSF1,0.1743,0.017,10.038,0.000,0.140,0.208
BsmtFinSF2,0.0295,0.009,3.353,0.001,0.012,0.047

0,1,2,3
Omnibus:,299.202,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4272.539
Skew:,0.934,Prob(JB):,0.0
Kurtosis:,12.846,Cond. No.,1.05e+16


In [52]:
X_tr_sm.pop('BsmtQual_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,154.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:20,Log-Likelihood:,2293.3
No. Observations:,1021,AIC:,-4379.0
Df Residuals:,917,BIC:,-3866.0
Df Model:,103,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9848,0.475,-8.394,0.000,-4.916,-3.053
LotArea,0.2019,0.028,7.250,0.000,0.147,0.257
OverallQual,0.0812,0.011,7.165,0.000,0.059,0.103
OverallCond,0.0552,0.009,6.286,0.000,0.038,0.072
YearBuilt,0.0734,0.011,6.915,0.000,0.053,0.094
YearRemodAdd,0.0158,0.004,3.734,0.000,0.007,0.024
MasVnrArea,0.0431,0.009,4.642,0.000,0.025,0.061
BsmtFinSF1,0.1701,0.016,10.923,0.000,0.140,0.201
BsmtFinSF2,0.0287,0.009,3.308,0.001,0.012,0.046

0,1,2,3
Omnibus:,299.055,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4296.373
Skew:,0.931,Prob(JB):,0.0
Kurtosis:,12.875,Cond. No.,1.05e+16


In [53]:
X_tr_sm.pop('Exterior1st_BrkComm')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,156.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:20,Log-Likelihood:,2293.0
No. Observations:,1021,AIC:,-4380.0
Df Residuals:,918,BIC:,-3872.0
Df Model:,102,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9930,0.474,-8.416,0.000,-4.924,-3.062
LotArea,0.2012,0.028,7.233,0.000,0.147,0.256
OverallQual,0.0813,0.011,7.179,0.000,0.059,0.104
OverallCond,0.0556,0.009,6.352,0.000,0.038,0.073
YearBuilt,0.0737,0.011,6.951,0.000,0.053,0.095
YearRemodAdd,0.0157,0.004,3.725,0.000,0.007,0.024
MasVnrArea,0.0431,0.009,4.642,0.000,0.025,0.061
BsmtFinSF1,0.1698,0.016,10.914,0.000,0.139,0.200
BsmtFinSF2,0.0287,0.009,3.315,0.001,0.012,0.046

0,1,2,3
Omnibus:,298.596,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4280.9
Skew:,0.93,Prob(JB):,0.0
Kurtosis:,12.857,Cond. No.,1.05e+16


In [54]:
X_tr_sm.pop('Condition2_RRAe')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,158.0
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:20,Log-Likelihood:,2292.8
No. Observations:,1021,AIC:,-4382.0
Df Residuals:,919,BIC:,-3879.0
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0528,0.466,-8.690,0.000,-4.968,-3.138
LotArea,0.1983,0.028,7.212,0.000,0.144,0.252
OverallQual,0.0813,0.011,7.176,0.000,0.059,0.103
OverallCond,0.0557,0.009,6.376,0.000,0.039,0.073
YearBuilt,0.0740,0.011,6.984,0.000,0.053,0.095
YearRemodAdd,0.0156,0.004,3.693,0.000,0.007,0.024
MasVnrArea,0.0430,0.009,4.631,0.000,0.025,0.061
BsmtFinSF1,0.1694,0.016,10.898,0.000,0.139,0.200
BsmtFinSF2,0.0286,0.009,3.298,0.001,0.012,0.046

0,1,2,3
Omnibus:,298.485,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4279.592
Skew:,0.929,Prob(JB):,0.0
Kurtosis:,12.856,Cond. No.,1.05e+16


In [55]:
X_tr_sm.pop('GarageType_CarPort')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,159.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:21,Log-Likelihood:,2292.5
No. Observations:,1021,AIC:,-4383.0
Df Residuals:,920,BIC:,-3885.0
Df Model:,100,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0517,0.466,-8.691,0.000,-4.967,-3.137
LotArea,0.1976,0.027,7.192,0.000,0.144,0.251
OverallQual,0.0808,0.011,7.150,0.000,0.059,0.103
OverallCond,0.0558,0.009,6.383,0.000,0.039,0.073
YearBuilt,0.0738,0.011,6.968,0.000,0.053,0.095
YearRemodAdd,0.0155,0.004,3.692,0.000,0.007,0.024
MasVnrArea,0.0430,0.009,4.631,0.000,0.025,0.061
BsmtFinSF1,0.1694,0.016,10.905,0.000,0.139,0.200
BsmtFinSF2,0.0289,0.009,3.341,0.001,0.012,0.046

0,1,2,3
Omnibus:,298.335,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4289.961
Skew:,0.928,Prob(JB):,0.0
Kurtosis:,12.869,Cond. No.,1.05e+16


In [56]:
X_tr_sm.pop('RoofStyle_Gambrel')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.946
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,161.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:21,Log-Likelihood:,2292.2
No. Observations:,1021,AIC:,-4384.0
Df Residuals:,921,BIC:,-3892.0
Df Model:,99,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0483,0.466,-8.686,0.000,-4.963,-3.134
LotArea,0.1971,0.027,7.179,0.000,0.143,0.251
OverallQual,0.0803,0.011,7.123,0.000,0.058,0.102
OverallCond,0.0556,0.009,6.363,0.000,0.038,0.073
YearBuilt,0.0737,0.011,6.961,0.000,0.053,0.094
YearRemodAdd,0.0157,0.004,3.743,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.623,0.000,0.025,0.061
BsmtFinSF1,0.1700,0.016,10.961,0.000,0.140,0.200
BsmtFinSF2,0.0290,0.009,3.356,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.827,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4262.755
Skew:,0.921,Prob(JB):,0.0
Kurtosis:,12.839,Cond. No.,1.05e+16


In [57]:
X_tr_sm.pop('Electrical_FuseP')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,163.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:21,Log-Likelihood:,2291.9
No. Observations:,1021,AIC:,-4386.0
Df Residuals:,922,BIC:,-3898.0
Df Model:,98,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0530,0.466,-8.699,0.000,-4.967,-3.139
LotArea,0.1971,0.027,7.180,0.000,0.143,0.251
OverallQual,0.0802,0.011,7.120,0.000,0.058,0.102
OverallCond,0.0555,0.009,6.359,0.000,0.038,0.073
YearBuilt,0.0732,0.011,6.927,0.000,0.052,0.094
YearRemodAdd,0.0157,0.004,3.728,0.000,0.007,0.024
MasVnrArea,0.0428,0.009,4.618,0.000,0.025,0.061
BsmtFinSF1,0.1703,0.016,10.987,0.000,0.140,0.201
BsmtFinSF2,0.0290,0.009,3.354,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.333,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4246.751
Skew:,0.92,Prob(JB):,0.0
Kurtosis:,12.821,Cond. No.,1.05e+16


In [58]:
X_tr_sm.pop('Exterior2nd_CBlock')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,163.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:21,Log-Likelihood:,2291.9
No. Observations:,1021,AIC:,-4386.0
Df Residuals:,922,BIC:,-3898.0
Df Model:,98,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0530,0.466,-8.699,0.000,-4.967,-3.139
LotArea,0.1971,0.027,7.180,0.000,0.143,0.251
OverallQual,0.0802,0.011,7.120,0.000,0.058,0.102
OverallCond,0.0555,0.009,6.359,0.000,0.038,0.073
YearBuilt,0.0732,0.011,6.927,0.000,0.052,0.094
YearRemodAdd,0.0157,0.004,3.728,0.000,0.007,0.024
MasVnrArea,0.0428,0.009,4.618,0.000,0.025,0.061
BsmtFinSF1,0.1703,0.016,10.987,0.000,0.140,0.201
BsmtFinSF2,0.0290,0.009,3.354,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.333,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4246.751
Skew:,0.92,Prob(JB):,0.0
Kurtosis:,12.821,Cond. No.,1.05e+16


In [59]:
X_tr_sm.pop('Exterior1st_CBlock')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,164.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:22,Log-Likelihood:,2291.6
No. Observations:,1021,AIC:,-4387.0
Df Residuals:,923,BIC:,-3904.0
Df Model:,97,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0529,0.466,-8.701,0.000,-4.967,-3.139
LotArea,0.1969,0.027,7.175,0.000,0.143,0.251
OverallQual,0.0805,0.011,7.149,0.000,0.058,0.103
OverallCond,0.0555,0.009,6.361,0.000,0.038,0.073
YearBuilt,0.0734,0.011,6.958,0.000,0.053,0.094
YearRemodAdd,0.0157,0.004,3.743,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.630,0.000,0.025,0.061
BsmtFinSF1,0.1701,0.015,10.976,0.000,0.140,0.200
BsmtFinSF2,0.0291,0.009,3.366,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.655,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4244.343
Skew:,0.922,Prob(JB):,0.0
Kurtosis:,12.817,Cond. No.,1.05e+16


In [60]:
X_tr_sm.pop('ExterQual_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,166.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:22,Log-Likelihood:,2291.4
No. Observations:,1021,AIC:,-4389.0
Df Residuals:,924,BIC:,-3911.0
Df Model:,96,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0663,0.465,-8.743,0.000,-4.979,-3.154
LotArea,0.1960,0.027,7.155,0.000,0.142,0.250
OverallQual,0.0803,0.011,7.135,0.000,0.058,0.102
OverallCond,0.0555,0.009,6.363,0.000,0.038,0.073
YearBuilt,0.0734,0.011,6.962,0.000,0.053,0.094
YearRemodAdd,0.0157,0.004,3.741,0.000,0.007,0.024
MasVnrArea,0.0430,0.009,4.637,0.000,0.025,0.061
BsmtFinSF1,0.1705,0.015,11.022,0.000,0.140,0.201
BsmtFinSF2,0.0295,0.009,3.423,0.001,0.013,0.046

0,1,2,3
Omnibus:,295.861,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4230.897
Skew:,0.918,Prob(JB):,0.0
Kurtosis:,12.802,Cond. No.,1.05e+16


In [61]:
X_tr_sm.pop('Electrical_Mix')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,166.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:22,Log-Likelihood:,2291.4
No. Observations:,1021,AIC:,-4389.0
Df Residuals:,924,BIC:,-3911.0
Df Model:,96,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0663,0.465,-8.743,0.000,-4.979,-3.154
LotArea,0.1960,0.027,7.155,0.000,0.142,0.250
OverallQual,0.0803,0.011,7.135,0.000,0.058,0.102
OverallCond,0.0555,0.009,6.363,0.000,0.038,0.073
YearBuilt,0.0734,0.011,6.962,0.000,0.053,0.094
YearRemodAdd,0.0157,0.004,3.741,0.000,0.007,0.024
MasVnrArea,0.0430,0.009,4.637,0.000,0.025,0.061
BsmtFinSF1,0.1705,0.015,11.022,0.000,0.140,0.201
BsmtFinSF2,0.0295,0.009,3.423,0.001,0.013,0.046

0,1,2,3
Omnibus:,295.861,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4230.897
Skew:,0.918,Prob(JB):,0.0
Kurtosis:,12.802,Cond. No.,1.05e+16


In [62]:
X_tr_sm.pop('LowQualFinSF')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,166.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:22,Log-Likelihood:,2291.4
No. Observations:,1021,AIC:,-4389.0
Df Residuals:,924,BIC:,-3911.0
Df Model:,96,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0663,0.465,-8.743,0.000,-4.979,-3.154
LotArea,0.1960,0.027,7.155,0.000,0.142,0.250
OverallQual,0.0803,0.011,7.135,0.000,0.058,0.102
OverallCond,0.0555,0.009,6.363,0.000,0.038,0.073
YearBuilt,0.0734,0.011,6.962,0.000,0.053,0.094
YearRemodAdd,0.0157,0.004,3.741,0.000,0.007,0.024
MasVnrArea,0.0430,0.009,4.637,0.000,0.025,0.061
BsmtFinSF1,0.1705,0.015,11.022,0.000,0.140,0.201
BsmtFinSF2,0.0295,0.009,3.423,0.001,0.013,0.046

0,1,2,3
Omnibus:,295.861,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4230.897
Skew:,0.918,Prob(JB):,0.0
Kurtosis:,12.802,Cond. No.,1.05e+16


In [63]:
X_tr_sm.pop('Foundation_Stone')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,168.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:23,Log-Likelihood:,2290.9
No. Observations:,1021,AIC:,-4390.0
Df Residuals:,925,BIC:,-3917.0
Df Model:,95,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0919,0.464,-8.812,0.000,-5.003,-3.181
LotArea,0.1965,0.027,7.176,0.000,0.143,0.250
OverallQual,0.0804,0.011,7.150,0.000,0.058,0.103
OverallCond,0.0558,0.009,6.403,0.000,0.039,0.073
YearBuilt,0.0730,0.011,6.927,0.000,0.052,0.094
YearRemodAdd,0.0158,0.004,3.759,0.000,0.008,0.024
MasVnrArea,0.0427,0.009,4.611,0.000,0.025,0.061
BsmtFinSF1,0.1701,0.015,10.999,0.000,0.140,0.200
BsmtFinSF2,0.0293,0.009,3.400,0.001,0.012,0.046

0,1,2,3
Omnibus:,295.624,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4216.769
Skew:,0.918,Prob(JB):,0.0
Kurtosis:,12.785,Cond. No.,1.05e+16


In [64]:
X_tr_sm.pop('Electrical_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,168.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:23,Log-Likelihood:,2290.9
No. Observations:,1021,AIC:,-4390.0
Df Residuals:,925,BIC:,-3917.0
Df Model:,95,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0919,0.464,-8.812,0.000,-5.003,-3.181
LotArea,0.1965,0.027,7.176,0.000,0.143,0.250
OverallQual,0.0804,0.011,7.150,0.000,0.058,0.103
OverallCond,0.0558,0.009,6.403,0.000,0.039,0.073
YearBuilt,0.0730,0.011,6.927,0.000,0.052,0.094
YearRemodAdd,0.0158,0.004,3.759,0.000,0.008,0.024
MasVnrArea,0.0427,0.009,4.611,0.000,0.025,0.061
BsmtFinSF1,0.1701,0.015,10.999,0.000,0.140,0.200
BsmtFinSF2,0.0293,0.009,3.400,0.001,0.012,0.046

0,1,2,3
Omnibus:,295.624,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4216.769
Skew:,0.918,Prob(JB):,0.0
Kurtosis:,12.785,Cond. No.,1.05e+16


In [65]:
X_tr_sm.pop('1stFlrSF')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,170.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:23,Log-Likelihood:,2290.4
No. Observations:,1021,AIC:,-4391.0
Df Residuals:,926,BIC:,-3923.0
Df Model:,94,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.1974,0.452,-9.290,0.000,-5.084,-3.311
LotArea,0.1970,0.027,7.196,0.000,0.143,0.251
OverallQual,0.0806,0.011,7.162,0.000,0.058,0.103
OverallCond,0.0562,0.009,6.457,0.000,0.039,0.073
YearBuilt,0.0730,0.011,6.930,0.000,0.052,0.094
YearRemodAdd,0.0158,0.004,3.765,0.000,0.008,0.024
MasVnrArea,0.0430,0.009,4.647,0.000,0.025,0.061
BsmtFinSF1,0.1722,0.015,11.249,0.000,0.142,0.202
BsmtFinSF2,0.0294,0.009,3.421,0.001,0.013,0.046

0,1,2,3
Omnibus:,294.981,Durbin-Watson:,1.987
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4216.975
Skew:,0.914,Prob(JB):,0.0
Kurtosis:,12.787,Cond. No.,1.05e+16


In [66]:
X_tr_sm.pop('HouseStyle_2.5Unf')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,172.0
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:23,Log-Likelihood:,2289.7
No. Observations:,1021,AIC:,-4391.0
Df Residuals:,927,BIC:,-3928.0
Df Model:,93,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.1983,0.452,-9.291,0.000,-5.085,-3.311
LotArea,0.1970,0.027,7.196,0.000,0.143,0.251
OverallQual,0.0791,0.011,7.080,0.000,0.057,0.101
OverallCond,0.0566,0.009,6.508,0.000,0.040,0.074
YearBuilt,0.0742,0.010,7.077,0.000,0.054,0.095
YearRemodAdd,0.0154,0.004,3.688,0.000,0.007,0.024
MasVnrArea,0.0435,0.009,4.707,0.000,0.025,0.062
BsmtFinSF1,0.1708,0.015,11.194,0.000,0.141,0.201
BsmtFinSF2,0.0293,0.009,3.403,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.515,Durbin-Watson:,1.987
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4242.002
Skew:,0.921,Prob(JB):,0.0
Kurtosis:,12.814,Cond. No.,1.05e+16


In [67]:
X_tr_sm.pop('Exterior1st_Stone')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,173.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:24,Log-Likelihood:,2289.0
No. Observations:,1021,AIC:,-4392.0
Df Residuals:,928,BIC:,-3934.0
Df Model:,92,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.1915,0.452,-9.275,0.000,-5.078,-3.305
LotArea,0.1959,0.027,7.159,0.000,0.142,0.250
OverallQual,0.0796,0.011,7.125,0.000,0.058,0.101
OverallCond,0.0567,0.009,6.510,0.000,0.040,0.074
YearBuilt,0.0736,0.010,7.028,0.000,0.053,0.094
YearRemodAdd,0.0154,0.004,3.677,0.000,0.007,0.024
MasVnrArea,0.0439,0.009,4.752,0.000,0.026,0.062
BsmtFinSF1,0.1718,0.015,11.276,0.000,0.142,0.202
BsmtFinSF2,0.0285,0.009,3.324,0.001,0.012,0.045

0,1,2,3
Omnibus:,297.457,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4223.34
Skew:,0.928,Prob(JB):,0.0
Kurtosis:,12.789,Cond. No.,1.05e+16


In [68]:
X_tr_sm.pop('EnclosedPorch')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,175.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:24,Log-Likelihood:,2288.5
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,929,BIC:,-3940.0
Df Model:,91,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0970,0.442,-9.273,0.000,-4.964,-3.230
LotArea,0.1944,0.027,7.116,0.000,0.141,0.248
OverallQual,0.0803,0.011,7.203,0.000,0.058,0.102
OverallCond,0.0558,0.009,6.444,0.000,0.039,0.073
YearBuilt,0.0709,0.010,7.014,0.000,0.051,0.091
YearRemodAdd,0.0155,0.004,3.703,0.000,0.007,0.024
MasVnrArea,0.0438,0.009,4.738,0.000,0.026,0.062
BsmtFinSF1,0.1714,0.015,11.253,0.000,0.141,0.201
BsmtFinSF2,0.0289,0.009,3.366,0.001,0.012,0.046

0,1,2,3
Omnibus:,296.654,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4180.378
Skew:,0.926,Prob(JB):,0.0
Kurtosis:,12.738,Cond. No.,1.05e+16


In [69]:
X_tr_sm.pop('SaleCondition_AdjLand')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,177.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:24,Log-Likelihood:,2287.6
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,930,BIC:,-3945.0
Df Model:,90,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0947,0.442,-9.265,0.000,-4.962,-3.227
LotArea,0.1942,0.027,7.105,0.000,0.141,0.248
OverallQual,0.0806,0.011,7.234,0.000,0.059,0.102
OverallCond,0.0550,0.009,6.369,0.000,0.038,0.072
YearBuilt,0.0701,0.010,6.947,0.000,0.050,0.090
YearRemodAdd,0.0156,0.004,3.731,0.000,0.007,0.024
MasVnrArea,0.0438,0.009,4.742,0.000,0.026,0.062
BsmtFinSF1,0.1702,0.015,11.194,0.000,0.140,0.200
BsmtFinSF2,0.0285,0.009,3.331,0.001,0.012,0.045

0,1,2,3
Omnibus:,295.771,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4157.709
Skew:,0.923,Prob(JB):,0.0
Kurtosis:,12.712,Cond. No.,1.05e+16


In [70]:
X_tr_sm.pop('Foundation_Wood')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,179.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:25,Log-Likelihood:,2286.8
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,931,BIC:,-3950.0
Df Model:,89,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0952,0.442,-9.263,0.000,-4.963,-3.228
LotArea,0.1942,0.027,7.103,0.000,0.141,0.248
OverallQual,0.0811,0.011,7.279,0.000,0.059,0.103
OverallCond,0.0553,0.009,6.404,0.000,0.038,0.072
YearBuilt,0.0700,0.010,6.940,0.000,0.050,0.090
YearRemodAdd,0.0154,0.004,3.686,0.000,0.007,0.024
MasVnrArea,0.0443,0.009,4.798,0.000,0.026,0.062
BsmtFinSF1,0.1693,0.015,11.143,0.000,0.139,0.199
BsmtFinSF2,0.0286,0.009,3.332,0.001,0.012,0.045

0,1,2,3
Omnibus:,296.377,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4148.057
Skew:,0.927,Prob(JB):,0.0
Kurtosis:,12.699,Cond. No.,1.05e+16


In [71]:
X_tr_sm.pop('Utilities_NoSeWa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,181.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:25,Log-Likelihood:,2285.9
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,932,BIC:,-3955.0
Df Model:,88,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0833,0.442,-9.235,0.000,-4.951,-3.216
LotArea,0.1936,0.027,7.081,0.000,0.140,0.247
OverallQual,0.0809,0.011,7.257,0.000,0.059,0.103
OverallCond,0.0553,0.009,6.402,0.000,0.038,0.072
YearBuilt,0.0701,0.010,6.952,0.000,0.050,0.090
YearRemodAdd,0.0157,0.004,3.766,0.000,0.008,0.024
MasVnrArea,0.0431,0.009,4.691,0.000,0.025,0.061
BsmtFinSF1,0.1707,0.015,11.262,0.000,0.141,0.200
BsmtFinSF2,0.0281,0.009,3.276,0.001,0.011,0.045

0,1,2,3
Omnibus:,296.039,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4124.133
Skew:,0.927,Prob(JB):,0.0
Kurtosis:,12.67,Cond. No.,1.05e+16


In [72]:
X_tr_sm.pop('Exterior1st_CemntBd')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,183.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:25,Log-Likelihood:,2285.0
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,933,BIC:,-3960.0
Df Model:,87,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0639,0.442,-9.193,0.000,-4.931,-3.196
LotArea,0.1924,0.027,7.039,0.000,0.139,0.246
OverallQual,0.0809,0.011,7.259,0.000,0.059,0.103
OverallCond,0.0553,0.009,6.396,0.000,0.038,0.072
YearBuilt,0.0705,0.010,6.989,0.000,0.051,0.090
YearRemodAdd,0.0157,0.004,3.753,0.000,0.007,0.024
MasVnrArea,0.0429,0.009,4.661,0.000,0.025,0.061
BsmtFinSF1,0.1700,0.015,11.220,0.000,0.140,0.200
BsmtFinSF2,0.0282,0.009,3.291,0.001,0.011,0.045

0,1,2,3
Omnibus:,291.836,Durbin-Watson:,1.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3998.055
Skew:,0.913,Prob(JB):,0.0
Kurtosis:,12.521,Cond. No.,1.05e+16


In [73]:
X_tr_sm.pop('Exterior1st_VinylSd')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.945
Model:,OLS,Adj. R-squared:,0.94
Method:,Least Squares,F-statistic:,185.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:25,Log-Likelihood:,2283.9
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,934,BIC:,-3965.0
Df Model:,86,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0669,0.442,-9.195,0.000,-4.935,-3.199
LotArea,0.1929,0.027,7.052,0.000,0.139,0.247
OverallQual,0.0802,0.011,7.200,0.000,0.058,0.102
OverallCond,0.0543,0.009,6.300,0.000,0.037,0.071
YearBuilt,0.0739,0.010,7.522,0.000,0.055,0.093
YearRemodAdd,0.0167,0.004,4.055,0.000,0.009,0.025
MasVnrArea,0.0426,0.009,4.634,0.000,0.025,0.061
BsmtFinSF1,0.1698,0.015,11.203,0.000,0.140,0.200
BsmtFinSF2,0.0281,0.009,3.272,0.001,0.011,0.045

0,1,2,3
Omnibus:,288.584,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3899.75
Skew:,0.903,Prob(JB):,0.0
Kurtosis:,12.403,Cond. No.,1.05e+16


In [74]:
X_tr_sm.pop('GarageCars')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,187.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:26,Log-Likelihood:,2282.8
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,935,BIC:,-3970.0
Df Model:,85,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.1021,0.442,-9.284,0.000,-4.969,-3.235
LotArea,0.1935,0.027,7.072,0.000,0.140,0.247
OverallQual,0.0810,0.011,7.273,0.000,0.059,0.103
OverallCond,0.0531,0.009,6.184,0.000,0.036,0.070
YearBuilt,0.0743,0.010,7.572,0.000,0.055,0.094
YearRemodAdd,0.0172,0.004,4.196,0.000,0.009,0.025
MasVnrArea,0.0430,0.009,4.674,0.000,0.025,0.061
BsmtFinSF1,0.1701,0.015,11.216,0.000,0.140,0.200
BsmtFinSF2,0.0279,0.009,3.255,0.001,0.011,0.045

0,1,2,3
Omnibus:,281.385,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3810.529
Skew:,0.869,Prob(JB):,0.0
Kurtosis:,12.303,Cond. No.,1.05e+16


In [75]:
X_tr_sm.pop('Exterior2nd_Wd Sdng')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,189.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:26,Log-Likelihood:,2282.0
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,936,BIC:,-3975.0
Df Model:,84,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.0296,0.438,-9.205,0.000,-4.889,-3.170
LotArea,0.1915,0.027,7.011,0.000,0.138,0.245
OverallQual,0.0804,0.011,7.228,0.000,0.059,0.102
OverallCond,0.0537,0.009,6.272,0.000,0.037,0.071
YearBuilt,0.0749,0.010,7.639,0.000,0.056,0.094
YearRemodAdd,0.0170,0.004,4.140,0.000,0.009,0.025
MasVnrArea,0.0429,0.009,4.661,0.000,0.025,0.061
BsmtFinSF1,0.1698,0.015,11.193,0.000,0.140,0.200
BsmtFinSF2,0.0277,0.009,3.230,0.001,0.011,0.045

0,1,2,3
Omnibus:,278.281,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3745.709
Skew:,0.857,Prob(JB):,0.0
Kurtosis:,12.226,Cond. No.,1.05e+16


In [76]:
X_tr_sm.pop('Exterior1st_Wd Sdng')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,191.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:26,Log-Likelihood:,2280.9
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,937,BIC:,-3980.0
Df Model:,83,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9685,0.436,-9.108,0.000,-4.824,-3.113
LotArea,0.1912,0.027,6.995,0.000,0.138,0.245
OverallQual,0.0794,0.011,7.147,0.000,0.058,0.101
OverallCond,0.0549,0.009,6.442,0.000,0.038,0.072
YearBuilt,0.0787,0.009,8.344,0.000,0.060,0.097
YearRemodAdd,0.0166,0.004,4.064,0.000,0.009,0.025
MasVnrArea,0.0432,0.009,4.693,0.000,0.025,0.061
BsmtFinSF1,0.1700,0.015,11.202,0.000,0.140,0.200
BsmtFinSF2,0.0268,0.009,3.133,0.002,0.010,0.044

0,1,2,3
Omnibus:,277.736,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3727.932
Skew:,0.855,Prob(JB):,0.0
Kurtosis:,12.203,Cond. No.,1.05e+16


In [77]:
X_tr_sm.pop('GarageQual_Po')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,193.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:26,Log-Likelihood:,2279.8
No. Observations:,1021,AIC:,-4394.0
Df Residuals:,938,BIC:,-3984.0
Df Model:,82,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9739,0.436,-9.115,0.000,-4.830,-3.118
LotArea,0.1904,0.027,6.965,0.000,0.137,0.244
OverallQual,0.0793,0.011,7.130,0.000,0.057,0.101
OverallCond,0.0544,0.009,6.377,0.000,0.038,0.071
YearBuilt,0.0782,0.009,8.296,0.000,0.060,0.097
YearRemodAdd,0.0170,0.004,4.148,0.000,0.009,0.025
MasVnrArea,0.0434,0.009,4.712,0.000,0.025,0.061
BsmtFinSF1,0.1692,0.015,11.150,0.000,0.139,0.199
BsmtFinSF2,0.0265,0.009,3.094,0.002,0.010,0.043

0,1,2,3
Omnibus:,278.455,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3707.825
Skew:,0.861,Prob(JB):,0.0
Kurtosis:,12.176,Cond. No.,1.05e+16


In [78]:
X_tr_sm.pop('GarageQual_TA')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,195.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:27,Log-Likelihood:,2278.6
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,939,BIC:,-3989.0
Df Model:,81,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9710,0.436,-9.103,0.000,-4.827,-3.115
LotArea,0.1899,0.027,6.941,0.000,0.136,0.244
OverallQual,0.0785,0.011,7.066,0.000,0.057,0.100
OverallCond,0.0534,0.009,6.279,0.000,0.037,0.070
YearBuilt,0.0769,0.009,8.191,0.000,0.059,0.095
YearRemodAdd,0.0178,0.004,4.396,0.000,0.010,0.026
MasVnrArea,0.0434,0.009,4.711,0.000,0.025,0.062
BsmtFinSF1,0.1695,0.015,11.167,0.000,0.140,0.199
BsmtFinSF2,0.0261,0.009,3.053,0.002,0.009,0.043

0,1,2,3
Omnibus:,280.142,Durbin-Watson:,1.985
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3634.966
Skew:,0.877,Prob(JB):,0.0
Kurtosis:,12.076,Cond. No.,1.06e+16


In [79]:
X_tr_sm.pop('MSZoning_FV')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,197.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:27,Log-Likelihood:,2277.4
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,940,BIC:,-3993.0
Df Model:,80,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9577,0.436,-9.068,0.000,-4.814,-3.101
LotArea,0.1917,0.027,7.010,0.000,0.138,0.245
OverallQual,0.0793,0.011,7.143,0.000,0.058,0.101
OverallCond,0.0551,0.008,6.531,0.000,0.039,0.072
YearBuilt,0.0787,0.009,8.429,0.000,0.060,0.097
YearRemodAdd,0.0175,0.004,4.324,0.000,0.010,0.025
MasVnrArea,0.0434,0.009,4.703,0.000,0.025,0.061
BsmtFinSF1,0.1687,0.015,11.111,0.000,0.139,0.198
BsmtFinSF2,0.0263,0.009,3.071,0.002,0.009,0.043

0,1,2,3
Omnibus:,280.594,Durbin-Watson:,1.987
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3607.796
Skew:,0.882,Prob(JB):,0.0
Kurtosis:,12.039,Cond. No.,1.06e+16


In [80]:
X_tr_sm.pop('MSZoning_RM')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,199.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:27,Log-Likelihood:,2276.2
No. Observations:,1021,AIC:,-4392.0
Df Residuals:,941,BIC:,-3998.0
Df Model:,79,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9593,0.437,-9.066,0.000,-4.816,-3.102
LotArea,0.1921,0.027,7.021,0.000,0.138,0.246
OverallQual,0.0809,0.011,7.310,0.000,0.059,0.103
OverallCond,0.0562,0.008,6.689,0.000,0.040,0.073
YearBuilt,0.0781,0.009,8.374,0.000,0.060,0.096
YearRemodAdd,0.0174,0.004,4.306,0.000,0.009,0.025
MasVnrArea,0.0437,0.009,4.734,0.000,0.026,0.062
BsmtFinSF1,0.1685,0.015,11.093,0.000,0.139,0.198
BsmtFinSF2,0.0265,0.009,3.094,0.002,0.010,0.043

0,1,2,3
Omnibus:,275.704,Durbin-Watson:,1.985
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3536.181
Skew:,0.86,Prob(JB):,0.0
Kurtosis:,11.953,Cond. No.,1.06e+16


In [81]:
X_tr_sm.pop('MSZoning_RH')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,202.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:27,Log-Likelihood:,2275.4
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,942,BIC:,-4004.0
Df Model:,78,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9564,0.437,-9.058,0.000,-4.814,-3.099
LotArea,0.1924,0.027,7.028,0.000,0.139,0.246
OverallQual,0.0808,0.011,7.300,0.000,0.059,0.102
OverallCond,0.0559,0.008,6.655,0.000,0.039,0.072
YearBuilt,0.0787,0.009,8.448,0.000,0.060,0.097
YearRemodAdd,0.0176,0.004,4.334,0.000,0.010,0.026
MasVnrArea,0.0436,0.009,4.730,0.000,0.026,0.062
BsmtFinSF1,0.1685,0.015,11.092,0.000,0.139,0.198
BsmtFinSF2,0.0263,0.009,3.073,0.002,0.010,0.043

0,1,2,3
Omnibus:,276.526,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3513.785
Skew:,0.867,Prob(JB):,0.0
Kurtosis:,11.921,Cond. No.,1.06e+16


In [82]:
X_tr_sm.pop('MSZoning_RL')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.944
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,204.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:27,Log-Likelihood:,2274.3
No. Observations:,1021,AIC:,-4393.0
Df Residuals:,943,BIC:,-4008.0
Df Model:,77,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9326,0.437,-9.005,0.000,-4.790,-3.076
LotArea,0.1958,0.027,7.179,0.000,0.142,0.249
OverallQual,0.0797,0.011,7.216,0.000,0.058,0.101
OverallCond,0.0564,0.008,6.710,0.000,0.040,0.073
YearBuilt,0.0829,0.009,9.372,0.000,0.066,0.100
YearRemodAdd,0.0170,0.004,4.218,0.000,0.009,0.025
MasVnrArea,0.0431,0.009,4.672,0.000,0.025,0.061
BsmtFinSF1,0.1677,0.015,11.042,0.000,0.138,0.198
BsmtFinSF2,0.0262,0.009,3.060,0.002,0.009,0.043

0,1,2,3
Omnibus:,276.216,Durbin-Watson:,1.989
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3503.125
Skew:,0.866,Prob(JB):,0.0
Kurtosis:,11.908,Cond. No.,1.06e+16


In [83]:
X_tr_sm.pop('Neighborhood_Veenker')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,207.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:28,Log-Likelihood:,2273.1
No. Observations:,1021,AIC:,-4392.0
Df Residuals:,944,BIC:,-4013.0
Df Model:,76,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9312,0.437,-8.995,0.000,-4.789,-3.074
LotArea,0.1963,0.027,7.193,0.000,0.143,0.250
OverallQual,0.0800,0.011,7.244,0.000,0.058,0.102
OverallCond,0.0575,0.008,6.859,0.000,0.041,0.074
YearBuilt,0.0827,0.009,9.347,0.000,0.065,0.100
YearRemodAdd,0.0169,0.004,4.187,0.000,0.009,0.025
MasVnrArea,0.0423,0.009,4.594,0.000,0.024,0.060
BsmtFinSF1,0.1679,0.015,11.045,0.000,0.138,0.198
BsmtFinSF2,0.0272,0.009,3.172,0.002,0.010,0.044

0,1,2,3
Omnibus:,270.499,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3401.07
Skew:,0.843,Prob(JB):,0.0
Kurtosis:,11.781,Cond. No.,1.06e+16


In [84]:
X_tr_sm.pop('GarageCond_Gd')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,209.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:28,Log-Likelihood:,2271.7
No. Observations:,1021,AIC:,-4391.0
Df Residuals:,945,BIC:,-4017.0
Df Model:,75,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9389,0.437,-9.007,0.000,-4.797,-3.081
LotArea,0.1976,0.027,7.238,0.000,0.144,0.251
OverallQual,0.0803,0.011,7.258,0.000,0.059,0.102
OverallCond,0.0573,0.008,6.837,0.000,0.041,0.074
YearBuilt,0.0830,0.009,9.370,0.000,0.066,0.100
YearRemodAdd,0.0171,0.004,4.236,0.000,0.009,0.025
MasVnrArea,0.0422,0.009,4.573,0.000,0.024,0.060
BsmtFinSF1,0.1681,0.015,11.051,0.000,0.138,0.198
BsmtFinSF2,0.0273,0.009,3.185,0.001,0.010,0.044

0,1,2,3
Omnibus:,268.614,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3372.392
Skew:,0.835,Prob(JB):,0.0
Kurtosis:,11.746,Cond. No.,1.06e+16


In [85]:

X_tr_sm.pop('Foundation_Slab')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,211.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:28,Log-Likelihood:,2270.2
No. Observations:,1021,AIC:,-4390.0
Df Residuals:,946,BIC:,-4021.0
Df Model:,74,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9280,0.438,-8.974,0.000,-4.787,-3.069
LotArea,0.1966,0.027,7.196,0.000,0.143,0.250
OverallQual,0.0816,0.011,7.396,0.000,0.060,0.103
OverallCond,0.0573,0.008,6.831,0.000,0.041,0.074
YearBuilt,0.0815,0.009,9.241,0.000,0.064,0.099
YearRemodAdd,0.0176,0.004,4.373,0.000,0.010,0.026
MasVnrArea,0.0419,0.009,4.538,0.000,0.024,0.060
BsmtFinSF1,0.1763,0.014,12.220,0.000,0.148,0.205
BsmtFinSF2,0.0289,0.009,3.385,0.001,0.012,0.046

0,1,2,3
Omnibus:,268.791,Durbin-Watson:,1.977
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3325.986
Skew:,0.84,Prob(JB):,0.0
Kurtosis:,11.681,Cond. No.,1.06e+16


In [86]:

X_tr_sm.pop('SaleCondition_Alloca')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,214.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:29,Log-Likelihood:,2269.1
No. Observations:,1021,AIC:,-4390.0
Df Residuals:,947,BIC:,-4025.0
Df Model:,73,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9209,0.438,-8.953,0.000,-4.780,-3.061
LotArea,0.1937,0.027,7.104,0.000,0.140,0.247
OverallQual,0.0821,0.011,7.440,0.000,0.060,0.104
OverallCond,0.0571,0.008,6.800,0.000,0.041,0.074
YearBuilt,0.0820,0.009,9.299,0.000,0.065,0.099
YearRemodAdd,0.0178,0.004,4.427,0.000,0.010,0.026
MasVnrArea,0.0415,0.009,4.493,0.000,0.023,0.060
BsmtFinSF1,0.1732,0.014,12.129,0.000,0.145,0.201
BsmtFinSF2,0.0278,0.008,3.268,0.001,0.011,0.044

0,1,2,3
Omnibus:,268.479,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3332.781
Skew:,0.838,Prob(JB):,0.0
Kurtosis:,11.691,Cond. No.,1.06e+16


In [87]:

X_tr_sm.pop('Heating_Wall')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.939
Method:,Least Squares,F-statistic:,217.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:29,Log-Likelihood:,2267.9
No. Observations:,1021,AIC:,-4390.0
Df Residuals:,948,BIC:,-4030.0
Df Model:,72,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9146,0.438,-8.933,0.000,-4.775,-3.055
LotArea,0.1930,0.027,7.076,0.000,0.140,0.247
OverallQual,0.0810,0.011,7.352,0.000,0.059,0.103
OverallCond,0.0566,0.008,6.746,0.000,0.040,0.073
YearBuilt,0.0818,0.009,9.265,0.000,0.064,0.099
YearRemodAdd,0.0176,0.004,4.357,0.000,0.010,0.025
MasVnrArea,0.0423,0.009,4.586,0.000,0.024,0.060
BsmtFinSF1,0.1710,0.014,12.032,0.000,0.143,0.199
BsmtFinSF2,0.0271,0.008,3.192,0.001,0.010,0.044

0,1,2,3
Omnibus:,267.048,Durbin-Watson:,1.976
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3312.831
Skew:,0.831,Prob(JB):,0.0
Kurtosis:,11.667,Cond. No.,1.06e+16


In [88]:

X_tr_sm.pop('GarageQual_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.943
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,219.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:29,Log-Likelihood:,2266.4
No. Observations:,1021,AIC:,-4389.0
Df Residuals:,949,BIC:,-4034.0
Df Model:,71,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.9092,0.439,-8.913,0.000,-4.770,-3.049
LotArea,0.1933,0.027,7.079,0.000,0.140,0.247
OverallQual,0.0813,0.011,7.373,0.000,0.060,0.103
OverallCond,0.0580,0.008,6.928,0.000,0.042,0.074
YearBuilt,0.0847,0.009,9.800,0.000,0.068,0.102
YearRemodAdd,0.0170,0.004,4.226,0.000,0.009,0.025
MasVnrArea,0.0423,0.009,4.585,0.000,0.024,0.060
BsmtFinSF1,0.1703,0.014,11.977,0.000,0.142,0.198
BsmtFinSF2,0.0272,0.008,3.200,0.001,0.011,0.044

0,1,2,3
Omnibus:,267.32,Durbin-Watson:,1.982
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3261.242
Skew:,0.838,Prob(JB):,0.0
Kurtosis:,11.594,Cond. No.,1.06e+16


In [89]:

X_tr_sm.pop('LandContour_Low')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,222.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:29,Log-Likelihood:,2264.6
No. Observations:,1021,AIC:,-4387.0
Df Residuals:,950,BIC:,-4037.0
Df Model:,70,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.8830,0.439,-8.847,0.000,-4.744,-3.022
LotArea,0.1808,0.026,6.829,0.000,0.129,0.233
OverallQual,0.0822,0.011,7.446,0.000,0.061,0.104
OverallCond,0.0583,0.008,6.968,0.000,0.042,0.075
YearBuilt,0.0851,0.009,9.833,0.000,0.068,0.102
YearRemodAdd,0.0170,0.004,4.229,0.000,0.009,0.025
MasVnrArea,0.0433,0.009,4.691,0.000,0.025,0.061
BsmtFinSF1,0.1696,0.014,11.913,0.000,0.142,0.197
BsmtFinSF2,0.0275,0.009,3.238,0.001,0.011,0.044

0,1,2,3
Omnibus:,270.672,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3353.632
Skew:,0.848,Prob(JB):,0.0
Kurtosis:,11.715,Cond. No.,1.06e+16


In [90]:

#X_tr_sm.pop('LandContour_Low')
#model1 = sm.OLS(y_tr,X_tr_sm)
#res1 = model1.fit()
#res1.summary()

from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data

Unnamed: 0,Feature,VIF
0,const,0.0
1,LotArea,2.226558
2,OverallQual,4.030415
3,OverallCond,1.788087
4,YearBuilt,4.948867
5,YearRemodAdd,2.609961
6,MasVnrArea,1.5975
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf


In [91]:

X_tr_sm.pop('MiscFeature_Shed')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,224.0
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:30,Log-Likelihood:,2260.7
No. Observations:,1021,AIC:,-4381.0
Df Residuals:,951,BIC:,-4036.0
Df Model:,69,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.7018,0.435,-8.506,0.000,-4.556,-2.848
LotArea,0.1721,0.026,6.528,0.000,0.120,0.224
OverallQual,0.0839,0.011,7.587,0.000,0.062,0.106
OverallCond,0.0582,0.008,6.934,0.000,0.042,0.075
YearBuilt,0.0842,0.009,9.701,0.000,0.067,0.101
YearRemodAdd,0.0174,0.004,4.326,0.000,0.010,0.025
MasVnrArea,0.0445,0.009,4.812,0.000,0.026,0.063
BsmtFinSF1,0.1699,0.014,11.896,0.000,0.142,0.198
BsmtFinSF2,0.0268,0.009,3.146,0.002,0.010,0.044

0,1,2,3
Omnibus:,266.846,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3228.27
Skew:,0.838,Prob(JB):,0.0
Kurtosis:,11.548,Cond. No.,1.06e+16


In [92]:

X_tr_sm.pop('MiscFeature_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,227.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:31,Log-Likelihood:,2260.3
No. Observations:,1021,AIC:,-4383.0
Df Residuals:,952,BIC:,-4043.0
Df Model:,68,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.7032,0.435,-8.511,0.000,-4.557,-2.849
LotArea,0.1730,0.026,6.570,0.000,0.121,0.225
OverallQual,0.0838,0.011,7.588,0.000,0.062,0.106
OverallCond,0.0587,0.008,7.008,0.000,0.042,0.075
YearBuilt,0.0844,0.009,9.740,0.000,0.067,0.101
YearRemodAdd,0.0172,0.004,4.278,0.000,0.009,0.025
MasVnrArea,0.0444,0.009,4.806,0.000,0.026,0.063
BsmtFinSF1,0.1693,0.014,11.873,0.000,0.141,0.197
BsmtFinSF2,0.0264,0.009,3.101,0.002,0.010,0.043

0,1,2,3
Omnibus:,268.018,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3253.317
Skew:,0.843,Prob(JB):,0.0
Kurtosis:,11.581,Cond. No.,1.07e+16


In [93]:

X_tr_sm.pop('MiscFeature_Othr')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,230.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:31,Log-Likelihood:,2259.9
No. Observations:,1021,AIC:,-4384.0
Df Residuals:,953,BIC:,-4049.0
Df Model:,67,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.6980,0.435,-8.500,0.000,-4.552,-2.844
LotArea,0.1734,0.026,6.584,0.000,0.122,0.225
OverallQual,0.0839,0.011,7.594,0.000,0.062,0.106
OverallCond,0.0593,0.008,7.094,0.000,0.043,0.076
YearBuilt,0.0846,0.009,9.764,0.000,0.068,0.102
YearRemodAdd,0.0169,0.004,4.217,0.000,0.009,0.025
MasVnrArea,0.0444,0.009,4.807,0.000,0.026,0.063
BsmtFinSF1,0.1694,0.014,11.882,0.000,0.141,0.197
BsmtFinSF2,0.0264,0.009,3.098,0.002,0.010,0.043

0,1,2,3
Omnibus:,266.873,Durbin-Watson:,1.984
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3233.062
Skew:,0.838,Prob(JB):,0.0
Kurtosis:,11.555,Cond. No.,1.07e+16


In [94]:
X_tr_sm.pop('MiscVal')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,234.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:31,Log-Likelihood:,2259.8
No. Observations:,1021,AIC:,-4386.0
Df Residuals:,954,BIC:,-4055.0
Df Model:,66,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.6982,0.435,-8.505,0.000,-4.552,-2.845
LotArea,0.1728,0.026,6.593,0.000,0.121,0.224
OverallQual,0.0838,0.011,7.594,0.000,0.062,0.106
OverallCond,0.0591,0.008,7.099,0.000,0.043,0.075
YearBuilt,0.0846,0.009,9.766,0.000,0.068,0.102
YearRemodAdd,0.0169,0.004,4.215,0.000,0.009,0.025
MasVnrArea,0.0445,0.009,4.811,0.000,0.026,0.063
BsmtFinSF1,0.1694,0.014,11.885,0.000,0.141,0.197
BsmtFinSF2,0.0264,0.009,3.098,0.002,0.010,0.043

0,1,2,3
Omnibus:,266.858,Durbin-Watson:,1.984
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3232.061
Skew:,0.838,Prob(JB):,0.0
Kurtosis:,11.554,Cond. No.,1.07e+16


In [95]:
X_tr_sm.pop('RoofStyle_Shed')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.942
Model:,OLS,Adj. R-squared:,0.938
Method:,Least Squares,F-statistic:,237.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:31,Log-Likelihood:,2258.7
No. Observations:,1021,AIC:,-4385.0
Df Residuals:,955,BIC:,-4060.0
Df Model:,65,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.6899,0.435,-8.482,0.000,-4.544,-2.836
LotArea,0.1688,0.026,6.474,0.000,0.118,0.220
OverallQual,0.0834,0.011,7.552,0.000,0.062,0.105
OverallCond,0.0593,0.008,7.113,0.000,0.043,0.076
YearBuilt,0.0845,0.009,9.749,0.000,0.067,0.101
YearRemodAdd,0.0168,0.004,4.201,0.000,0.009,0.025
MasVnrArea,0.0440,0.009,4.761,0.000,0.026,0.062
BsmtFinSF1,0.1712,0.014,12.051,0.000,0.143,0.199
BsmtFinSF2,0.0267,0.009,3.134,0.002,0.010,0.043

0,1,2,3
Omnibus:,267.376,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3227.262
Skew:,0.841,Prob(JB):,0.0
Kurtosis:,11.546,Cond. No.,1.07e+16


In [96]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data

Unnamed: 0,Feature,VIF
0,const,0.0
1,LotArea,2.14639
2,OverallQual,4.011049
3,OverallCond,1.760808
4,YearBuilt,4.929843
5,YearRemodAdd,2.574547
6,MasVnrArea,1.591497
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf


In [97]:
X_tr_sm.pop('PoolQC_Nonen')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.934
Method:,Least Squares,F-statistic:,225.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:32,Log-Likelihood:,2224.9
No. Observations:,1021,AIC:,-4320.0
Df Residuals:,956,BIC:,-3999.0
Df Model:,64,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1742,0.021,-8.493,0.000,-0.214,-0.134
LotArea,0.1613,0.027,5.991,0.000,0.108,0.214
OverallQual,0.0786,0.011,6.898,0.000,0.056,0.101
OverallCond,0.0655,0.009,7.637,0.000,0.049,0.082
YearBuilt,0.0914,0.009,10.266,0.000,0.074,0.109
YearRemodAdd,0.0150,0.004,3.630,0.000,0.007,0.023
MasVnrArea,0.0388,0.010,4.074,0.000,0.020,0.057
BsmtFinSF1,0.1787,0.015,12.204,0.000,0.150,0.207
BsmtFinSF2,0.0267,0.009,3.039,0.002,0.009,0.044

0,1,2,3
Omnibus:,235.483,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3284.18
Skew:,0.644,Prob(JB):,0.0
Kurtosis:,11.691,Cond. No.,1.08e+16


In [98]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data

Unnamed: 0,Feature,VIF
0,const,0.0
1,LotArea,2.143661
2,OverallQual,3.999417
3,OverallCond,1.745911
4,YearBuilt,4.881097
5,YearRemodAdd,2.566359
6,MasVnrArea,1.583817
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf


In [99]:
X_tr_sm.pop('SaleCondition_Partial')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.934
Method:,Least Squares,F-statistic:,225.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:33,Log-Likelihood:,2224.9
No. Observations:,1021,AIC:,-4320.0
Df Residuals:,956,BIC:,-3999.0
Df Model:,64,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1742,0.021,-8.493,0.000,-0.214,-0.134
LotArea,0.1613,0.027,5.991,0.000,0.108,0.214
OverallQual,0.0786,0.011,6.898,0.000,0.056,0.101
OverallCond,0.0655,0.009,7.637,0.000,0.049,0.082
YearBuilt,0.0914,0.009,10.266,0.000,0.074,0.109
YearRemodAdd,0.0150,0.004,3.630,0.000,0.007,0.023
MasVnrArea,0.0388,0.010,4.074,0.000,0.020,0.057
BsmtFinSF1,0.1787,0.015,12.204,0.000,0.150,0.207
BsmtFinSF2,0.0267,0.009,3.039,0.002,0.009,0.044

0,1,2,3
Omnibus:,235.483,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3284.18
Skew:,0.644,Prob(JB):,0.0
Kurtosis:,11.691,Cond. No.,1.08e+16


In [100]:
X_tr_sm.pop('ExterCond_Po')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.934
Method:,Least Squares,F-statistic:,225.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:33,Log-Likelihood:,2224.9
No. Observations:,1021,AIC:,-4320.0
Df Residuals:,956,BIC:,-3999.0
Df Model:,64,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1742,0.021,-8.493,0.000,-0.214,-0.134
LotArea,0.1613,0.027,5.991,0.000,0.108,0.214
OverallQual,0.0786,0.011,6.898,0.000,0.056,0.101
OverallCond,0.0655,0.009,7.637,0.000,0.049,0.082
YearBuilt,0.0914,0.009,10.266,0.000,0.074,0.109
YearRemodAdd,0.0150,0.004,3.630,0.000,0.007,0.023
MasVnrArea,0.0388,0.010,4.074,0.000,0.020,0.057
BsmtFinSF1,0.1787,0.015,12.204,0.000,0.150,0.207
BsmtFinSF2,0.0267,0.009,3.039,0.002,0.009,0.044

0,1,2,3
Omnibus:,235.483,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3284.18
Skew:,0.644,Prob(JB):,0.0
Kurtosis:,11.691,Cond. No.,1.08e+16


In [101]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data

Unnamed: 0,Feature,VIF
0,const,0.0
1,LotArea,2.143661
2,OverallQual,3.999417
3,OverallCond,1.745911
4,YearBuilt,4.881097
5,YearRemodAdd,2.566359
6,MasVnrArea,1.583817
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf


In [102]:
X_tr_sm.pop('PoolQC_Gd')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.934
Method:,Least Squares,F-statistic:,225.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:34,Log-Likelihood:,2224.9
No. Observations:,1021,AIC:,-4320.0
Df Residuals:,956,BIC:,-3999.0
Df Model:,64,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.1238,0.043,-26.083,0.000,-1.208,-1.039
LotArea,0.1613,0.027,5.991,0.000,0.108,0.214
OverallQual,0.0786,0.011,6.898,0.000,0.056,0.101
OverallCond,0.0655,0.009,7.637,0.000,0.049,0.082
YearBuilt,0.0914,0.009,10.266,0.000,0.074,0.109
YearRemodAdd,0.0150,0.004,3.630,0.000,0.007,0.023
MasVnrArea,0.0388,0.010,4.074,0.000,0.020,0.057
BsmtFinSF1,0.1787,0.015,12.204,0.000,0.150,0.207
BsmtFinSF2,0.0267,0.009,3.039,0.002,0.009,0.044

0,1,2,3
Omnibus:,235.483,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3284.18
Skew:,0.644,Prob(JB):,0.0
Kurtosis:,11.691,Cond. No.,1.08e+16


In [103]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data

Unnamed: 0,Feature,VIF
0,const,2367.823244
1,LotArea,2.143661
2,OverallQual,3.999417
3,OverallCond,1.745911
4,YearBuilt,4.881097
5,YearRemodAdd,2.566359
6,MasVnrArea,1.583817
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf


In [104]:
X_tr_sm.pop('RoofStyle_Mansard')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.934
Method:,Least Squares,F-statistic:,228.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:35,Log-Likelihood:,2223.2
No. Observations:,1021,AIC:,-4318.0
Df Residuals:,957,BIC:,-4003.0
Df Model:,63,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.1230,0.043,-26.038,0.000,-1.208,-1.038
LotArea,0.1641,0.027,6.099,0.000,0.111,0.217
OverallQual,0.0788,0.011,6.906,0.000,0.056,0.101
OverallCond,0.0649,0.009,7.563,0.000,0.048,0.082
YearBuilt,0.0896,0.009,10.118,0.000,0.072,0.107
YearRemodAdd,0.0156,0.004,3.777,0.000,0.007,0.024
MasVnrArea,0.0390,0.010,4.095,0.000,0.020,0.058
BsmtFinSF1,0.1791,0.015,12.218,0.000,0.150,0.208
BsmtFinSF2,0.0266,0.009,3.026,0.003,0.009,0.044

0,1,2,3
Omnibus:,233.531,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3243.121
Skew:,0.636,Prob(JB):,0.0
Kurtosis:,11.638,Cond. No.,1.08e+16


In [105]:
X_tr_sm.pop('BsmtFinType2_BLQ')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.937
Model:,OLS,Adj. R-squared:,0.933
Method:,Least Squares,F-statistic:,231.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:35,Log-Likelihood:,2221.7
No. Observations:,1021,AIC:,-4317.0
Df Residuals:,958,BIC:,-4007.0
Df Model:,62,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.1250,0.043,-26.067,0.000,-1.210,-1.040
LotArea,0.1620,0.027,6.022,0.000,0.109,0.215
OverallQual,0.0798,0.011,7.005,0.000,0.057,0.102
OverallCond,0.0639,0.009,7.462,0.000,0.047,0.081
YearBuilt,0.0896,0.009,10.113,0.000,0.072,0.107
YearRemodAdd,0.0156,0.004,3.791,0.000,0.008,0.024
MasVnrArea,0.0385,0.010,4.039,0.000,0.020,0.057
BsmtFinSF1,0.1797,0.015,12.249,0.000,0.151,0.208
BsmtFinSF2,0.0235,0.009,2.724,0.007,0.007,0.040

0,1,2,3
Omnibus:,232.545,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3228.067
Skew:,0.631,Prob(JB):,0.0
Kurtosis:,11.619,Cond. No.,1.08e+16


In [106]:

from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data

Unnamed: 0,Feature,VIF
0,const,2365.86418
7,BsmtFinSF1,inf
8,BsmtFinSF2,inf
9,BsmtUnfSF,inf
10,TotalBsmtSF,inf
11,2ndFlrSF,7.235842
12,GrLivArea,10.783335
38,RoofMatl_CompShg,30.949624
42,RoofMatl_Tar&Grv,11.965147
43,RoofMatl_WdShake,8.143293


In [107]:
X_tr_sm.pop('BsmtFinSF1')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.937
Model:,OLS,Adj. R-squared:,0.933
Method:,Least Squares,F-statistic:,231.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:36,Log-Likelihood:,2221.7
No. Observations:,1021,AIC:,-4317.0
Df Residuals:,958,BIC:,-4007.0
Df Model:,62,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.1250,0.043,-26.067,0.000,-1.210,-1.040
LotArea,0.1620,0.027,6.022,0.000,0.109,0.215
OverallQual,0.0798,0.011,7.005,0.000,0.057,0.102
OverallCond,0.0639,0.009,7.462,0.000,0.047,0.081
YearBuilt,0.0896,0.009,10.113,0.000,0.072,0.107
YearRemodAdd,0.0156,0.004,3.791,0.000,0.008,0.024
MasVnrArea,0.0385,0.010,4.039,0.000,0.020,0.057
BsmtFinSF2,-0.0235,0.009,-2.686,0.007,-0.041,-0.006
BsmtUnfSF,-0.0430,0.008,-5.622,0.000,-0.058,-0.028

0,1,2,3
Omnibus:,232.545,Durbin-Watson:,1.99
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3228.067
Skew:,0.631,Prob(JB):,0.0
Kurtosis:,11.619,Cond. No.,1.08e+16


In [108]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data

Unnamed: 0,Feature,VIF
0,const,2365.86418
9,TotalBsmtSF,6.777707
10,2ndFlrSF,7.235842
11,GrLivArea,10.783335
37,RoofMatl_CompShg,30.949624
41,RoofMatl_Tar&Grv,11.965147
42,RoofMatl_WdShake,8.143293
43,RoofMatl_WdShngl,7.791651
50,BsmtQual_TA,6.164024
55,KitchenQual_Gd,6.702368


In [109]:

X_tr_sm.pop('RoofMatl_CompShg')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.881
Method:,Least Squares,F-statistic:,125.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:37,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3729.0
Df Residuals:,959,BIC:,-3424.0
Df Model:,61,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0914,0.028,-3.277,0.001,-0.146,-0.037
LotArea,0.0966,0.036,2.701,0.007,0.026,0.167
OverallQual,0.1172,0.015,7.765,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.316,0.000,0.027,0.072
YearBuilt,0.0924,0.012,7.816,0.000,0.069,0.116
YearRemodAdd,0.0171,0.006,3.099,0.002,0.006,0.028
MasVnrArea,0.0253,0.013,1.993,0.047,0.000,0.050
BsmtFinSF2,0.0019,0.012,0.167,0.867,-0.021,0.025
BsmtUnfSF,-0.0056,0.010,-0.559,0.576,-0.025,0.014

0,1,2,3
Omnibus:,705.601,Durbin-Watson:,2.013
Prob(Omnibus):,0.0,Jarque-Bera (JB):,301112.986
Skew:,-1.919,Prob(JB):,0.0
Kurtosis:,87.044,Cond. No.,1.09e+16


In [110]:
X_tr_sm.pop('BsmtFinSF2')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,127.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:37,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3731.0
Df Residuals:,960,BIC:,-3431.0
Df Model:,60,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0912,0.028,-3.274,0.001,-0.146,-0.037
LotArea,0.0966,0.036,2.704,0.007,0.026,0.167
OverallQual,0.1172,0.015,7.767,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.317,0.000,0.027,0.072
YearBuilt,0.0924,0.012,7.819,0.000,0.069,0.116
YearRemodAdd,0.0171,0.006,3.102,0.002,0.006,0.028
MasVnrArea,0.0251,0.013,1.987,0.047,0.000,0.050
BsmtUnfSF,-0.0060,0.010,-0.617,0.537,-0.025,0.013
TotalBsmtSF,0.1312,0.039,3.385,0.001,0.055,0.207

0,1,2,3
Omnibus:,707.338,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,302877.163
Skew:,-1.927,Prob(JB):,0.0
Kurtosis:,87.289,Cond. No.,1.09e+16


In [111]:
X_tr_sm.pop('RoofMatl_Tar&Grv')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,129.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:38,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3733.0
Df Residuals:,961,BIC:,-3438.0
Df Model:,59,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0912,0.028,-3.276,0.001,-0.146,-0.037
LotArea,0.0965,0.036,2.705,0.007,0.026,0.167
OverallQual,0.1172,0.015,7.774,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.322,0.000,0.027,0.072
YearBuilt,0.0924,0.012,7.822,0.000,0.069,0.116
YearRemodAdd,0.0171,0.006,3.103,0.002,0.006,0.028
MasVnrArea,0.0251,0.013,1.987,0.047,0.000,0.050
BsmtUnfSF,-0.0060,0.010,-0.617,0.537,-0.025,0.013
TotalBsmtSF,0.1312,0.039,3.387,0.001,0.055,0.207

0,1,2,3
Omnibus:,707.26,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,302784.663
Skew:,-1.926,Prob(JB):,0.0
Kurtosis:,87.276,Cond. No.,1.09e+16


In [112]:
X_tr_sm.pop('LandSlope_Sev')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,132.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:38,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3735.0
Df Residuals:,962,BIC:,-3444.0
Df Model:,58,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0912,0.028,-3.279,0.001,-0.146,-0.037
LotArea,0.0944,0.030,3.198,0.001,0.036,0.152
OverallQual,0.1171,0.015,7.778,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.326,0.000,0.027,0.072
YearBuilt,0.0924,0.012,7.833,0.000,0.069,0.116
YearRemodAdd,0.0171,0.005,3.103,0.002,0.006,0.028
MasVnrArea,0.0251,0.013,1.989,0.047,0.000,0.050
BsmtUnfSF,-0.0060,0.010,-0.618,0.537,-0.025,0.013
TotalBsmtSF,0.1310,0.039,3.387,0.001,0.055,0.207

0,1,2,3
Omnibus:,706.611,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,301824.004
Skew:,-1.924,Prob(JB):,0.0
Kurtosis:,87.143,Cond. No.,1.09e+16


In [113]:
X_tr_sm.pop('RoofMatl_Metal')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,134.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:38,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3737.0
Df Residuals:,963,BIC:,-3451.0
Df Model:,57,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0913,0.028,-3.283,0.001,-0.146,-0.037
LotArea,0.0944,0.029,3.200,0.001,0.036,0.152
OverallQual,0.1172,0.015,7.792,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.327,0.000,0.027,0.071
YearBuilt,0.0925,0.012,7.841,0.000,0.069,0.116
YearRemodAdd,0.0171,0.005,3.105,0.002,0.006,0.028
MasVnrArea,0.0251,0.013,1.988,0.047,0.000,0.050
BsmtUnfSF,-0.0060,0.010,-0.618,0.537,-0.025,0.013
TotalBsmtSF,0.1309,0.039,3.387,0.001,0.055,0.207

0,1,2,3
Omnibus:,706.48,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,301614.283
Skew:,-1.923,Prob(JB):,0.0
Kurtosis:,87.113,Cond. No.,1.09e+16


In [114]:
X_tr_sm.pop('RoofMatl_Roll')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,137.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:38,Log-Likelihood:,1926.6
No. Observations:,1021,AIC:,-3739.0
Df Residuals:,964,BIC:,-3458.0
Df Model:,56,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0914,0.028,-3.291,0.001,-0.146,-0.037
LotArea,0.0945,0.029,3.205,0.001,0.037,0.152
OverallQual,0.1172,0.015,7.794,0.000,0.088,0.147
OverallCond,0.0492,0.011,4.329,0.000,0.027,0.071
YearBuilt,0.0926,0.012,7.857,0.000,0.069,0.116
YearRemodAdd,0.0170,0.005,3.099,0.002,0.006,0.028
MasVnrArea,0.0254,0.013,2.022,0.043,0.001,0.050
BsmtUnfSF,-0.0060,0.010,-0.617,0.537,-0.025,0.013
TotalBsmtSF,0.1312,0.039,3.399,0.001,0.055,0.207

0,1,2,3
Omnibus:,706.848,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,301992.957
Skew:,-1.925,Prob(JB):,0.0
Kurtosis:,87.166,Cond. No.,1.09e+16


In [115]:
X_tr_sm.pop('RoofMatl_WdShake')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,139.7
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:38,Log-Likelihood:,1926.4
No. Observations:,1021,AIC:,-3741.0
Df Residuals:,965,BIC:,-3465.0
Df Model:,55,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0918,0.028,-3.306,0.001,-0.146,-0.037
LotArea,0.0945,0.029,3.206,0.001,0.037,0.152
OverallQual,0.1174,0.015,7.817,0.000,0.088,0.147
OverallCond,0.0495,0.011,4.365,0.000,0.027,0.072
YearBuilt,0.0926,0.012,7.860,0.000,0.069,0.116
YearRemodAdd,0.0168,0.005,3.071,0.002,0.006,0.028
MasVnrArea,0.0252,0.013,2.006,0.045,0.001,0.050
BsmtUnfSF,-0.0060,0.010,-0.615,0.539,-0.025,0.013
TotalBsmtSF,0.1284,0.038,3.357,0.001,0.053,0.203

0,1,2,3
Omnibus:,707.494,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,302731.089
Skew:,-1.928,Prob(JB):,0.0
Kurtosis:,87.269,Cond. No.,1.09e+16


In [116]:
X_tr_sm.pop('BsmtUnfSF')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,142.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:39,Log-Likelihood:,1926.2
No. Observations:,1021,AIC:,-3742.0
Df Residuals:,966,BIC:,-3471.0
Df Model:,54,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0922,0.028,-3.323,0.001,-0.147,-0.038
LotArea,0.0948,0.029,3.221,0.001,0.037,0.153
OverallQual,0.1166,0.015,7.797,0.000,0.087,0.146
OverallCond,0.0506,0.011,4.521,0.000,0.029,0.073
YearBuilt,0.0935,0.012,8.000,0.000,0.071,0.116
YearRemodAdd,0.0165,0.005,3.028,0.003,0.006,0.027
MasVnrArea,0.0257,0.013,2.056,0.040,0.001,0.050
TotalBsmtSF,0.1185,0.035,3.416,0.001,0.050,0.187
2ndFlrSF,0.0221,0.015,1.505,0.133,-0.007,0.051

0,1,2,3
Omnibus:,697.608,Durbin-Watson:,2.013
Prob(Omnibus):,0.0,Jarque-Bera (JB):,292094.969
Skew:,-1.881,Prob(JB):,0.0
Kurtosis:,85.776,Cond. No.,1.09e+16


In [117]:
X_tr_sm.pop('HouseStyle_1.5Unf')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,145.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:39,Log-Likelihood:,1925.8
No. Observations:,1021,AIC:,-3744.0
Df Residuals:,967,BIC:,-3477.0
Df Model:,53,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0918,0.028,-3.310,0.001,-0.146,-0.037
LotArea,0.0953,0.029,3.239,0.001,0.038,0.153
OverallQual,0.1180,0.015,7.945,0.000,0.089,0.147
OverallCond,0.0506,0.011,4.522,0.000,0.029,0.073
YearBuilt,0.0931,0.012,7.975,0.000,0.070,0.116
YearRemodAdd,0.0162,0.005,2.977,0.003,0.006,0.027
MasVnrArea,0.0257,0.013,2.052,0.040,0.001,0.050
TotalBsmtSF,0.1191,0.035,3.434,0.001,0.051,0.187
2ndFlrSF,0.0221,0.015,1.507,0.132,-0.007,0.051

0,1,2,3
Omnibus:,694.617,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,289424.891
Skew:,-1.867,Prob(JB):,0.0
Kurtosis:,85.398,Cond. No.,1.09e+16


In [118]:
X_tr_sm.pop('Exterior1st_Plywood')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,147.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:39,Log-Likelihood:,1925.4
No. Observations:,1021,AIC:,-3745.0
Df Residuals:,968,BIC:,-3484.0
Df Model:,52,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0912,0.028,-3.288,0.001,-0.146,-0.037
LotArea,0.0930,0.029,3.172,0.002,0.035,0.151
OverallQual,0.1194,0.015,8.074,0.000,0.090,0.148
OverallCond,0.0503,0.011,4.498,0.000,0.028,0.072
YearBuilt,0.0927,0.012,7.948,0.000,0.070,0.116
YearRemodAdd,0.0166,0.005,3.061,0.002,0.006,0.027
MasVnrArea,0.0256,0.013,2.045,0.041,0.001,0.050
TotalBsmtSF,0.1233,0.034,3.586,0.000,0.056,0.191
2ndFlrSF,0.0248,0.014,1.719,0.086,-0.004,0.053

0,1,2,3
Omnibus:,688.353,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,282835.759
Skew:,-1.838,Prob(JB):,0.0
Kurtosis:,84.455,Cond. No.,1.09e+16


In [119]:

X_tr_sm.pop('Exterior2nd_Other')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,147.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:39,Log-Likelihood:,1925.4
No. Observations:,1021,AIC:,-3745.0
Df Residuals:,968,BIC:,-3484.0
Df Model:,52,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0912,0.028,-3.288,0.001,-0.146,-0.037
LotArea,0.0930,0.029,3.172,0.002,0.035,0.151
OverallQual,0.1194,0.015,8.074,0.000,0.090,0.148
OverallCond,0.0503,0.011,4.498,0.000,0.028,0.072
YearBuilt,0.0927,0.012,7.948,0.000,0.070,0.116
YearRemodAdd,0.0166,0.005,3.061,0.002,0.006,0.027
MasVnrArea,0.0256,0.013,2.045,0.041,0.001,0.050
TotalBsmtSF,0.1233,0.034,3.586,0.000,0.056,0.191
2ndFlrSF,0.0248,0.014,1.719,0.086,-0.004,0.053

0,1,2,3
Omnibus:,688.353,Durbin-Watson:,2.011
Prob(Omnibus):,0.0,Jarque-Bera (JB):,282835.759
Skew:,-1.838,Prob(JB):,0.0
Kurtosis:,84.455,Cond. No.,123.0


In [120]:
X_tr_sm.pop('BsmtCond_Po')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,150.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:39,Log-Likelihood:,1924.8
No. Observations:,1021,AIC:,-3746.0
Df Residuals:,969,BIC:,-3489.0
Df Model:,51,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0879,0.028,-3.192,0.001,-0.142,-0.034
LotArea,0.0926,0.029,3.159,0.002,0.035,0.150
OverallQual,0.1177,0.015,8.013,0.000,0.089,0.146
OverallCond,0.0492,0.011,4.418,0.000,0.027,0.071
YearBuilt,0.0927,0.012,7.946,0.000,0.070,0.116
YearRemodAdd,0.0167,0.005,3.085,0.002,0.006,0.027
MasVnrArea,0.0259,0.013,2.071,0.039,0.001,0.050
TotalBsmtSF,0.1226,0.034,3.567,0.000,0.055,0.190
2ndFlrSF,0.0244,0.014,1.692,0.091,-0.004,0.053

0,1,2,3
Omnibus:,688.594,Durbin-Watson:,2.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,282320.472
Skew:,-1.84,Prob(JB):,0.0
Kurtosis:,84.381,Cond. No.,123.0


In [121]:
X_tr_sm.pop('SaleType_Con')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,153.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:40,Log-Likelihood:,1923.7
No. Observations:,1021,AIC:,-3745.0
Df Residuals:,970,BIC:,-3494.0
Df Model:,50,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0883,0.028,-3.206,0.001,-0.142,-0.034
LotArea,0.0911,0.029,3.106,0.002,0.034,0.149
OverallQual,0.1181,0.015,8.037,0.000,0.089,0.147
OverallCond,0.0489,0.011,4.396,0.000,0.027,0.071
YearBuilt,0.0926,0.012,7.932,0.000,0.070,0.115
YearRemodAdd,0.0167,0.005,3.090,0.002,0.006,0.027
MasVnrArea,0.0254,0.013,2.028,0.043,0.001,0.050
TotalBsmtSF,0.1226,0.034,3.563,0.000,0.055,0.190
2ndFlrSF,0.0245,0.014,1.696,0.090,-0.004,0.053

0,1,2,3
Omnibus:,685.365,Durbin-Watson:,2.01
Prob(Omnibus):,0.0,Jarque-Bera (JB):,279150.797
Skew:,-1.825,Prob(JB):,0.0
Kurtosis:,83.923,Cond. No.,123.0


In [122]:
X_tr_sm.pop('Neighborhood_NPkVill')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,156.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:40,Log-Likelihood:,1922.5
No. Observations:,1021,AIC:,-3745.0
Df Residuals:,971,BIC:,-3498.0
Df Model:,49,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0890,0.028,-3.231,0.001,-0.143,-0.035
LotArea,0.0907,0.029,3.091,0.002,0.033,0.148
OverallQual,0.1184,0.015,8.055,0.000,0.090,0.147
OverallCond,0.0494,0.011,4.436,0.000,0.028,0.071
YearBuilt,0.0921,0.012,7.891,0.000,0.069,0.115
YearRemodAdd,0.0164,0.005,3.020,0.003,0.006,0.027
MasVnrArea,0.0240,0.012,1.922,0.055,-0.001,0.049
TotalBsmtSF,0.1249,0.034,3.631,0.000,0.057,0.192
2ndFlrSF,0.0248,0.014,1.715,0.087,-0.004,0.053

0,1,2,3
Omnibus:,685.551,Durbin-Watson:,2.01
Prob(Omnibus):,0.0,Jarque-Bera (JB):,277467.543
Skew:,-1.827,Prob(JB):,0.0
Kurtosis:,83.678,Cond. No.,123.0


In [123]:
X_tr_sm.pop('Neighborhood_BrkSide')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.887
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,159.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:40,Log-Likelihood:,1921.1
No. Observations:,1021,AIC:,-3744.0
Df Residuals:,972,BIC:,-3503.0
Df Model:,48,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0862,0.028,-3.130,0.002,-0.140,-0.032
LotArea,0.0892,0.029,3.039,0.002,0.032,0.147
OverallQual,0.1199,0.015,8.169,0.000,0.091,0.149
OverallCond,0.0502,0.011,4.506,0.000,0.028,0.072
YearBuilt,0.0894,0.012,7.735,0.000,0.067,0.112
YearRemodAdd,0.0160,0.005,2.950,0.003,0.005,0.027
MasVnrArea,0.0236,0.012,1.889,0.059,-0.001,0.048
TotalBsmtSF,0.1264,0.034,3.673,0.000,0.059,0.194
2ndFlrSF,0.0254,0.014,1.761,0.079,-0.003,0.054

0,1,2,3
Omnibus:,679.493,Durbin-Watson:,2.009
Prob(Omnibus):,0.0,Jarque-Bera (JB):,272558.685
Skew:,-1.798,Prob(JB):,0.0
Kurtosis:,82.962,Cond. No.,123.0


In [124]:
X_tr_sm.pop('RoofMatl_Membran')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.887
Model:,OLS,Adj. R-squared:,0.882
Method:,Least Squares,F-statistic:,162.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:40,Log-Likelihood:,1919.8
No. Observations:,1021,AIC:,-3744.0
Df Residuals:,973,BIC:,-3507.0
Df Model:,47,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0847,0.028,-3.077,0.002,-0.139,-0.031
LotArea,0.0924,0.029,3.153,0.002,0.035,0.150
OverallQual,0.1213,0.015,8.273,0.000,0.093,0.150
OverallCond,0.0512,0.011,4.607,0.000,0.029,0.073
YearBuilt,0.0885,0.012,7.663,0.000,0.066,0.111
YearRemodAdd,0.0163,0.005,3.001,0.003,0.006,0.027
MasVnrArea,0.0236,0.013,1.886,0.060,-0.001,0.048
TotalBsmtSF,0.1269,0.034,3.686,0.000,0.059,0.194
2ndFlrSF,0.0257,0.014,1.775,0.076,-0.003,0.054

0,1,2,3
Omnibus:,679.266,Durbin-Watson:,2.007
Prob(Omnibus):,0.0,Jarque-Bera (JB):,270888.011
Skew:,-1.799,Prob(JB):,0.0
Kurtosis:,82.716,Cond. No.,123.0


In [125]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data


Unnamed: 0,Feature,VIF
0,const,541.158274
8,2ndFlrSF,6.626523
9,GrLivArea,9.530158
36,BsmtQual_TA,5.993641
40,KitchenQual_Gd,6.653782
41,KitchenQual_TA,8.92855


In [126]:
X_tr_sm.pop('GrLivArea')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.874
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,146.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:41,Log-Likelihood:,1862.9
No. Observations:,1021,AIC:,-3632.0
Df Residuals:,974,BIC:,-3400.0
Df Model:,46,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0647,0.029,-2.229,0.026,-0.122,-0.008
LotArea,0.1238,0.031,4.018,0.000,0.063,0.184
OverallQual,0.1449,0.015,9.456,0.000,0.115,0.175
OverallCond,0.0499,0.012,4.249,0.000,0.027,0.073
YearBuilt,0.0705,0.012,5.836,0.000,0.047,0.094
YearRemodAdd,0.0187,0.006,3.264,0.001,0.007,0.030
MasVnrArea,0.0368,0.013,2.799,0.005,0.011,0.063
TotalBsmtSF,0.3518,0.029,12.199,0.000,0.295,0.408
2ndFlrSF,0.1314,0.011,11.793,0.000,0.110,0.153

0,1,2,3
Omnibus:,578.023,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166671.888
Skew:,-1.374,Prob(JB):,0.0
Kurtosis:,65.532,Cond. No.,122.0


In [127]:
X_tr_sm.pop('HouseStyle_2.5Fin')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.874
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,149.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:41,Log-Likelihood:,1862.9
No. Observations:,1021,AIC:,-3634.0
Df Residuals:,975,BIC:,-3407.0
Df Model:,45,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0646,0.029,-2.228,0.026,-0.122,-0.008
LotArea,0.1240,0.031,4.026,0.000,0.064,0.184
OverallQual,0.1450,0.015,9.474,0.000,0.115,0.175
OverallCond,0.0499,0.012,4.249,0.000,0.027,0.073
YearBuilt,0.0710,0.012,5.922,0.000,0.047,0.094
YearRemodAdd,0.0185,0.006,3.250,0.001,0.007,0.030
MasVnrArea,0.0370,0.013,2.819,0.005,0.011,0.063
TotalBsmtSF,0.3509,0.029,12.225,0.000,0.295,0.407
2ndFlrSF,0.1310,0.011,11.851,0.000,0.109,0.153

0,1,2,3
Omnibus:,577.086,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166273.213
Skew:,-1.37,Prob(JB):,0.0
Kurtosis:,65.458,Cond. No.,122.0


In [128]:
X_tr_sm.pop('BedroomAbvGr')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.874
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,153.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:41,Log-Likelihood:,1862.7
No. Observations:,1021,AIC:,-3635.0
Df Residuals:,976,BIC:,-3414.0
Df Model:,44,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0639,0.029,-2.205,0.028,-0.121,-0.007
LotArea,0.1241,0.031,4.032,0.000,0.064,0.185
OverallQual,0.1449,0.015,9.469,0.000,0.115,0.175
OverallCond,0.0502,0.012,4.281,0.000,0.027,0.073
YearBuilt,0.0720,0.012,6.073,0.000,0.049,0.095
YearRemodAdd,0.0184,0.006,3.232,0.001,0.007,0.030
MasVnrArea,0.0373,0.013,2.844,0.005,0.012,0.063
TotalBsmtSF,0.3548,0.028,12.745,0.000,0.300,0.409
2ndFlrSF,0.1338,0.010,13.635,0.000,0.115,0.153

0,1,2,3
Omnibus:,582.64,Durbin-Watson:,1.961
Prob(Omnibus):,0.0,Jarque-Bera (JB):,169182.01
Skew:,-1.395,Prob(JB):,0.0
Kurtosis:,66.001,Cond. No.,120.0


In [129]:

X_tr_sm.pop('Heating_OthW')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,156.8
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:41,Log-Likelihood:,1862.1
No. Observations:,1021,AIC:,-3636.0
Df Residuals:,977,BIC:,-3419.0
Df Model:,43,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0649,0.029,-2.240,0.025,-0.122,-0.008
LotArea,0.1246,0.031,4.048,0.000,0.064,0.185
OverallQual,0.1428,0.015,9.405,0.000,0.113,0.173
OverallCond,0.0516,0.012,4.430,0.000,0.029,0.075
YearBuilt,0.0741,0.012,6.333,0.000,0.051,0.097
YearRemodAdd,0.0179,0.006,3.152,0.002,0.007,0.029
MasVnrArea,0.0373,0.013,2.845,0.005,0.012,0.063
TotalBsmtSF,0.3547,0.028,12.740,0.000,0.300,0.409
2ndFlrSF,0.1334,0.010,13.599,0.000,0.114,0.153

0,1,2,3
Omnibus:,583.995,Durbin-Watson:,1.964
Prob(Omnibus):,0.0,Jarque-Bera (JB):,169350.911
Skew:,-1.402,Prob(JB):,0.0
Kurtosis:,66.031,Cond. No.,120.0


In [130]:
X_tr_sm.pop('Functional_Min1')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,160.5
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:41,Log-Likelihood:,1861.5
No. Observations:,1021,AIC:,-3637.0
Df Residuals:,978,BIC:,-3425.0
Df Model:,42,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0587,0.028,-2.066,0.039,-0.115,-0.003
LotArea,0.1246,0.031,4.047,0.000,0.064,0.185
OverallQual,0.1428,0.015,9.401,0.000,0.113,0.173
OverallCond,0.0531,0.012,4.586,0.000,0.030,0.076
YearBuilt,0.0744,0.012,6.362,0.000,0.051,0.097
YearRemodAdd,0.0178,0.006,3.127,0.002,0.007,0.029
MasVnrArea,0.0377,0.013,2.872,0.004,0.012,0.063
TotalBsmtSF,0.3529,0.028,12.696,0.000,0.298,0.407
2ndFlrSF,0.1329,0.010,13.560,0.000,0.114,0.152

0,1,2,3
Omnibus:,583.019,Durbin-Watson:,1.971
Prob(Omnibus):,0.0,Jarque-Bera (JB):,168066.291
Skew:,-1.399,Prob(JB):,0.0
Kurtosis:,65.792,Cond. No.,120.0


In [131]:
X_tr_sm.pop('HalfBath')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,164.3
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:42,Log-Likelihood:,1860.6
No. Observations:,1021,AIC:,-3637.0
Df Residuals:,979,BIC:,-3430.0
Df Model:,41,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0565,0.028,-1.989,0.047,-0.112,-0.001
LotArea,0.1251,0.031,4.063,0.000,0.065,0.186
OverallQual,0.1439,0.015,9.491,0.000,0.114,0.174
OverallCond,0.0534,0.012,4.613,0.000,0.031,0.076
YearBuilt,0.0707,0.011,6.241,0.000,0.048,0.093
YearRemodAdd,0.0178,0.006,3.130,0.002,0.007,0.029
MasVnrArea,0.0373,0.013,2.845,0.005,0.012,0.063
TotalBsmtSF,0.3522,0.028,12.668,0.000,0.298,0.407
2ndFlrSF,0.1258,0.008,15.625,0.000,0.110,0.142

0,1,2,3
Omnibus:,584.973,Durbin-Watson:,1.973
Prob(Omnibus):,0.0,Jarque-Bera (JB):,170052.82
Skew:,-1.406,Prob(JB):,0.0
Kurtosis:,66.162,Cond. No.,119.0


In [132]:
X_tr_sm.pop('Functional_Typ')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,168.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:42,Log-Likelihood:,1859.6
No. Observations:,1021,AIC:,-3637.0
Df Residuals:,980,BIC:,-3435.0
Df Model:,40,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0502,0.028,-1.792,0.074,-0.105,0.005
LotArea,0.1247,0.031,4.047,0.000,0.064,0.185
OverallQual,0.1453,0.015,9.598,0.000,0.116,0.175
OverallCond,0.0559,0.011,4.888,0.000,0.033,0.078
YearBuilt,0.0723,0.011,6.423,0.000,0.050,0.094
YearRemodAdd,0.0171,0.006,3.017,0.003,0.006,0.028
MasVnrArea,0.0378,0.013,2.885,0.004,0.012,0.064
TotalBsmtSF,0.3496,0.028,12.598,0.000,0.295,0.404
2ndFlrSF,0.1256,0.008,15.597,0.000,0.110,0.141

0,1,2,3
Omnibus:,583.706,Durbin-Watson:,1.975
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166807.873
Skew:,-1.404,Prob(JB):,0.0
Kurtosis:,65.555,Cond. No.,110.0


In [133]:


X_tr_sm.pop('Functional_Min2')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,172.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:42,Log-Likelihood:,1859.5
No. Observations:,1021,AIC:,-3639.0
Df Residuals:,981,BIC:,-3442.0
Df Model:,39,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0494,0.028,-1.766,0.078,-0.104,0.006
LotArea,0.1247,0.031,4.050,0.000,0.064,0.185
OverallQual,0.1446,0.015,9.609,0.000,0.115,0.174
OverallCond,0.0556,0.011,4.869,0.000,0.033,0.078
YearBuilt,0.0719,0.011,6.411,0.000,0.050,0.094
YearRemodAdd,0.0173,0.006,3.061,0.002,0.006,0.028
MasVnrArea,0.0377,0.013,2.875,0.004,0.012,0.063
TotalBsmtSF,0.3495,0.028,12.601,0.000,0.295,0.404
2ndFlrSF,0.1258,0.008,15.642,0.000,0.110,0.142

0,1,2,3
Omnibus:,582.956,Durbin-Watson:,1.976
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166479.07
Skew:,-1.401,Prob(JB):,0.0
Kurtosis:,65.494,Cond. No.,110.0


In [134]:
X_tr_sm.pop('SaleType_New')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.868
Method:,Least Squares,F-statistic:,176.9
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:42,Log-Likelihood:,1858.4
No. Observations:,1021,AIC:,-3639.0
Df Residuals:,982,BIC:,-3447.0
Df Model:,38,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0480,0.028,-1.717,0.086,-0.103,0.007
LotArea,0.1254,0.031,4.072,0.000,0.065,0.186
OverallQual,0.1442,0.015,9.578,0.000,0.115,0.174
OverallCond,0.0548,0.011,4.804,0.000,0.032,0.077
YearBuilt,0.0733,0.011,6.563,0.000,0.051,0.095
YearRemodAdd,0.0181,0.006,3.214,0.001,0.007,0.029
MasVnrArea,0.0373,0.013,2.843,0.005,0.012,0.063
TotalBsmtSF,0.3534,0.028,12.794,0.000,0.299,0.408
2ndFlrSF,0.1255,0.008,15.601,0.000,0.110,0.141

0,1,2,3
Omnibus:,571.62,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,159286.911
Skew:,-1.352,Prob(JB):,0.0
Kurtosis:,64.131,Cond. No.,110.0


In [135]:
vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data


Unnamed: 0,Feature,VIF
0,const,499.464579
32,BsmtQual_TA,5.28733
35,KitchenQual_Gd,6.503783
36,KitchenQual_TA,8.706583


In [136]:
X_tr_sm.pop('KitchenQual_TA')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.866
Model:,OLS,Adj. R-squared:,0.861
Method:,Least Squares,F-statistic:,172.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:43,Log-Likelihood:,1834.1
No. Observations:,1021,AIC:,-3592.0
Df Residuals:,983,BIC:,-3405.0
Df Model:,37,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1075,0.027,-3.948,0.000,-0.161,-0.054
LotArea,0.1153,0.031,3.662,0.000,0.054,0.177
OverallQual,0.1661,0.015,11.031,0.000,0.137,0.196
OverallCond,0.0573,0.012,4.909,0.000,0.034,0.080
YearBuilt,0.0736,0.011,6.439,0.000,0.051,0.096
YearRemodAdd,0.0248,0.006,4.383,0.000,0.014,0.036
MasVnrArea,0.0358,0.013,2.672,0.008,0.010,0.062
TotalBsmtSF,0.3814,0.028,13.638,0.000,0.327,0.436
2ndFlrSF,0.1298,0.008,15.820,0.000,0.114,0.146

0,1,2,3
Omnibus:,532.77,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,144883.945
Skew:,-1.17,Prob(JB):,0.0
Kurtosis:,61.311,Cond. No.,108.0


In [137]:
X_tr_sm.pop('KitchenQual_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.866
Model:,OLS,Adj. R-squared:,0.861
Method:,Least Squares,F-statistic:,177.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:43,Log-Likelihood:,1834.0
No. Observations:,1021,AIC:,-3594.0
Df Residuals:,984,BIC:,-3412.0
Df Model:,36,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1051,0.027,-3.909,0.000,-0.158,-0.052
LotArea,0.1149,0.031,3.651,0.000,0.053,0.177
OverallQual,0.1659,0.015,11.025,0.000,0.136,0.195
OverallCond,0.0569,0.012,4.885,0.000,0.034,0.080
YearBuilt,0.0730,0.011,6.418,0.000,0.051,0.095
YearRemodAdd,0.0247,0.006,4.360,0.000,0.014,0.036
MasVnrArea,0.0357,0.013,2.665,0.008,0.009,0.062
TotalBsmtSF,0.3819,0.028,13.667,0.000,0.327,0.437
2ndFlrSF,0.1299,0.008,15.841,0.000,0.114,0.146

0,1,2,3
Omnibus:,531.692,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,144312.215
Skew:,-1.165,Prob(JB):,0.0
Kurtosis:,61.196,Cond. No.,108.0


In [138]:
vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data

Unnamed: 0,Feature,VIF
0,const,441.253458
32,BsmtQual_TA,5.069134


In [139]:
X_tr_sm.pop('BsmtQual_TA')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.854
Model:,OLS,Adj. R-squared:,0.848
Method:,Least Squares,F-statistic:,164.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:43,Log-Likelihood:,1788.0
No. Observations:,1021,AIC:,-3504.0
Df Residuals:,985,BIC:,-3326.0
Df Model:,35,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1860,0.027,-6.968,0.000,-0.238,-0.134
LotArea,0.1226,0.033,3.725,0.000,0.058,0.187
OverallQual,0.1930,0.015,12.482,0.000,0.163,0.223
OverallCond,0.0397,0.012,3.297,0.001,0.016,0.063
YearBuilt,0.0997,0.012,8.651,0.000,0.077,0.122
YearRemodAdd,0.0317,0.006,5.399,0.000,0.020,0.043
MasVnrArea,0.0398,0.014,2.841,0.005,0.012,0.067
TotalBsmtSF,0.3065,0.028,10.928,0.000,0.251,0.362
2ndFlrSF,0.1224,0.009,14.341,0.000,0.106,0.139

0,1,2,3
Omnibus:,364.324,Durbin-Watson:,2.045
Prob(Omnibus):,0.0,Jarque-Bera (JB):,73371.529
Skew:,-0.355,Prob(JB):,0.0
Kurtosis:,44.523,Cond. No.,105.0


In [140]:
X_tr_sm.pop('PoolQC_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.854
Model:,OLS,Adj. R-squared:,0.848
Method:,Least Squares,F-statistic:,169.0
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:43,Log-Likelihood:,1787.4
No. Observations:,1021,AIC:,-3505.0
Df Residuals:,986,BIC:,-3332.0
Df Model:,34,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1858,0.027,-6.960,0.000,-0.238,-0.133
LotArea,0.1215,0.033,3.695,0.000,0.057,0.186
OverallQual,0.1930,0.015,12.487,0.000,0.163,0.223
OverallCond,0.0399,0.012,3.318,0.001,0.016,0.063
YearBuilt,0.1000,0.012,8.681,0.000,0.077,0.123
YearRemodAdd,0.0317,0.006,5.400,0.000,0.020,0.043
MasVnrArea,0.0407,0.014,2.911,0.004,0.013,0.068
TotalBsmtSF,0.3012,0.028,10.939,0.000,0.247,0.355
2ndFlrSF,0.1211,0.008,14.358,0.000,0.105,0.138

0,1,2,3
Omnibus:,396.649,Durbin-Watson:,2.045
Prob(Omnibus):,0.0,Jarque-Bera (JB):,75636.235
Skew:,-0.585,Prob(JB):,0.0
Kurtosis:,45.149,Cond. No.,76.5


In [141]:
X_tr_sm.pop('BsmtQual_Fa')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.853
Model:,OLS,Adj. R-squared:,0.849
Method:,Least Squares,F-statistic:,174.2
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:43,Log-Likelihood:,1787.4
No. Observations:,1021,AIC:,-3507.0
Df Residuals:,987,BIC:,-3339.0
Df Model:,33,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1851,0.027,-6.951,0.000,-0.237,-0.133
LotArea,0.1213,0.033,3.691,0.000,0.057,0.186
OverallQual,0.1930,0.015,12.492,0.000,0.163,0.223
OverallCond,0.0397,0.012,3.306,0.001,0.016,0.063
YearBuilt,0.0991,0.011,8.791,0.000,0.077,0.121
YearRemodAdd,0.0318,0.006,5.433,0.000,0.020,0.043
MasVnrArea,0.0406,0.014,2.904,0.004,0.013,0.068
TotalBsmtSF,0.3007,0.027,10.936,0.000,0.247,0.355
2ndFlrSF,0.1211,0.008,14.365,0.000,0.105,0.138

0,1,2,3
Omnibus:,395.882,Durbin-Watson:,2.044
Prob(Omnibus):,0.0,Jarque-Bera (JB):,75361.728
Skew:,-0.582,Prob(JB):,0.0
Kurtosis:,45.073,Cond. No.,76.5


In [142]:
X_tr_sm.pop('KitchenAbvGr')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.853
Model:,OLS,Adj. R-squared:,0.849
Method:,Least Squares,F-statistic:,179.6
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:44,Log-Likelihood:,1786.8
No. Observations:,1021,AIC:,-3508.0
Df Residuals:,988,BIC:,-3345.0
Df Model:,32,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1945,0.025,-7.762,0.000,-0.244,-0.145
LotArea,0.1224,0.033,3.725,0.000,0.058,0.187
OverallQual,0.1953,0.015,12.764,0.000,0.165,0.225
OverallCond,0.0414,0.012,3.479,0.001,0.018,0.065
YearBuilt,0.1001,0.011,8.925,0.000,0.078,0.122
YearRemodAdd,0.0317,0.006,5.412,0.000,0.020,0.043
MasVnrArea,0.0400,0.014,2.865,0.004,0.013,0.067
TotalBsmtSF,0.2993,0.027,10.897,0.000,0.245,0.353
2ndFlrSF,0.1199,0.008,14.363,0.000,0.103,0.136

0,1,2,3
Omnibus:,394.206,Durbin-Watson:,2.043
Prob(Omnibus):,0.0,Jarque-Bera (JB):,74388.158
Skew:,-0.575,Prob(JB):,0.0
Kurtosis:,44.8,Cond. No.,75.6


In [143]:
X_tr_sm.pop('PoolArea')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.853
Model:,OLS,Adj. R-squared:,0.849
Method:,Least Squares,F-statistic:,185.4
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:44,Log-Likelihood:,1786.2
No. Observations:,1021,AIC:,-3508.0
Df Residuals:,989,BIC:,-3351.0
Df Model:,31,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1937,0.025,-7.732,0.000,-0.243,-0.145
LotArea,0.1208,0.033,3.679,0.000,0.056,0.185
OverallQual,0.1950,0.015,12.748,0.000,0.165,0.225
OverallCond,0.0418,0.012,3.514,0.000,0.018,0.065
YearBuilt,0.1005,0.011,8.959,0.000,0.078,0.123
YearRemodAdd,0.0315,0.006,5.386,0.000,0.020,0.043
MasVnrArea,0.0411,0.014,2.946,0.003,0.014,0.068
TotalBsmtSF,0.2928,0.027,10.905,0.000,0.240,0.345
2ndFlrSF,0.1185,0.008,14.350,0.000,0.102,0.135

0,1,2,3
Omnibus:,452.89,Durbin-Watson:,2.044
Prob(Omnibus):,0.0,Jarque-Bera (JB):,79698.354
Skew:,-0.901,Prob(JB):,0.0
Kurtosis:,46.246,Cond. No.,75.1


In [144]:
X_tr_sm.pop('Functional_Sev')
model1 = sm.OLS(y_tr,X_tr_sm)
res1 = model1.fit()
res1.summary()

0,1,2,3
Dep. Variable:,SalePrice,R-squared:,0.853
Model:,OLS,Adj. R-squared:,0.848
Method:,Least Squares,F-statistic:,191.1
Date:,"Sun, 21 Jan 2024",Prob (F-statistic):,0.0
Time:,21:13:44,Log-Likelihood:,1784.8
No. Observations:,1021,AIC:,-3508.0
Df Residuals:,990,BIC:,-3355.0
Df Model:,30,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1936,0.025,-7.721,0.000,-0.243,-0.144
LotArea,0.1196,0.033,3.640,0.000,0.055,0.184
OverallQual,0.1953,0.015,12.758,0.000,0.165,0.225
OverallCond,0.0420,0.012,3.528,0.000,0.019,0.065
YearBuilt,0.1008,0.011,8.982,0.000,0.079,0.123
YearRemodAdd,0.0315,0.006,5.377,0.000,0.020,0.043
MasVnrArea,0.0410,0.014,2.939,0.003,0.014,0.068
TotalBsmtSF,0.2922,0.027,10.875,0.000,0.239,0.345
2ndFlrSF,0.1180,0.008,14.286,0.000,0.102,0.134

0,1,2,3
Omnibus:,449.233,Durbin-Watson:,2.046
Prob(Omnibus):,0.0,Jarque-Bera (JB):,78192.149
Skew:,-0.886,Prob(JB):,0.0
Kurtosis:,45.835,Cond. No.,75.1


In [145]:

vif_data=pd.DataFrame()
vif_data['Feature'] = X_tr_sm.columns

vif_data['VIF']=[variance_inflation_factor(X_tr_sm.values,i) for i in range(len(X_tr_sm.columns))]
vif_data = vif_data[vif_data['VIF'] >5]
vif_data

Unnamed: 0,Feature,VIF
0,const,350.518902


In [146]:
X_tr_sm.head()

Unnamed: 0,const,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,TotalBsmtSF,2ndFlrSF,BsmtFullBath,...,Condition2_PosA,Condition2_PosN,BldgType_Twnhs,BldgType_TwnhsE,RoofMatl_WdShngl,Exterior1st_BrkFace,Exterior2nd_CmentBd,BsmtQual_Gd,BsmtExposure_Gd,KitchenQual_Gd
318,1.0,0.040197,0.666667,0.5,0.876812,0.716667,0.16,0.220458,0.616949,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
239,1.0,0.03478,0.555556,0.375,0.528986,0.0,0.0,0.120295,0.333656,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
986,1.0,0.018743,0.555556,0.875,0.275362,0.883333,0.0,0.079378,0.307022,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1416,1.0,0.046928,0.333333,0.625,0.094203,0.0,0.0,0.127169,0.505569,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
390,1.0,0.033209,0.444444,0.875,0.202899,0.0,0.0,0.140917,0.19661,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [147]:
X_tr = X_tr_sm.pop('const')
X_tr = X_tr_sm
X_te = X_te[X_tr.columns]

In [148]:
#Linear Regression

lm = LinearRegression()

# Fit a line
lm.fit(X_tr, y_tr)
y_pred_train = lm.predict(X_tr)
y_pred_test = lm.predict(X_te)

metric = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric.append(mse_test_lr**0.5)

0.8527538537081485
0.8079629391635267
1.8121967627922027
1.046476172107447
0.0017749233719806098
0.0023837726016114964


In [149]:
from sklearn.model_selection import GridSearchCV

params = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 
 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 3.0, 
 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20, 50, 100, 500, 1000 ]}

ridge = Ridge()

# cross validation
folds = 5
model_cv = GridSearchCV(estimator = ridge, 
                        param_grid = params, 
                        scoring= 'neg_mean_absolute_error',  
                        cv = folds, 
                        return_train_score=True,
                        verbose = 1)            
model_cv.fit(X_tr, y_tr) 

Fitting 5 folds for each of 28 candidates, totalling 140 fits


In [150]:
print(model_cv.best_params_)

{'alpha': 0.0001}


In [151]:
alpha = 0.0001
ridge = Ridge(alpha=alpha)

ridge.fit(X_tr, y_tr)
print(ridge.coef_)

[ 0.11955177  0.1953491   0.04199893  0.10082208  0.03147895  0.0409893
  0.29216545  0.11799563  0.04163408  0.07153686  0.03174846  0.04780743
  0.01817156  0.05742918 -0.02186065  0.08235746  0.06637786  0.02663451
  0.05556421  0.01478561  0.09241723 -0.40172057 -0.06846385 -0.03136015
  0.11983852  0.05333309  0.03026974 -0.02487368  0.03460425 -0.00865652]


In [152]:

# Lets calculate some metrics such as R2 score, RSS and RMSE
y_test = y_te
y_train = y_tr
y_pred_train = ridge.predict(X_tr)
y_pred_test = ridge.predict(X_te)

metric2 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric2.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric2.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric2.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric2.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric2.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric2.append(mse_test_lr**0.5)

0.8527538535153367
0.8079685442415017
1.8121967651651887
1.0464456281045447
0.0017749233743047882
0.0023837030252950903


In [153]:
lasso = Lasso()

# cross validation
model_cv = GridSearchCV(estimator = lasso, 
                        param_grid = params, 
                        scoring= 'neg_mean_absolute_error', 
                        cv = folds, 
                        return_train_score=True,
                        verbose = 1)            

model_cv.fit(X_tr, y_tr) 
print(model_cv.best_params_)

Fitting 5 folds for each of 28 candidates, totalling 140 fits
{'alpha': 0.0001}


In [154]:
alpha = 0.0001
lasso = Lasso(alpha=alpha)

lasso.fit(X_tr, y_tr)
print(lasso.coef_)

[ 0.06800817  0.20890446  0.03104815  0.08801348  0.03249014  0.03600423
  0.27120286  0.11138906  0.04033693  0.0774634   0.03134977  0.01370823
  0.01860365  0.05343982 -0.01795687  0.08135871  0.06379272  0.02241813
  0.04444355  0.01375866  0.         -0.28033244 -0.06469419 -0.02957078
  0.10023379  0.05060388  0.02509769 -0.02339904  0.0355977  -0.00668081]


In [155]:
# Lets calculate some metrics such as R2 score, RSS and RMSE

y_pred_train = lasso.predict(X_tr)
y_pred_test = lasso.predict(X_te)

metric3 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric3.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric3.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric3.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric3.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric3.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric3.append(mse_test_lr**0.5)

finalMetric = metric3

0.8495729001133265
0.8164949358639633
1.851345589109891
0.9999823796664704
0.001813266982477856
0.002277864190584215


In [156]:
# Creating a table which contain all the metrics

lr_table = {'Metric': ['R2 Score (Train)','R2 Score (Test)','RSS (Train)','RSS (Test)',
                       'MSE (Train)','MSE (Test)'], 
        'Linear Regression': metric
        }

lr_metric = pd.DataFrame(lr_table ,columns = ['Metric', 'Linear Regression'] )

rg_metric = pd.Series(metric2, name = 'Ridge Regression')
ls_metric = pd.Series(metric3, name = 'Lasso Regression')

final_metric = pd.concat([lr_metric, rg_metric, ls_metric], axis = 1)

final_metric

betas = pd.DataFrame(index=X_tr.columns)
betas.rows = X_tr.columns

betas['Linear'] = lm.coef_
betas['Ridge'] = ridge.coef_
betas['Lasso'] = lasso.coef_

lasso_final = lasso
X_tr_final = X_tr
X_te_final = X_te

pd.set_option('display.max_rows', None)
betas.head(68)

Unnamed: 0,Linear,Ridge,Lasso
LotArea,0.119557,0.119552,0.068008
OverallQual,0.195348,0.195349,0.208904
OverallCond,0.042,0.041999,0.031048
YearBuilt,0.100823,0.100822,0.088013
YearRemodAdd,0.031479,0.031479,0.03249
MasVnrArea,0.040989,0.040989,0.036004
TotalBsmtSF,0.292177,0.292165,0.271203
2ndFlrSF,0.117998,0.117996,0.111389
BsmtFullBath,0.041634,0.041634,0.040337
GarageArea,0.071535,0.071537,0.077463


In [157]:
lr_table = {'Metric': ['R2 Score (Train)','R2 Score (Test)','RSS (Train)','RSS (Test)',
                       'MSE (Train)','MSE (Test)'], 
        'Linear Regression': metric
        }

lr_metric = pd.DataFrame(lr_table ,columns = ['Metric', 'Linear Regression'] )

rg_metric = pd.Series(metric2, name = 'Ridge Regression')
ls_metric = pd.Series(metric3, name = 'Lasso Regression')

final_metric_all = pd.concat([lr_metric, rg_metric, ls_metric], axis = 1)

final_metric_all

Unnamed: 0,Metric,Linear Regression,Ridge Regression,Lasso Regression
0,R2 Score (Train),0.852754,0.852754,0.849573
1,R2 Score (Test),0.807963,0.807969,0.816495
2,RSS (Train),1.812197,1.812197,1.851346
3,RSS (Test),1.046476,1.046446,0.999982
4,MSE (Train),0.04213,0.04213,0.042582
5,MSE (Test),0.048824,0.048823,0.047727


In [158]:
alpha = 0.0001 * 2
ridge = Ridge(alpha=alpha)

ridge.fit(X_tr, y_tr)
print(ridge.coef_)

y_pred_train = ridge.predict(X_tr)
y_pred_test = ridge.predict(X_te)

metric2 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric2.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric2.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric2.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric2.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric2.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric2.append(mse_test_lr**0.5)

[ 0.11954617  0.19535025  0.0419979   0.10082083  0.03147922  0.04098941
  0.29215382  0.11799354  0.04163456  0.0715384   0.03174913  0.04780573
  0.01817182  0.05742926 -0.02186055  0.08235816  0.06637838  0.02663451
  0.05556418  0.01478577  0.09240835 -0.40167427 -0.06846396 -0.03136039
  0.1198367   0.05333327  0.03026874 -0.0248734   0.03460442 -0.00865635]
0.8527538529370576
0.8079741471634462
1.81219677228222
1.0464150958505951
0.001774923381275436
0.0023836334757416744


In [159]:
alpha = 0.0001*2
lasso = Lasso(alpha=alpha)

lasso.fit(X_tr, y_tr)
print(lasso.coef_)

    
    
y_pred_train = lasso.predict(X_tr)
y_pred_test = lasso.predict(X_te)

metric3 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric3.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric3.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric3.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric3.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric3.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric3.append(mse_test_lr**0.5)

[ 0.02328761  0.21864068  0.01925576  0.07831268  0.03320345  0.03165003
  0.24820434  0.10432207  0.039456    0.08156663  0.03162842  0.
  0.01892051  0.05037322 -0.01459698  0.08103832  0.06184936  0.01868675
  0.03426432  0.01316577  0.         -0.15787684 -0.06135285 -0.02811148
  0.08017276  0.04776383  0.01994737 -0.02186476  0.03691688 -0.00506139]
0.8426771248747098
0.8167186063716918
1.9362136951966544
0.9987635219328559
0.0018963895153738045
0.002275087749277576


In [160]:
betas = pd.DataFrame(index=X_tr.columns)
betas.rows = X_tr.columns

betas['Linear'] = lm.coef_
betas['Ridge'] = ridge.coef_
betas['Lasso'] = lasso.coef_

pd.set_option('display.max_rows', None)
betas.head(68)

Unnamed: 0,Linear,Ridge,Lasso
LotArea,0.119557,0.119546,0.023288
OverallQual,0.195348,0.19535,0.218641
OverallCond,0.042,0.041998,0.019256
YearBuilt,0.100823,0.100821,0.078313
YearRemodAdd,0.031479,0.031479,0.033203
MasVnrArea,0.040989,0.040989,0.03165
TotalBsmtSF,0.292177,0.292154,0.248204
2ndFlrSF,0.117998,0.117994,0.104322
BsmtFullBath,0.041634,0.041635,0.039456
GarageArea,0.071535,0.071538,0.081567


In [161]:
lr_table = {'Metric': ['R2 Score (Train)','R2 Score (Test)','RSS (Train)','RSS (Test)',
                       'MSE (Train)','MSE (Test)'], 
        'Linear Regression': metric
        }

lr_metric = pd.DataFrame(lr_table ,columns = ['Metric', 'Linear Regression'] )

rg_metric = pd.Series(metric2, name = 'Ridge Regression')
ls_metric = pd.Series(metric3, name = 'Lasso Regression')

final_metric = pd.concat([lr_metric, rg_metric, ls_metric], axis = 1)

final_metric

Unnamed: 0,Metric,Linear Regression,Ridge Regression,Lasso Regression
0,R2 Score (Train),0.852754,0.852754,0.842677
1,R2 Score (Test),0.807963,0.807974,0.816719
2,RSS (Train),1.812197,1.812197,1.936214
3,RSS (Test),1.046476,1.046415,0.998764
4,MSE (Train),0.04213,0.04213,0.043548
5,MSE (Test),0.048824,0.048822,0.047698


In [162]:
#Removing five most predictor variables from lasso Regresson
columns_rem = ['OverallQual','TotalBsmtSF','Condition2_PosN','2ndFlrSF','RoofMatl_WdShngl']
X_tr = X_tr.drop(columns_rem, axis=1)
X_te = X_te.drop(columns_rem, axis=1)

X_tr.head()
X_te.head()

Unnamed: 0,LotArea,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFullBath,GarageArea,WoodDeckSF,Street_Pave,LotConfig_CulDSac,...,Neighborhood_StoneBr,Condition1_Norm,Condition2_PosA,BldgType_Twnhs,BldgType_TwnhsE,Exterior1st_BrkFace,Exterior2nd_CmentBd,BsmtQual_Gd,BsmtExposure_Gd,KitchenQual_Gd
1436,0.035991,0.625,0.717391,0.35,0.0,0.0,0.372355,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57,0.048354,0.5,0.956522,0.9,0.0,0.0,0.398449,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
780,0.030732,0.5,0.891304,0.766667,0.02375,0.0,0.283498,0.256709,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
382,0.037136,0.5,0.971014,0.933333,0.0,0.0,0.450635,0.168028,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1170,0.040104,0.625,0.76087,0.45,0.0,0.333333,0.252468,0.236873,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [163]:
alpha = 0.0001
lassoNew = Lasso(alpha=alpha)

lassoNew.fit(X_tr, y_tr)
print(lassoNew.coef_)



y_pred_train = lassoNew.predict(X_tr)
y_pred_test = lassoNew.predict(X_te)

metric3 = []
r2_train_lr = r2_score(y_train, y_pred_train)
print(r2_train_lr)
metric3.append(r2_train_lr)

r2_test_lr = r2_score(y_test, y_pred_test)
print(r2_test_lr)
metric3.append(r2_test_lr)

rss1_lr = np.sum(np.square(y_train - y_pred_train))
print(rss1_lr)
metric3.append(rss1_lr)

rss2_lr = np.sum(np.square(y_test - y_pred_test))
print(rss2_lr)
metric3.append(rss2_lr)

mse_train_lr = mean_squared_error(y_train, y_pred_train)
print(mse_train_lr)
metric3.append(mse_train_lr**0.5)

mse_test_lr = mean_squared_error(y_test, y_pred_test)
print(mse_test_lr)
metric3.append(mse_test_lr**0.5)

[ 0.17348568  0.02639497  0.107466    0.06166547  0.11268112  0.02440733
  0.16046915  0.05978435  0.05642832  0.0172809   0.08722709 -0.03465625
  0.14195385  0.09922045  0.03713662  0.09668362  0.01046756  0.11615925
 -0.0713437  -0.04120801  0.05632881  0.02868995 -0.01442024  0.04801057
 -0.00152598]
0.736371212253173
0.6968267127231837
3.244548314267637
1.652095797408792
0.0031778142157371566
0.003763316167218205


In [164]:
betas = pd.DataFrame(index=X_tr.columns)
betas.rows = X_tr.columns

betas['LassoRemoved5Col'] = lassoNew.coef_

pd.set_option('display.max_rows', None)
betas.head(68)

Unnamed: 0,LassoRemoved5Col
LotArea,0.173486
OverallCond,0.026395
YearBuilt,0.107466
YearRemodAdd,0.061665
MasVnrArea,0.112681
BsmtFullBath,0.024407
GarageArea,0.160469
WoodDeckSF,0.059784
Street_Pave,0.056428
LotConfig_CulDSac,0.017281


In [165]:
# final co-efficients using Lasso method with Alpha = 0.0001 are below.
# R2 square are below
betas = pd.DataFrame(index=X_tr_final.columns)
betas.rows = X_tr_final.columns

betas['LassoRemoved5Col'] = lasso_final.coef_

pd.set_option('display.max_rows', None)
print(betas.head(68))
final_metric_all

                      LassoRemoved5Col
LotArea                       0.068008
OverallQual                   0.208904
OverallCond                   0.031048
YearBuilt                     0.088013
YearRemodAdd                  0.032490
MasVnrArea                    0.036004
TotalBsmtSF                   0.271203
2ndFlrSF                      0.111389
BsmtFullBath                  0.040337
GarageArea                    0.077463
WoodDeckSF                    0.031350
Street_Pave                   0.013708
LotConfig_CulDSac             0.018604
Neighborhood_Crawfor          0.053440
Neighborhood_Mitchel         -0.017957
Neighborhood_NoRidge          0.081359
Neighborhood_NridgHt          0.063793
Neighborhood_Somerst          0.022418
Neighborhood_StoneBr          0.044444
Condition1_Norm               0.013759
Condition2_PosA               0.000000
Condition2_PosN              -0.280332
BldgType_Twnhs               -0.064694
BldgType_TwnhsE              -0.029571
RoofMatl_WdShngl         

Unnamed: 0,Metric,Linear Regression,Ridge Regression,Lasso Regression
0,R2 Score (Train),0.852754,0.852754,0.849573
1,R2 Score (Test),0.807963,0.807969,0.816495
2,RSS (Train),1.812197,1.812197,1.851346
3,RSS (Test),1.046476,1.046446,0.999982
4,MSE (Train),0.04213,0.04213,0.042582
5,MSE (Test),0.048824,0.048823,0.047727
