# HOUSE PRICE - LINEAR REGRESSION

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import pearsonr

In [2]:
house=pd.read_csv("C:/Users/PRADEEP PRIYA/Downloads/HousePrices.csv")

In [3]:
house.columns

Index(['Id', 'Dwell_Type', 'Zone_Class', 'LotFrontage', 'LotArea', 'Road_Type',
       'Alley', 'Property_Shape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'Dwelling_Type', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF',
       '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath',
       'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCo

# CHECKING THE NULL VALUES AND REPLACING IT

In [4]:
null=house.isnull().sum()

In [5]:
null.tail(50)

BsmtCond                 59
BsmtExposure             61
BsmtFinType1             59
BsmtFinSF1                0
BsmtFinType2             60
BsmtFinSF2                0
BsmtUnfSF                 0
TotalBsmtSF               0
Heating                   0
HeatingQC                 0
CentralAir                0
Electrical                1
1stFlrSF                  0
2ndFlrSF                  0
LowQualFinSF              0
GrLivArea                 0
BsmtFullBath              0
BsmtHalfBath              0
FullBath                  0
HalfBath                  0
BedroomAbvGr              0
KitchenAbvGr              0
KitchenQual               0
TotRmsAbvGrd              0
Functional                0
Fireplaces                0
FireplaceQu             988
GarageType              113
GarageYrBlt             113
GarageFinish            113
GarageCars                0
GarageArea                0
GarageQual              113
GarageCond              113
PavedDrive                0
WoodDeckSF          

In [6]:
house.drop("Id",axis=1,inplace=True)

In [7]:
house.columns

Index(['Dwell_Type', 'Zone_Class', 'LotFrontage', 'LotArea', 'Road_Type',
       'Alley', 'Property_Shape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'Dwelling_Type', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF',
       '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath',
       'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', '

In [8]:
house.LotFrontage.isnull().sum()

320

In [9]:
house.LotFrontage.isnull().mean()

0.15436565364206464

In [10]:
house.LotFrontage.describe()

count    1753.000000
mean       72.056475
std        27.987475
min        21.000000
25%        60.000000
50%        70.000000
75%        83.000000
max       313.000000
Name: LotFrontage, dtype: float64

In [11]:
house.LotFrontage.fillna(house.LotFrontage.mean(),inplace=True)

In [12]:
house.MasVnrType.describe()

count     2059
unique       4
top       None
freq      1233
Name: MasVnrType, dtype: object

In [13]:
house.MasVnrType.isnull().mean()

0.006753497346840328

In [14]:
house.MasVnrType.fillna("others",inplace=True)

In [15]:
house.MasVnrType.isnull().sum()

0

In [16]:
house.MasVnrArea.describe()

count    2059.000000
mean      101.875668
std       179.169217
min         0.000000
25%         0.000000
50%         0.000000
75%       161.500000
max      1600.000000
Name: MasVnrArea, dtype: float64

In [17]:
house.MasVnrArea.isnull().mean()

0.006753497346840328

In [18]:
house.MasVnrArea.fillna("Others",inplace=True)

In [19]:
house.MasVnrArea.isnull().sum()

0

In [20]:
house.BsmtQual.describe()

count     2014
unique       4
top         Gd
freq       904
Name: BsmtQual, dtype: object

In [21]:
house.BsmtQual.isnull().mean()

0.028461167390255667

In [22]:
house.BsmtQual.fillna("Others",inplace=True)

In [23]:
house.BsmtCond.describe()

count     2014
unique       4
top         TA
freq      1851
Name: BsmtCond, dtype: object

In [24]:
house.BsmtCond.isnull().mean()

0.028461167390255667

In [25]:
house.BsmtCond.fillna("Others",inplace=True)

In [26]:
house.BsmtExposure.describe()

count     2012
unique       4
top         No
freq      1349
Name: BsmtExposure, dtype: object

In [27]:
house.BsmtExposure.isnull().mean()

0.02942595272551857

In [28]:
house.BsmtExposure.fillna("Others",inplace=True)

In [29]:
house.BsmtFinType1.describe()

count     2014
unique       6
top        Unf
freq       604
Name: BsmtFinType1, dtype: object

In [30]:
house.BsmtFinType1.isnull().mean()

0.028461167390255667

In [31]:
house.BsmtFinType1.fillna("Others",inplace=True)

In [32]:
house.BsmtFinType2.describe()

count     2013
unique       6
top        Unf
freq      1770
Name: BsmtFinType2, dtype: object

In [33]:
house.BsmtFinType2.isnull().sum()

60

In [34]:
house.BsmtFinType2.fillna("Others",inplace=True)

In [35]:
house.Electrical.describe()

count      2072
unique        5
top       SBrkr
freq       1901
Name: Electrical, dtype: object

In [36]:
house.dropna(subset=["Electrical"],inplace=True)

In [37]:
house.Electrical.isnull().sum()

0

In [38]:
house.FireplaceQu.describe()

count     1085
unique       5
top         Gd
freq       530
Name: FireplaceQu, dtype: object

In [39]:
house.FireplaceQu.isnull().mean()

0.47635135135135137

In [40]:
house.FireplaceQu.fillna("Others",inplace=True)

In [41]:
house.GarageType.describe()

count       1959
unique         6
top       Attchd
freq        1252
Name: GarageType, dtype: object

In [42]:
house.GarageType.fillna("Others",inplace=True)

In [43]:
house.GarageYrBlt.fillna("Others",inplace=True)

In [44]:
house.GarageFinish.fillna("Others",inplace=True)

In [45]:
house.GarageQual.fillna("Others",inplace=True)

In [46]:
house.GarageCond.fillna("Others",inplace=True)

# CHANGING THE DATA TYPES

In [47]:
house.YearBuilt = house.YearBuilt.astype(str)

In [48]:
house.YearRemodAdd = house.YearRemodAdd.astype(str)

In [49]:
house.BedroomAbvGr = house.BedroomAbvGr.replace(8,6)
house.BedroomAbvGr = house.BedroomAbvGr.astype(str)

In [50]:
house.KitchenAbvGr = house.KitchenAbvGr.replace([3,0],2)

In [51]:
house.TotRmsAbvGrd = house.TotRmsAbvGrd.replace([2,14],12)
house.TotRmsAbvGrd = house.TotRmsAbvGrd.astype(str)

In [52]:
house.Fireplaces = house.Fireplaces.astype(str)

In [53]:
house.GarageCars = house.GarageCars.replace(4,0)
house.GarageCars = house.GarageCars.astype(str)

In [54]:
house.OverallQual = house.OverallQual.astype(str)

In [55]:
house.OverallCond = house.OverallCond.astype(str)

In [56]:
house.BsmtFullBath = house.BsmtFullBath.replace(3,2)
house.BsmtFullBath = house.BsmtFullBath.astype(str)

In [57]:
house.BsmtHalfBath = house.BsmtHalfBath.replace(2,1)

# DROPPING THE COLUMNS

In [58]:
house.drop("PoolQC",axis=1)

Unnamed: 0,Dwell_Type,Zone_Class,LotFrontage,LotArea,Road_Type,Alley,Property_Shape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Property_Sale_Price
0,60,RL,65.000000,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,0,2,2008,WD,Normal,208500
1,20,RL,80.000000,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,0,5,2007,WD,Normal,181500
2,60,RL,68.000000,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,9,2008,WD,Normal,223500
3,70,RL,60.000000,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.000000,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2068,60,RL,72.056475,8755,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,GdPrv,,0,6,2009,WD,Normal,214000
2069,90,RL,63.000000,7711,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,8,2007,Oth,Abnorml,150000
2070,90,RL,313.000000,25000,Pave,,Reg,Low,AllPub,Inside,...,0,0,,,0,6,2007,WD,Normal,143000
2071,20,RL,52.000000,14375,Pave,,IR1,Lvl,NoSeWa,CulDSac,...,233,0,,,0,1,2009,COD,Abnorml,137500


In [59]:
house.PoolQC.isnull().sum()

2064

In [60]:
house.drop("Fence",axis=1)

Unnamed: 0,Dwell_Type,Zone_Class,LotFrontage,LotArea,Road_Type,Alley,Property_Shape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Property_Sale_Price
0,60,RL,65.000000,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,0,2,2008,WD,Normal,208500
1,20,RL,80.000000,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,0,5,2007,WD,Normal,181500
2,60,RL,68.000000,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,9,2008,WD,Normal,223500
3,70,RL,60.000000,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.000000,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2068,60,RL,72.056475,8755,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,0,6,2009,WD,Normal,214000
2069,90,RL,63.000000,7711,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,8,2007,Oth,Abnorml,150000
2070,90,RL,313.000000,25000,Pave,,Reg,Low,AllPub,Inside,...,0,0,,,0,6,2007,WD,Normal,143000
2071,20,RL,52.000000,14375,Pave,,IR1,Lvl,NoSeWa,CulDSac,...,233,0,,,0,1,2009,COD,Abnorml,137500


In [61]:
house.drop("MiscFeature",axis=1)


Unnamed: 0,Dwell_Type,Zone_Class,LotFrontage,LotArea,Road_Type,Alley,Property_Shape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Property_Sale_Price
0,60,RL,65.000000,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,0,2,2008,WD,Normal,208500
1,20,RL,80.000000,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,0,5,2007,WD,Normal,181500
2,60,RL,68.000000,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,9,2008,WD,Normal,223500
3,70,RL,60.000000,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.000000,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2068,60,RL,72.056475,8755,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,GdPrv,0,6,2009,WD,Normal,214000
2069,90,RL,63.000000,7711,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,0,8,2007,Oth,Abnorml,150000
2070,90,RL,313.000000,25000,Pave,,Reg,Low,AllPub,Inside,...,0,0,,,0,6,2007,WD,Normal,143000
2071,20,RL,52.000000,14375,Pave,,IR1,Lvl,NoSeWa,CulDSac,...,233,0,,,0,1,2009,COD,Abnorml,137500


In [62]:
house.drop("Alley",axis=1)

Unnamed: 0,Dwell_Type,Zone_Class,LotFrontage,LotArea,Road_Type,Property_Shape,LandContour,Utilities,LotConfig,LandSlope,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Property_Sale_Price
0,60,RL,65.000000,8450,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,,,,0,2,2008,WD,Normal,208500
1,20,RL,80.000000,9600,Pave,Reg,Lvl,AllPub,FR2,Gtl,...,0,,,,0,5,2007,WD,Normal,181500
2,60,RL,68.000000,11250,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0,,,,0,9,2008,WD,Normal,223500
3,70,RL,60.000000,9550,Pave,IR1,Lvl,AllPub,Corner,Gtl,...,0,,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.000000,14260,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2068,60,RL,72.056475,8755,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,0,,GdPrv,,0,6,2009,WD,Normal,214000
2069,90,RL,63.000000,7711,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0,,,,0,8,2007,Oth,Abnorml,150000
2070,90,RL,313.000000,25000,Pave,Reg,Low,AllPub,Inside,Gtl,...,0,,,,0,6,2007,WD,Normal,143000
2071,20,RL,52.000000,14375,Pave,IR1,Lvl,NoSeWa,CulDSac,Gtl,...,0,,,,0,1,2009,COD,Abnorml,137500


# CREATING THE DUMMIES

In [63]:
dummies = pd.get_dummies(house)

In [64]:
dummies.shape

(2072, 971)

# SPLITTING TRAIN AND TEST

In [65]:
x = dummies.drop(['Property_Sale_Price'],axis=1)
y = dummies['Property_Sale_Price']
x = sm.add_constant(x) # Add Constant
train_x,test_x,train_y,test_y = train_test_split(x,y,test_size=0.2,random_state=72)

  x = pd.concat(x[::order], 1)


# SCALER THE DUMMIES

In [66]:
scaler = MinMaxScaler()
train_x = scaler.fit_transform(train_x)

# MODEL

In [67]:
model = sm.OLS(train_y, train_x).fit()

In [68]:
model.summary()

0,1,2,3
Dep. Variable:,Property_Sale_Price,R-squared:,0.982
Model:,OLS,Adj. R-squared:,0.964
Method:,Least Squares,F-statistic:,52.87
Date:,"Sun, 15 May 2022",Prob (F-statistic):,0.0
Time:,17:39:47,Log-Likelihood:,-17679.0
No. Observations:,1657,AIC:,37060.0
Df Residuals:,804,BIC:,41680.0
Df Model:,852,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.603e-09,5.8e-10,-2.762,0.006,-2.74e-09,-4.64e-10
x1,1.163e+04,1.74e+04,0.668,0.505,-2.26e+04,4.58e+04
x2,-4560.6470,7938.298,-0.575,0.566,-2.01e+04,1.1e+04
x3,8.772e+04,1.78e+04,4.928,0.000,5.28e+04,1.23e+05
x4,9.988e+04,1.56e+04,6.406,0.000,6.93e+04,1.3e+05
x5,9799.0633,1.07e+04,0.919,0.358,-1.11e+04,3.07e+04
x6,1.448e+04,6906.773,2.096,0.036,917.788,2.8e+04
x7,1.002e+05,1.62e+04,6.183,0.000,6.84e+04,1.32e+05
x8,1.199e+05,1.48e+04,8.130,0.000,9.1e+04,1.49e+05

0,1,2,3
Omnibus:,199.876,Durbin-Watson:,2.007
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1486.15
Skew:,0.29,Prob(JB):,0.0
Kurtosis:,7.603,Cond. No.,1.14e+17


# SCORE

In [69]:
print("The score for OLS Regression is {}%.".format(round(model.rsquared*100,2)))

The score for OLS Regression is 98.25%.
