# Multiple Linear Regression with StatsModels

In this notebook we will make basic predictions of carprice

# Getting Started

In [3]:
# Import the relevant libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
sns.set()

In [4]:
# Load the data
raw_data = pd.read_csv('../proprietary_data/real_estate_price_size_year_view.csv')
raw_data.head()

Unnamed: 0,price,size,year,view
0,234314.144,643.09,2015,No sea view
1,228581.528,656.22,2009,No sea view
2,281626.336,487.29,2018,Sea view
3,401255.608,1504.75,2015,No sea view
4,458674.256,1275.46,2009,Sea view


# Preprocessing

#### Check for missing values

In [7]:
raw_data.isna().sum()

price    0
size     0
year     0
view     0
dtype: int64

#### Create a dummy variable for 'view'

In [8]:
data = raw_data.copy()
data = pd.get_dummies(data)

In [9]:
data.head()

Unnamed: 0,price,size,year,view_No sea view,view_Sea view
0,234314.144,643.09,2015,1,0
1,228581.528,656.22,2009,1,0
2,281626.336,487.29,2018,0,1
3,401255.608,1504.75,2015,1,0
4,458674.256,1275.46,2009,0,1


# Create the regression

### Declare the dependent and the independent variables

In [10]:
y = data['price']
x = data.drop(columns=['price'])

### Regression

In [11]:
x = sm.add_constant(x)
results = sm.OLS(y,x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.913
Model:,OLS,Adj. R-squared:,0.91
Method:,Least Squares,F-statistic:,335.2
Date:,"Sat, 13 Feb 2021",Prob (F-statistic):,1.02e-50
Time:,09:35:08,Log-Likelihood:,-1144.6
No. Observations:,100,AIC:,2297.0
Df Residuals:,96,BIC:,2308.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.58e+06,6.63e+05,-5.402,0.000,-4.89e+06,-2.26e+06
size,223.0316,7.838,28.455,0.000,207.473,238.590
year,2718.9489,493.502,5.510,0.000,1739.356,3698.542
view_No sea view,-1.818e+06,3.31e+05,-5.489,0.000,-2.48e+06,-1.16e+06
view_Sea view,-1.761e+06,3.31e+05,-5.315,0.000,-2.42e+06,-1.1e+06

0,1,2,3
Omnibus:,29.224,Durbin-Watson:,1.965
Prob(Omnibus):,0.0,Jarque-Bera (JB):,64.957
Skew:,1.088,Prob(JB):,7.85e-15
Kurtosis:,6.295,Cond. No.,1.32e+19
