# Multiple Linear Regression - Housing dataset

## Importing the libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Importing the dataset

In [2]:
# Load the housing data; the relative path is resolved against the current
# working directory.
ds = pd.read_csv('Housing.csv')

In [3]:
ds.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [46]:
ds.isnull().sum()


price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

### Split the dataset into independent and dependent variables

In [4]:
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [6]:
# Independent variables: every column of `ds` except the target `price`.
feature_columns = [
    'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
    'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
    'parking', 'prefarea', 'furnishingstatus',
]
X = ds[feature_columns]

# Dependent variable. Selecting with a list keeps it a one-column DataFrame
# (2-D) rather than a Series.
y = ds[['price']]

In [7]:
ds.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

### Work with the categorical data

In [9]:
ds['mainroad'].value_counts()

yes    468
no      77
Name: mainroad, dtype: int64

In [10]:
ds['guestroom'].value_counts()

no     448
yes     97
Name: guestroom, dtype: int64

In [11]:
ds['basement'].value_counts()

no     354
yes    191
Name: basement, dtype: int64

In [12]:
ds['hotwaterheating'].value_counts()

no     520
yes     25
Name: hotwaterheating, dtype: int64

In [13]:
ds['airconditioning'].value_counts()

no     373
yes    172
Name: airconditioning, dtype: int64

In [15]:
ds['prefarea'].value_counts()

no     417
yes    128
Name: prefarea, dtype: int64

In [16]:
ds['furnishingstatus'].value_counts()

semi-furnished    227
unfurnished       178
furnished         140
Name: furnishingstatus, dtype: int64

# Turning categorical columns into 1 or 0 with dummies

In [22]:
# One-hot encode the categorical (object) columns; numeric columns pass
# through unchanged and end up first in the result.
# drop_first=True drops the first level of each categorical ('no' for the
# yes/no columns, 'furnished' for furnishingstatus), so that level becomes the
# baseline and the remaining dummies are not collinear with the intercept.
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 would otherwise
# emit True/False columns, which makes `X.values` an object array when mixed
# with the integer columns.
X = pd.get_dummies(
    ds[['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
        'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
        'parking', 'prefarea', 'furnishingstatus']],
    drop_first=True,
    dtype=int,
)

In [23]:
X.head()

Unnamed: 0,area,bedrooms,bathrooms,stories,parking,mainroad_yes,guestroom_yes,basement_yes,hotwaterheating_yes,airconditioning_yes,prefarea_yes,furnishingstatus_semi-furnished,furnishingstatus_unfurnished
0,7420,4,2,3,2,1,0,0,0,1,1,0,0
1,8960,4,4,4,3,1,0,0,0,1,0,0,0
2,9960,3,2,2,2,1,0,1,0,0,1,1,0
3,7500,4,2,2,3,1,0,1,0,1,1,0,0
4,7420,4,1,2,2,1,1,1,0,1,0,0,0


## Splitting the dataset into the Training set and Test set
- Random State 20

In [37]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
# The evaluation cells further down read X_test / y_test, so the hold-out set
# must be bound to those names here for the notebook to run top to bottom on a
# fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.1,
    random_state=20,
)

# Keep the previous names bound to the same hold-out set for any code that
# still refers to them.
X_rest, y_rest = X_test, y_test

## Training the Multiple Linear Regression model on the Training set

In [39]:
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
# Fit on the underlying array (not the DataFrame), so the model is trained
# without feature names; later predict() calls pass plain arrays/lists too.
# y_train is a one-column DataFrame, so coef_ / predictions come back 2-D.
regressor.fit(X=X_train.values, y=y_train)

LinearRegression()

## Intercept and Coefficient

In [40]:
# Report the fitted parameters. The model was fit on X_train.values, so the
# coefficients are in the same order as X_train.columns.
coefficients, intercept = regressor.coef_, regressor.intercept_
print('The coefficients are:', coefficients)
print('The intercept is:', intercept)

The coefficients are: [[ 2.48445255e+02  1.32015678e+05  9.88972986e+05  4.33817126e+05
   2.91011373e+05  4.02594956e+05  3.44929362e+05  2.99713946e+05
   6.59976914e+05  8.11033778e+05  6.85985176e+05 -2.66859853e+04
  -3.96630876e+05]]
The intercept is: [16465.06806519]


In [41]:
X_train.columns

Index(['area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'mainroad_yes',
       'guestroom_yes', 'basement_yes', 'hotwaterheating_yes',
       'airconditioning_yes', 'prefarea_yes',
       'furnishingstatus_semi-furnished', 'furnishingstatus_unfurnished'],
      dtype='object')

# RMSE and R square

In [42]:
# Predict on the hold-out features as a plain array, matching how the model
# was fit (on X_train.values).
y_pred = regressor.predict(X_test.to_numpy())

In [43]:
from sklearn.metrics import mean_squared_error, r2_score
import math

# Score the hold-out predictions: coefficient of determination and
# root-mean-squared error (same units as price).
r_squared = r2_score(y_test, y_pred)
rmse = math.sqrt(mean_squared_error(y_test, y_pred))

# The ' .2f' format spec reserves a leading space for the sign, which is why
# the printed values are preceded by two spaces.
print(f'The r-square is {r_squared: .2f}')
print(f'The RMSE is {rmse: .2f}')

The r-square is  0.68
The RMSE is  945510.73


# Validation case scenario:
### House with
### area = 7420
### bedrooms = 4
### bathrooms = 2
### stories = 3
### on a main road
### no guestroom
### no basement
### no hot water heating
### has air conditioning
### 2 parking spaces
### in a preferred area (prefarea = yes)
### it is furnished

# actual = 13,300,000


In [44]:
# Single-house feature vector; the order must match X_train.columns because
# the model was fit on a bare array.
sample_house = [
    7420,  # area
    4,     # bedrooms
    2,     # bathrooms
    3,     # stories
    2,     # parking
    1,     # mainroad_yes
    0,     # guestroom_yes
    0,     # basement_yes
    0,     # hotwaterheating_yes
    1,     # airconditioning_yes
    1,     # prefarea_yes
    0,     # furnishingstatus_semi-furnished
    0,     # furnishingstatus_unfurnished (both 0 => furnished)
]
regressor.predict([sample_house])

array([[8149025.5785844]])

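# My predicted result (note: the split shown above uses test_size=0.1, i.e. 90% train / 10% hold-out, rather than the 80/10/10 train/validate/test split originally described)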

In [45]:
# 8149025.5785844

In [34]:
X_test.head(5)

Unnamed: 0,area,bedrooms,bathrooms,stories,parking,mainroad_yes,guestroom_yes,basement_yes,hotwaterheating_yes,airconditioning_yes,prefarea_yes,furnishingstatus_semi-furnished,furnishingstatus_unfurnished
343,4080,2,1,1,0,1,0,0,0,0,0,1,0
524,3264,2,1,1,0,1,0,0,0,0,0,0,1
495,4000,3,1,2,1,1,0,0,0,0,0,0,1
216,6040,3,1,1,2,1,0,0,0,0,1,1,0
185,3000,3,1,2,0,1,0,1,0,0,0,0,1
