# Multi-variable Linear Regression 
This notebook introduces multi-variable linear regression (LR), both implemented manually and with the scikit-learn module.
First, we import the modules we need.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import preprocessing

After importing the real-estate data, we will predict the house price from the six feature variables shown below.

In [2]:
# Load the real-estate data set and discard the 'No' column
# (presumably just a running row number -- verify against the CSV).
df = pd.read_csv('Real-estate1.csv').drop(columns='No')

# Preview the first rows and list the remaining column names.
print(df.head())
print(df.columns)

   X1 transaction date  X2 house age  X3 distance to the nearest MRT station  \
0             2012.917          32.0                                84.87882   
1             2012.917          19.5                               306.59470   
2             2013.583          13.3                               561.98450   
3             2013.500          13.3                               561.98450   
4             2012.833           5.0                               390.56840   

   X4 number of convenience stores  X5 latitude  X6 longitude  \
0                               10     24.98298     121.54024   
1                                9     24.98034     121.53951   
2                                5     24.98746     121.54391   
3                                5     24.98746     121.54391   
4                                5     24.97937     121.54245   

   Y house price of unit area  
0                        37.9  
1                        42.2  
2                        47.3  


Because the feature variables and the target are still combined in a single data frame, we separate them first.

In [3]:
# Separate the predictors from the target:
#   X -> every column except the house price
#   y -> the house price column on its own
X = df.drop(columns=['Y house price of unit area'])
y = df.loc[:, 'Y house price of unit area']
print(X)
print(y)

     X1 transaction date  X2 house age  \
0               2012.917          32.0   
1               2012.917          19.5   
2               2013.583          13.3   
3               2013.500          13.3   
4               2012.833           5.0   
..                   ...           ...   
409             2013.000          13.7   
410             2012.667           5.6   
411             2013.250          18.8   
412             2013.000           8.1   
413             2013.500           6.5   

     X3 distance to the nearest MRT station  X4 number of convenience stores  \
0                                  84.87882                               10   
1                                 306.59470                                9   
2                                 561.98450                                5   
3                                 561.98450                                5   
4                                 390.56840                                5   
..             

In [19]:
# Hold out 40% of the rows for evaluation; the fixed random_state keeps
# the split identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=101,
)

In [20]:
# Fit an ordinary least-squares model on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
Model = LinearRegression().fit(X_train, y_train)

# Show the learned weights (one per feature column) and the intercept term.
Model.coef_, Model.intercept_

(array([ 5.06414221e+00, -2.83627390e-01, -4.81454698e-03,  1.00267145e+00,
         2.30625349e+02, -2.54512413e+01]),
 -12815.542837714267)

In [21]:
# Predict the target for every row of the held-out test split
# and display the resulting array.
predictions = Model.predict(X=X_test)
predictions

array([13.36200709, 10.46702902, 21.1309444 , 47.51821861, 32.19604529,
       38.29484911, 35.6601396 , 41.19990963, 47.71302799, 40.07561216,
       45.02931772, 33.19025175, 41.68856441, 35.93619794, 43.53760772,
       45.52775148, 39.76828142, 44.54567674, 48.02668394, 44.89412847,
       43.68952256, 54.14326792, 47.93998304, 37.30552851, 34.39726048,
       48.1890637 , 40.48530867, 49.51891453, 46.66380666, 39.81558938,
       47.34917303, 40.58522657, 45.03432956, 44.20805178, 46.42068741,
        8.60581282, 39.49449402, 40.77973144,  8.96941114, 54.9763222 ,
       32.34268168, 48.82636539, 25.91343282, 47.69340627, 40.42562406,
       50.27322481, 42.37967759, 38.00524935, 43.52778567, 36.60913578,
       47.14896225, 34.34219138, 43.79283299, 16.79084146, 38.09559746,
       48.57761705, 44.35772909, 44.10136466, 45.3481875 , 40.26076485,
       35.63392053, 45.39676575, 42.18649771, 43.33314043, 53.00620842,
       44.12043985, 24.0143795 , 46.91423046, 31.93471132, 41.50

In [22]:
# Evaluate the model on the test split: compare the true targets with the
# predictions using the squared-error and absolute-error metrics.
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print('mean_squared_error : ', mse)
print('mean_absolute_error : ', mae)


mean_squared_error :  52.210965481447374
mean_absolute_error :  5.466522594038082


In [23]:
# Score the fitted model on the test split (for scikit-learn regressors this
# is the coefficient of determination, R^2). No sample weights are applied.
Model.score(X_test, y_test)

0.6584013936695442