# Multiple Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [2]:
# Read the workbook, then separate predictors (all columns but the last)
# from the response (the last column) and expose both as NumPy arrays.
dataset = pd.read_excel('Research data.xlsx')
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
X, y = feature_frame.values, target_series.values


In [3]:
# Show the feature array X (every dataset column except the last).
print(X)

[[0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [4]:
# Show the target array y (the last dataset column).
print(y)

[ 66.193  66.193  66.193 ... 158.94  159.03  159.13 ]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is pinned to 0 so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [15]:
# Show the first 20 rows of the held-out feature array.
print(X_test[:20])

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [7]:
# Show the held-out target values returned by train_test_split.
print(y_test)

[134.31  126.88   52.497 ... 124.33  161.06  105.99 ]


## Training the Multiple Linear Regression model on the Training set

In [8]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on the training split. The fit call is left as
# the cell's last expression so the fitted estimator is displayed.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

LinearRegression()

## Predicting the Test set results

In [9]:
# Predict on the held-out rows and print predictions next to the true values
# (column 0 = predicted, column 1 = actual).
y_pred = regressor.predict(X_test)
# Note: this changes NumPy's print precision globally for the rest of the session.
np.set_printoptions(precision=2)
comparison = np.column_stack((y_pred, y_test))
print(comparison)

[[183.62 134.31]
 [183.62 126.88]
 [ 84.77  52.5 ]
 ...
 [160.81 124.33]
 [189.55 161.06]
 [ 78.98 105.99]]


**Making a single prediction**

In [10]:
# One hand-built row of 21 values; zero-based positions 2, 7 and 19 are set to 1.
single_observation = [[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]
print(regressor.predict(single_observation))

[110.77]


**Getting the final linear regression equation with the values of the coefficients**

In [11]:
# Fitted slope per feature, followed by the intercept on its own line.
print(regressor.coef_, regressor.intercept_, sep="\n")

[ -53.91  -50.91  -61.14    5.92  -47.07  -57.58  -22.82  -52.94  -72.67
   -6.08  -75.5   -33.78  -12.36   61.21  -25.38   15.8   101.84 -112.21
   32.96   41.23  -98.86]
183.6244778200806


In [12]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the test-set predictions, in the units of y.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("Multiple linear regression MAE = ", MAE)



Multiple linear regression MAE =  67.13535425853932


In [13]:
from sklearn.metrics import r2_score

# Coefficient of determination on the test split; left as the last
# expression so the notebook displays the value.
r2_score(y_true=y_test, y_pred=y_pred)

0.1399883632480945

In [14]:
import statsmodels.api as sm
from scipy import stats  # not used in this cell; kept in case later cells rely on it

# Fit the same linear model with statsmodels to get the inferential summary
# (standard errors, t-statistics, p-values, confidence intervals).
#
# X and y are the full-dataset arrays created in the "Importing the dataset"
# cell. They are reused here rather than re-reading 'Research data.xlsx':
# the second read was slow and only rebuilt identical dataset/X/y objects.
# Note this fit uses every row, not just the training split.

# statsmodels does not add an intercept on its own, so add a constant column.
X2 = sm.add_constant(X)
est = sm.OLS(y, X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.139
Model:                            OLS   Adj. R-squared:                  0.139
Method:                 Least Squares   F-statistic:                     5587.
Date:                Tue, 18 Aug 2020   Prob (F-statistic):               0.00
Time:                        00:05:16   Log-Likelihood:            -4.2739e+06
No. Observations:              729633   AIC:                         8.548e+06
Df Residuals:                  729611   BIC:                         8.548e+06
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        183.4818      0.112   1636.040      0.0