# Multiple Linear Regression

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Read the startup records and discard the 'State' column in a single
# expression, so re-running this cell always starts from the file on disk.
dataset = pd.read_csv('50_Startups.csv').drop(columns=['State'])

# Features: every column except the last one. Target: the last column.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Show the dtype of each column that remains after 'State' was dropped.
dataset.dtypes

R&D Spend          float64
Administration     float64
Marketing Spend    float64
Profit             float64
dtype: object

In [None]:
# Feature matrix: all columns of `dataset` except the last one.
print(X)

[[165349.2  136897.8  471784.1 ]
 [162597.7  151377.59 443898.53]
 [153441.51 101145.55 407934.54]
 [144372.41 118671.85 383199.62]
 [142107.34  91391.77 366168.42]
 [131876.9   99814.71 362861.36]
 [134615.46 147198.87 127716.82]
 [130298.13 145530.06 323876.68]
 [120542.52 148718.95 311613.29]
 [123334.88 108679.17 304981.62]
 [101913.08 110594.11 229160.95]
 [100671.96  91790.61 249744.55]
 [ 93863.75 127320.38 249839.44]
 [ 91992.39 135495.07 252664.93]
 [119943.24 156547.42 256512.92]
 [114523.61 122616.84 261776.23]
 [ 78013.11 121597.55 264346.06]
 [ 94657.16 145077.58 282574.31]
 [ 91749.16 114175.79 294919.57]
 [ 86419.7  153514.11      0.  ]
 [ 76253.86 113867.3  298664.47]
 [ 78389.47 153773.43 299737.29]
 [ 73994.56 122782.75 303319.26]
 [ 67532.53 105751.03 304768.73]
 [ 77044.01  99281.34 140574.81]
 [ 64664.71 139553.16 137962.62]
 [ 75328.87 144135.98 134050.07]
 [ 72107.6  127864.55 353183.81]
 [ 66051.52 182645.56 118148.2 ]
 [ 65605.48 153032.06 107138.38]
 [ 61994.4

In [None]:
# Target vector: the last column of `dataset`.
print(y)

[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51
 155752.6  152211.77 149759.96 146121.95 144259.4  141585.52 134307.35
 132602.65 129917.04 126992.93 125370.37 124266.9  122776.86 118474.03
 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31
 103282.38 101004.64  99937.59  97483.56  97427.84  96778.92  96712.8
  96479.51  90708.19  89949.14  81229.06  81005.76  78239.91  77798.83
  71498.49  69758.98  65200.33  64926.08  49490.75  42559.73  35673.41
  14681.4 ]


## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the training split only.
# `fit` returns the estimator itself, so construction and fitting can be chained.
regressor = LinearRegression().fit(X_train, y_train)

# Bare expression so the notebook still displays the fitted estimator.
regressor

In [None]:
import pickle

# Persist the fitted estimator to 'model.pkl' (binary mode is required by pickle).
with open('model.pkl', 'wb') as model_file:
    pickle.dump(regressor, model_file)

## Predicting the Test set results

In [None]:
import pickle

# Restore the estimator from 'model.pkl', the file written by this notebook's
# pickle.dump cell.
# NOTE: pickle.load can execute arbitrary code during deserialisation; only
# ever point this at a file you produced yourself.
with open('model.pkl', 'rb') as model_file:
    regressor = pickle.load(model_file)

# Bare expression so the notebook displays the restored estimator.
regressor

In [None]:
# Predict the target for the held-out rows.
y_pred = regressor.predict(X_test)

# Show two decimals when printing arrays from here on.
np.set_printoptions(precision=2)

# Side-by-side view: column 0 = prediction, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[103901.9  103282.38]
 [132763.06 144259.4 ]
 [133567.9  146121.95]
 [ 72911.79  77798.83]
 [179627.93 191050.39]
 [115166.65 105008.31]
 [ 67113.58  81229.06]
 [ 98154.81  97483.56]
 [114756.12 110352.25]
 [169064.01 166187.94]]


In [None]:
# Summarize the fit of the model on the held-out test split.
# Every error metric is derived from the same residual vector so they cannot
# drift out of sync with each other.
residuals = y_pred - y_test
mse = np.mean(residuals ** 2)       # mean squared error
mae = np.mean(np.abs(residuals))    # mean absolute error
me = np.mean(residuals)             # mean signed error; negative => predictions are low on average
rmse = np.sqrt(mse)                 # root mean squared error

print(regressor.intercept_, regressor.coef_)
# Score R^2 on the test split only. Scoring on the full (X, y) would include
# the rows the model was trained on and would not be comparable with the
# test-set error metrics printed below.
print("R2: ", regressor.score(X_test, y_test))
print("MAE: ", mae)
print("RMSE: ", rmse)
print("MSE: ", mse)
print("ME: ", me)

42989.00816508669 [0.78 0.03 0.03]
R2:  0.9490884301964866
MAE:  7320.44161484813
RMSE:  8803.775790469346
MSE:  77506468.16885415
ME:  -3574.6331897536597
