# Multiple Linear Regression

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Read the case CSV and split it by position: every column except the last
# goes into the feature matrix X, the last column becomes the target y.
dataset = pd.read_csv('Case01_U2R01_1_5_50_U5R01_1_5_10_U10R10.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Inspect the feature matrix; NumPy abbreviates large arrays with "..." when printing.
print(X)

[[ 1.       1.       2.      ...  0.      10.      -4.8659 ]
 [ 1.       1.       2.      ...  0.       9.8751  -4.8659 ]
 [ 1.       1.       2.      ...  0.       9.8751  -5.     ]
 ...
 [ 1.       1.      10.      ...  0.       0.26489  5.     ]
 [ 1.       1.      10.      ...  0.       0.12495  5.     ]
 [ 1.       1.      10.      ...  0.       0.       5.     ]]


In [None]:
# Inspect the target values (still a 1-D array at this point).
print(y)

[0.42964 0.41301 0.41945 ... 0.64078 0.61312 0.59926]


In [None]:
# Turn the 1-D target into a single-column 2-D array, which is the layout
# the target scaler is given later in the notebook.
y = y.reshape(-1, 1)
print(y)

[[0.42964]
 [0.41301]
 [0.41945]
 ...
 [0.64078]
 [0.61312]
 [0.59926]]


## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 1% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.01,
    random_state=0,
)
# Show one training row before any scaling is applied.
print(np.array2string(X_train[1], precision=10))

[ 1.      1.     10.     10.      0.      9.5784 -3.6468]


**Feature Scaling**

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature columns 2, 3, 5 and 6 are standardised, each with its own scaler;
# columns 0, 1 and 4 are left untouched. Every scaler is fitted on the
# training rows only and then reused for the test rows. The scalers keep
# their individual names so later cells can apply them to new samples.
#
# NOTE: this cell overwrites X_train / X_test / y_train / y_test in place.
# Re-running it without first re-running the split refits the scalers on
# data that is already scaled.
sc_X2, sc_X3, sc_X5, sc_X6 = (StandardScaler() for _ in range(4))
for column, scaler in ((2, sc_X2), (3, sc_X3), (5, sc_X5), (6, sc_X6)):
    X_train[:, column] = scaler.fit_transform(X_train[:, [column]]).ravel()
    X_test[:, column] = scaler.transform(X_test[:, [column]]).ravel()

# The target gets the same treatment: fit on train, apply to test.
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)
y_test = sc_y.transform(y_test)

# Show one scaled test row.
print(X_test[1, :])

[ 1.          1.         -0.89408296 -0.60635741  0.          1.37230904
 -1.41695347]


## Training the Multiple Linear Regression model on the Training set

In [None]:
from sklearn.linear_model import LinearRegression
# Fit a linear model on the training split. By this point feature columns
# 2, 3, 5, 6 and the target have been standardised, so the model's
# predictions come out in scaled target units.
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the linear model (scaled target units).
y_pred = regressor.predict(X_test)
print(y_pred)
# From here on NumPy prints floats with three decimals.
np.set_printoptions(precision=3)
# Predictions (left column) beside the scaled ground truth (right column).
print(np.hstack((y_pred, y_test)))

[[ 7.43143202e-01]
 [ 1.39301339e-02]
 [-3.67930364e-01]
 [ 5.86318772e-01]
 [ 7.59313298e-01]
 [-6.95830455e-02]
 [ 2.50681708e+00]
 [-4.01509178e-01]
 [ 5.71595838e-01]
 [-1.29733121e+00]
 [-9.33414810e-01]
 [ 1.73004444e+00]
 [-1.30462763e+00]
 [-1.43888167e+00]
 [ 1.92257379e+00]
 [-6.92952539e-01]
 [-5.69727856e-01]
 [-1.06970908e+00]
 [ 1.07367676e+00]
 [ 8.08335084e-01]
 [ 1.74557189e-01]
 [-3.93496618e-02]
 [-1.91771280e-01]
 [ 8.40622147e-02]
 [-4.92199395e-01]
 [-1.41625972e+00]
 [-3.52299443e-01]
 [ 1.94461963e-01]
 [-5.02986781e-01]
 [ 2.92788060e-01]
 [-1.17597347e-01]
 [-4.43170494e-01]
 [ 1.72909462e+00]
 [ 2.40833010e+00]
 [-1.22653176e+00]
 [ 8.82213477e-01]
 [ 9.18836148e-01]
 [-4.88417506e-01]
 [ 6.38465019e-03]
 [ 9.68989480e-01]
 [-1.18227671e+00]
 [-1.07743013e-02]
 [ 9.88333555e-01]
 [-4.31069203e-01]
 [-1.29044332e+00]
 [-4.42874820e-01]
 [ 1.03401891e+00]
 [-1.16628021e+00]
 [-1.25485799e+00]
 [-2.90099340e-01]
 [-1.33738430e-02]
 [-1.39458524e+00]
 [-1.1566851

## Predicting the Test set results

**Training the Polynomial Regression model on the Training set**

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the features into polynomial terms up to degree 5.
poly_reg = PolynomialFeatures(degree = 5)
# Fit the expansion on the training rows only...
X_poly = poly_reg.fit_transform(X_train)
# ...and reuse the fitted transformer for the test rows. transform() is used
# rather than fit_transform() so the preprocessing step is never refitted on
# test data.
X_poly_test = poly_reg.transform(X_test)
# A linear model on the expanded features gives the polynomial regression.
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)

**Predicting the Test set results with the Polynomial Regression model**

In [None]:
# Predict the held-out rows with the polynomial model (scaled target units).
y_pred_2 = lin_reg_2.predict(X_poly_test)
np.set_printoptions(precision=3)
# Force both arrays into single-column shape, then show predictions (left)
# beside the scaled ground truth (right).
print(np.hstack((y_pred_2.reshape(-1, 1), y_test.reshape(-1, 1))))

[[ 4.674e-01  5.478e-01]
 [ 5.071e-01  6.411e-01]
 [-4.247e-01 -4.532e-01]
 [ 6.364e-01  6.890e-01]
 [ 5.179e-01  5.541e-01]
 [-2.777e-01 -3.396e-01]
 [ 3.965e+00  3.931e+00]
 [-4.386e-01 -4.381e-01]
 [-1.207e-01 -1.926e-01]
 [-8.598e-01 -8.191e-01]
 [-8.668e-01 -8.806e-01]
 [ 1.886e+00  1.818e+00]
 [-9.132e-01 -9.074e-01]
 [-9.987e-01 -1.023e+00]
 [ 1.523e+00  1.487e+00]
 [-7.813e-01 -7.289e-01]
 [-7.695e-01 -8.171e-01]
 [-8.675e-01 -8.755e-01]
 [ 7.169e-01  7.213e-01]
 [ 1.022e+00  9.273e-01]
 [ 1.781e-01  1.282e-01]
 [-2.007e-01 -1.689e-01]
 [-6.622e-02 -3.864e-02]
 [-1.966e-01 -2.741e-01]
 [-4.037e-01 -4.151e-01]
 [-9.969e-01 -9.635e-01]
 [-6.084e-01 -6.275e-01]
 [-1.229e-01 -1.719e-01]
 [-4.868e-01 -4.857e-01]
 [ 1.839e-01  1.247e-01]
 [-4.539e-01 -4.908e-01]
 [-6.383e-01 -6.806e-01]
 [ 1.373e+00  1.368e+00]
 [ 3.172e+00  3.146e+00]
 [-9.368e-01 -9.318e-01]
 [ 1.223e+00  1.181e+00]
 [ 1.914e+00  1.711e+00]
 [-7.341e-01 -7.796e-01]
 [-3.979e-01 -3.846e-01]
 [ 1.252e+00  1.316e+00]


In [None]:

# A single hand-built sample, given in raw (unscaled) feature values.
row1 = [1., 1., 2., 10., 0., 9.5784,  4.8659]
X_test2 = np.array([row1])

# Standardise the same columns that were scaled for training, reusing the
# already-fitted scalers (transform only, no refitting).
X_test2[:, 2] = sc_X2.transform(X_test2[:, 2].reshape(-1, 1)).flatten()
X_test2[:, 3] = sc_X3.transform(X_test2[:, 3].reshape(-1, 1)).flatten()
X_test2[:, 5] = sc_X5.transform(X_test2[:, 5].reshape(-1, 1)).flatten()
X_test2[:, 6] = sc_X6.transform(X_test2[:, 6].reshape(-1, 1)).flatten()

print(X_test2)

# Expand with the already-fitted polynomial transformer. transform() is used
# instead of fit_transform() so that predicting never refits preprocessing.
y_test2 = lin_reg_2.predict(poly_reg.transform(X_test2))
# Convert the prediction from scaled units back to the original target scale.
y1 = sc_y.inverse_transform(y_test2)
print(y1)

# For comparison: one (scaled) training row and its target in original units.
print(X_train[1, :])
y2 = sc_y.inverse_transform(y_train[1,:].reshape(-1, 1))
print(y2)


[[ 1.     1.    -0.894  0.057  0.     1.289  1.375]]
[[0.481]]
[ 1.     1.     2.326  0.057  0.     1.289 -1.034]
[[1.389]]


## Evaluation: Mean Squared Error

In [None]:
from sklearn.metrics import mean_squared_error

# Undo the target scaling so the errors are reported in original units.
y_test_original = sc_y.inverse_transform(y_test)
y_pred_original = sc_y.inverse_transform(y_pred)
y_pred_poly_original = sc_y.inverse_transform(y_pred_2)

# Test-set MSE of each model, measured against the same ground truth.
mse_linear = mean_squared_error(y_test_original, y_pred_original)
mse_poly = mean_squared_error(y_test_original, y_pred_poly_original)

print(f"Linear Regression Mean Squared Error (MSE): {mse_linear}")
print(f"Polynomial Regression Mean Squared Error (MSE): {mse_poly}")


Linear Regression Mean Squared Error (MSE): 0.016563556416796524
Polynomial Regression Mean Squared Error (MSE): 0.0003452351452346124


## R-squared metric

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Bring ground truth and both sets of predictions back to original target units.
y_test_original = sc_y.inverse_transform(y_test)
y_pred_original = sc_y.inverse_transform(y_pred)
y_pred_poly_original = sc_y.inverse_transform(y_pred_2)

# Linear model: MSE and R-squared on the test set.
mse_linear = mean_squared_error(y_test_original, y_pred_original)
r2_linear = r2_score(y_test_original, y_pred_original)

# Polynomial model: the same two metrics.
mse_poly = mean_squared_error(y_test_original, y_pred_poly_original)
r2_poly = r2_score(y_test_original, y_pred_poly_original)

print(f"Linear Regression Mean Squared Error (MSE): {mse_linear}")
print(f"Linear Regression R-squared (R²): {r2_linear}")
print(f"Polynomial Regression Mean Squared Error (MSE): {mse_poly}")
print(f"Polynomial Regression R-squared (R²): {r2_poly}")

Linear Regression Mean Squared Error (MSE): 0.016563556416796524
Linear Regression R-squared (R²): 0.8625276625092426
Polynomial Regression Mean Squared Error (MSE): 0.0003452351452346124
Polynomial Regression R-squared (R²): 0.9971346562776074
