# Multiple Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
%matplotlib inline

## Importing the dataset

In [2]:
dataset = pd.read_csv('output.csv')
X = dataset.iloc[:, 1:14].values
y = dataset.iloc[:, 1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

## Training the Multiple Linear Regression model on the Training set

In [4]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

## Predicting the Test set results

In [5]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[289000. 289000.]
 [429900. 429900.]
 [129000. 129000.]
 ...
 [ 83300.  83300.]
 [520000. 520000.]
 [215000. 215000.]]


## Evaluating the Model Performance

In [6]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
r2_score(y_test, y_pred)

1.0

In [7]:
print('MAE:', mean_absolute_error(y_test, y_pred))
print('MSE:', mean_squared_error(y_test, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))

MAE: 5.964182485018446e-11
MSE: 7.255523976787045e-21
RMSE: 8.517936356176327e-11


## Applying k-Fold Cross Validation

In [8]:
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score (estimator= regressor , X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))
print("r2_score: {:.2f} %".format(r2_score(y_test, y_pred)*100))

Accuracy: 100.00 %
Standard Deviation: 0.00 %
r2_score: 100.00 %
