# Linear Regression

## Part 1 - Data Preprocessing

### Importing the dataset

In [None]:
import pandas as pd
# Load the spreadsheet into a DataFrame; the path is relative to the working directory.
dataset = pd.read_excel('data.xlsx')

In [None]:
# Preview the first rows to confirm the columns loaded as expected.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Getting the inputs and output

In [None]:
# Feature matrix: every column except the last one (the target).
# to_numpy() is the accessor pandas recommends over .values for getting a NumPy array.
x = dataset.iloc[:, :-1].to_numpy()

In [None]:
# Feature matrix: every column of the dataset except the last one.
x

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [None]:
# Target vector: the last column of the dataset.
# to_numpy() is the accessor pandas recommends over .values for getting a NumPy array.
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Target vector: the last column of the dataset.
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

### Creating the Training Set and the Test Set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [None]:
# Feature rows assigned to the training set by the split.
x_train

array([[  11.22,   43.13, 1017.24,   80.9 ],
       [  13.67,   54.3 , 1015.92,   75.42],
       [  32.84,   77.95, 1014.68,   45.8 ],
       ...,
       [  16.81,   38.52, 1018.26,   75.21],
       [  12.8 ,   41.16, 1022.43,   86.19],
       [  32.32,   67.9 , 1006.08,   37.93]])

In [None]:
# Feature rows held out for the test set.
x_test

array([[  28.66,   77.95, 1009.56,   69.07],
       [  17.48,   49.39, 1021.51,   84.53],
       [  14.86,   43.14, 1019.21,   99.14],
       ...,
       [  12.24,   44.92, 1023.74,   88.21],
       [  27.28,   47.93, 1003.46,   59.22],
       [  17.28,   39.99, 1007.09,   74.25]])

In [None]:
# Targets paired with x_train.
y_train

array([473.93, 467.87, 431.97, ..., 459.01, 462.72, 428.12])

In [None]:
# Targets paired with x_test; used as the ground truth when scoring.
y_test

array([431.23, 460.01, 461.14, ..., 473.26, 438.  , 463.28])

## Part 2 - Building and training the model

### Building the model

In [None]:
from sklearn.linear_model import LinearRegression
# Linear regression estimator constructed with its default settings.
model = LinearRegression()

### Training the model

In [None]:
# Fit on the training split only; the test split is kept aside for evaluation.
model.fit(x_train, y_train)

### Inference

Predicting the target for every data point in the test set

In [None]:
# Predict the target for every row of the held-out test features.
y_pred = model.predict(x_test)

In [None]:
# Model predictions for the rows of x_test.
y_pred

array([431.42761597, 458.56124622, 462.75264705, ..., 469.51835895,
       442.41759454, 461.88279939])

Predicting the target for a single data point with AT = 15, V = 40, AP = 1000, RH = 75

In [None]:
# predict() takes a 2-D input: one row per sample, features in training order (AT, V, AP, RH).
model.predict([[15, 40, 1000, 75]])

array([465.80771895])

## Part 3 - Evaluating the model

### R-Squared

In [None]:
from sklearn.metrics import r2_score
# Keyword arguments make the (true, predicted) order explicit; r2_score is not symmetric.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# R-squared of the test-set predictions.
r2

0.9325315554761303

### Adjusted R-Squared

In [None]:
# n = number of test samples (rows), k = number of input features (columns).
n, k = x_test.shape
# Penalise R-squared for model size: 1 - (1 - R^2) * (n - 1) / (n - k - 1).
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

In [None]:
# R-squared adjusted for the number of features.
adj_r2

0.9323901862890713