# Linear Regression

## Part 1 - Data Preprocessing

### Importing the dataset

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd

# Load the spreadsheet into a DataFrame.
# NOTE(review): the path is relative to the current working directory, not to the
# mounted Drive folder -- confirm the file is actually present there on a fresh runtime.
dataset = pd.read_excel(io='data.xlsx')

In [5]:
# Preview the first 10 rows of the loaded dataset.
dataset.head(10)

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9
5,26.27,59.44,1012.23,58.77,443.67
6,15.89,43.96,1014.02,75.24,467.35
7,9.48,44.71,1019.12,66.43,478.42
8,14.64,45.0,1021.78,41.25,475.98
9,11.74,43.56,1015.14,70.72,477.5


### Getting the inputs and output

In [6]:
# Feature matrix: every column except the last one, as a NumPy array.
feature_columns = dataset.iloc[:, :-1]
X = feature_columns.to_numpy()

In [7]:
# Display the feature matrix (NumPy abbreviates large arrays with "...").
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [8]:
# Target vector: the last column of the dataset, as a 1-D NumPy array.
target_column = dataset.iloc[:, -1]
y = target_column.to_numpy()

In [9]:
# Display the target vector.
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

### Creating the Training Set and the Test Set

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# Display the training features.
X_train

array([[  11.22,   43.13, 1017.24,   80.9 ],
       [  13.67,   54.3 , 1015.92,   75.42],
       [  32.84,   77.95, 1014.68,   45.8 ],
       ...,
       [  16.81,   38.52, 1018.26,   75.21],
       [  12.8 ,   41.16, 1022.43,   86.19],
       [  32.32,   67.9 , 1006.08,   37.93]])

In [12]:
# Display the test features.
X_test

array([[  28.66,   77.95, 1009.56,   69.07],
       [  17.48,   49.39, 1021.51,   84.53],
       [  14.86,   43.14, 1019.21,   99.14],
       ...,
       [  12.24,   44.92, 1023.74,   88.21],
       [  27.28,   47.93, 1003.46,   59.22],
       [  17.28,   39.99, 1007.09,   74.25]])

In [13]:
# Display the training targets.
y_train

array([473.93, 467.87, 431.97, ..., 459.01, 462.72, 428.12])

In [14]:
# Display the test targets.
y_test

array([431.23, 460.01, 461.14, ..., 473.26, 438.  , 463.28])

## Part 2 - Building and training the model

### Building the model

In [15]:
# Create an (unfitted) linear regression estimator with scikit-learn's default settings.
from sklearn.linear_model import LinearRegression
model = LinearRegression()

### Training the model

In [16]:
# Fit the regression on the training split only; the test split stays unseen.
model.fit(X=X_train, y=y_train)

### Inference

Making predictions for the data points in the test set

In [17]:
# Predict the target for every row of the held-out test set.
y_pred = model.predict(X=X_test)

In [18]:
# Display the predictions for the test set.
y_pred

array([431.42761597, 458.56124622, 462.75264705, ..., 469.51835895,
       442.41759454, 461.88279939])

Making a prediction for a single data point with AT = 15, V = 40, AP = 1000, RH = 75

In [19]:
# predict() expects a 2-D input (one row per sample), hence the nested list.
# Values follow the column order of the training features.
model.predict([
    [15, 40, 1000, 75],  # AT, V, AP, RH
])

array([465.80771895])

## Part 3 - Evaluating the model

### R-Squared

In [20]:
from sklearn.metrics import r2_score

# Coefficient of determination on the held-out test set
# (ground truth first, predictions second).
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [21]:
# Display the R-squared score on the test set.
r2

0.9325315554761302

### Adjusted R-Squared

In [22]:
# Adjusted R-squared penalises R-squared for the number of predictors:
#     adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
# n = number of test samples (rows), k = number of features (columns).
n, k = X_test.shape

# The formula is undefined when n - k - 1 == 0 and meaningless when it is
# negative, so fail with a clear message instead of a ZeroDivisionError or a
# silently bogus score.
if n - k - 1 <= 0:
    raise ValueError(
        f"Adjusted R-squared needs more samples than features + 1 (n={n}, k={k})"
    )

adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

In [23]:
# Display the adjusted R-squared score.
adj_r2

0.9323901862890713