# Multiple Linear Regression

## Import libraries

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.optim as optim
import optuna

## Prepare the dataset

### Load the dataset

In [95]:
# Read the housing CSV into a DataFrame. The bare name on the last line makes
# the notebook render the frame as the cell's output.
data = pd.read_csv(filepath_or_buffer='housing.csv')
data

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0
...,...,...,...,...
484,6.593,9.67,21.0,470400.0
485,6.120,9.08,21.0,432600.0
486,6.976,5.64,21.0,501900.0
487,6.794,6.48,21.0,462000.0


### Split the dataset

In [96]:
# 'MEDV' is the regression target; every remaining column is a feature.
y = data['MEDV']
X = data.drop('MEDV', axis=1)

# Hold out 20% of the rows (shuffled) as the test split.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
)

# Report the shape of each split as a quick sanity check.
for label, split in (
    ('x_train:', x_train),
    ('x_test:', x_test),
    ('y_train:', y_train),
    ('y_test:', y_test),
):
    print(label, split.shape)

x_train: (391, 3)
x_test: (98, 3)
y_train: (391,)
y_test: (98,)


### Normalize the data

In [97]:
# Standardize the features. The scaler is fitted on the training split only,
# and the test split is transformed with those same training statistics.
x_scaler = StandardScaler().fit(x_train)
x_train = x_scaler.transform(x_train)
x_test = x_scaler.transform(x_test)

# StandardScaler expects 2-D input, so the 1-D target Series are reshaped
# into single-column arrays before scaling.
y_train_column = y_train.values.reshape(-1, 1)
y_test_column = y_test.values.reshape(-1, 1)

y_scaler = StandardScaler().fit(y_train_column)
y_train = y_scaler.transform(y_train_column)
y_test = y_scaler.transform(y_test_column)

In [98]:
# Re-check the shapes after scaling (the targets are now 2-D column arrays).
for label, split in (
    ('x_train:', x_train),
    ('x_test:', x_test),
    ('y_train:', y_train),
    ('y_test:', y_test),
):
    print(label, split.shape)

x_train: (391, 3)
x_test: (98, 3)
y_train: (391, 1)
y_test: (98, 1)


### Convert into tensor

In [99]:
# ndarray -> float32 tensor (the scaler step above already turned the
# DataFrame/Series splits into NumPy arrays).
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

## Functions to build and train the model

### Optimize the hyperparameters using Optuna

In [None]:
def objective(trial, X):
    """Optuna objective: train a fresh linear model and report its final cost.

    Args:
        trial: The Optuna trial used to sample ``num_epochs`` and ``lr``.
        X: Feature tensor of shape (n_samples, n_features) to train on.

    Returns:
        The final training cost as a Python float, which is the value the
        study minimizes.

    Raises:
        optuna.TrialPruned: Propagated from ``train_model`` when the cost
            becomes NaN or infinite (e.g. the learning rate diverges).
    """
    # Size the weights from the data instead of hard-coding three features.
    W = torch.zeros([X.shape[1], 1], requires_grad=True)
    b = torch.zeros(1, requires_grad=True)

    num_epochs = trial.suggest_categorical('num_epochs', [100, 1000, 10000])
    lr = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001, 0.00001, 0.000001])

    # train_model builds its own optimizer, so none is created here.
    cost = train_model(X, W, b, num_epochs, lr, True)

    # The objective must return a number; without this return every trial
    # would yield None and the study could not rank the trials.
    return cost.item()

### To train the model

In [None]:
def train_model(X, W, b, num_epochs, lr, check_optuna, y=None):
    """Fit the linear model ``X @ W + b`` with full-batch SGD on the MSE cost.

    Args:
        X: Feature tensor of shape (n_samples, n_features).
        W: Weight tensor of shape (n_features, 1) created with
            ``requires_grad=True``; it is updated in place.
        b: Bias tensor created with ``requires_grad=True``; updated in place.
        num_epochs: Number of gradient steps to run; must be at least 1.
        lr: Learning rate handed to ``optim.SGD``.
        check_optuna: When truthy, the run is treated as an Optuna trial: a
            NaN/inf cost prunes the trial and only the cost is returned.
        y: Target tensor of shape (n_samples, 1). When omitted, the
            notebook-level ``y_train`` is used, as in the original code.

    Returns:
        ``cost`` when ``check_optuna`` is truthy, otherwise the tuple
        ``(cost, hypothesis)``. Both come from the last epoch's forward pass,
        i.e. they are computed before that epoch's parameter update.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
        optuna.TrialPruned: If ``check_optuna`` is truthy and the cost
            becomes NaN or infinite.
    """
    if num_epochs < 1:
        # Without at least one epoch there is no cost/hypothesis to return.
        raise ValueError('num_epochs must be at least 1')

    targets = y_train if y is None else y
    optimizer = optim.SGD([W, b], lr=lr)

    for epoch in range(num_epochs):
        # Matrix product: (n_samples, n_features) @ (n_features, 1) gives one
        # prediction per sample. An element-wise `*` cannot broadcast these
        # shapes and is not the linear-regression hypothesis.
        hypothesis = X @ W + b
        cost = torch.mean((hypothesis - targets) ** 2)

        if check_optuna and (torch.isnan(cost) or torch.isinf(cost)):
            # Diverged run: tell Optuna to drop this trial early.
            raise optuna.TrialPruned()

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

    if check_optuna:
        return cost
    return cost, hypothesis

## Run the Optuna study

In [None]:
def _objective_on_train(trial):
    """Evaluate one Optuna trial against the training features."""
    return objective(trial, x_train)


# Search for the hyperparameters that give the lowest objective value.
study = optuna.create_study(direction='minimize')
study.optimize(_objective_on_train, n_trials=30)

## Print best parameters

## Train model with best parameters

In [None]:
# Start the final run from zero-initialized parameters that track gradients:
# a (3, 1) weight column and a single bias term.
W = torch.zeros(3, 1, requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

# NOTE(review): the call below is left disabled; best_epoch and best_lr are
# not defined in the visible part of the notebook.
# cost, hypothesis = train_model(x_train, W, b, best_epoch, best_lr, False)

## Plot loss graph