# Linear regression with PyTorch

In [2]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so that
# packages located there can be imported from this notebook.
project_path = str(Path.cwd().parent.resolve())
if project_path not in sys.path:
    sys.path.append(project_path)

# imports
import time
from common.utils import get_data, split

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
from torch.utils.data import DataLoader, TensorDataset

In [3]:
# Load the dataset through the project helper.
# NOTE(review): `only_numeric=True` presumably restricts the result to numeric
# columns, and the result is presumably a pandas DataFrame (the preview below
# is tabular) - confirm against common.utils.get_data.
data = get_data(only_numeric=True)
data.head()

Unnamed: 0,Previous qualification (grade),Admission grade,Age at enrollment,Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (approved),Curricular units 1st sem (grade),Curricular units 1st sem (without evaluations),Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP
0,122.0,127.3,20,0,0,0,0,0.0,0,0,0,0,0,0.0,0,10.8,1.4,1.74
1,160.0,142.5,19,0,6,6,6,14.0,0,0,6,6,6,13.666667,0,13.9,-0.3,0.79
2,122.0,124.8,19,0,6,0,0,0.0,0,0,6,0,0,0.0,0,10.8,1.4,1.74
3,122.0,119.6,20,0,6,8,6,13.428571,0,0,6,10,5,12.4,0,9.4,-0.8,-3.12
4,100.0,141.5,45,0,6,9,5,12.333333,0,0,6,6,6,13.0,0,13.9,-0.3,0.79


In [4]:
# Regression target; every other column is used as an input feature.
TARGET_COLUMN = "Admission grade"

features_raw = data.drop(columns=TARGET_COLUMN)

# Standardize each feature (zero mean, unit variance).
# NOTE(review): the scaler is fit on the full dataset *before* the split, so
# validation/test statistics influence the scaling. Consider fitting on the
# training split only once the behavior of `split` has been confirmed.
features_scaled = StandardScaler().fit_transform(features_raw)

X = torch.tensor(features_scaled, dtype=torch.float32)
# Convert via NumPy so the values are taken positionally, independent of the
# DataFrame index labels.
y = torch.tensor(data[TARGET_COLUMN].to_numpy(), dtype=torch.float32)

X_train, X_val, X_test, y_train, y_val, y_test = split(X, y)

In [5]:
class LinearRegression(torch.nn.Module):
    """Single-output linear model: y = w1*x1 + w2*x2 + ... + b.

    Args:
        inputSize: Number of input features per sample.
    """

    def __init__(self, inputSize):
        super().__init__()
        # One affine layer mapping `inputSize` features to a single prediction.
        self.linear = torch.nn.Linear(in_features=inputSize, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        prediction = self.linear(x)
        return prediction
    

def train_and_measure_time(device, X_train, y_train, epochs = 200, batch_size=128, learning_rate=0.001, should_print_time=False):
    """Train a `LinearRegression` model with mini-batch SGD and time the run.

    The measured time covers model construction, data-loader setup and the
    whole training loop.

    Args:
        device: `torch.device` (or device string) the model is trained on.
        X_train: 2-D feature tensor; `X_train.shape[1]` is the feature count.
        y_train: Target tensor. A 1-D tensor is reshaped to a column so that
            it matches the model output instead of broadcasting inside the loss.
        epochs: Number of full passes over the training data.
        batch_size: Mini-batch size used by the data loader.
        learning_rate: SGD step size.
        should_print_time: When True, print the elapsed training time.

    Returns:
        Tuple `(model, seconds)`: the trained model and the elapsed wall-clock
        time rounded to two decimals.
    """
    device = torch.device(device)

    # The model emits shape (N, 1); a 1-D target would silently broadcast to
    # (N, N) inside MSELoss and yield a wrong loss.
    if y_train.dim() == 1:
        y_train = y_train.unsqueeze(1)

    start = time.perf_counter()

    model = LinearRegression(X_train.shape[1]).to(device)
    criterion = torch.nn.MSELoss()                                          # MSE
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)       # optimizes the model parameters

    dataset = TensorDataset(X_train, y_train)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for _ in range(epochs):
        for X_batch, y_batch in loader:
            # No-op when the data already lives on `device`; otherwise avoids a
            # device-mismatch error between the model and its inputs.
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            y_pred = model(X_batch)                     # X * W^T + b
            loss = criterion(y_pred, y_batch)           # 1/n * sum((ytrue - ypred)^2)   (MSE)

            optimizer.zero_grad()                       # clear gradients from the previous step
            loss.backward()                             # backpropagation - computes gradients of the loss

            optimizer.step()                            # w = w - learning_rate * (dloss / dw)

    # CUDA kernels run asynchronously; wait for queued work so the timer
    # reflects the training that was actually executed.
    if device.type == "cuda":
        torch.cuda.synchronize(device)

    end = time.perf_counter()
    elapsed = end - start

    if should_print_time:
        print(f"Training time: {elapsed:.2f} seconds\n")

    return model, round(elapsed, 2)

In [6]:
def eval_chosen_datasets(model, eval_train=False, eval_val=False, eval_test=False,
         X_train=None, y_train=None, X_val=None, y_val=None, X_test=None, y_test=None):
    """Compute the MSE of `model` on the requested dataset splits.

    Args:
        model: Trained torch module; it is switched to eval mode and left there.
        eval_train, eval_val, eval_test: Flags selecting which splits to score.
        X_train, y_train, X_val, y_val, X_test, y_test: Feature/target tensors
            of each split; required only for the splits that are enabled.

    Returns:
        Dict with an entry under "train", "val" and/or "test" for every enabled
        split, holding that split's mean squared error.

    Raises:
        ValueError: If a split is enabled but its features or targets are None.
    """
    requested = (
        ("train", eval_train, X_train, y_train),
        ("val", eval_val, X_val, y_val),
        ("test", eval_test, X_test, y_test),
    )

    # Fail early with a clear message instead of an opaque error inside the model call.
    for split_name, enabled, features, targets in requested:
        if enabled and (features is None or targets is None):
            raise ValueError(
                f"eval_{split_name}=True requires both X_{split_name} and y_{split_name}"
            )

    model.eval()                                    # switch to eval mode

    evals = {}

    with torch.no_grad():                           # no gradients needed while evaluating
        for split_name, enabled, features, targets in requested:
            if not enabled:
                continue
            predictions = model(features).cpu().numpy()
            evals[split_name] = mean_squared_error(targets.cpu().numpy(), predictions)

    return evals

In [7]:
def print_eval(evaluations: dict):
    """Print the train, validation and test MSE stored in `evaluations`.

    Args:
        evaluations: Mapping that contains the keys "train", "val" and "test".
    """
    labelled_keys = (("train", "train"), ("validation", "val"), ("test", "test"))
    for label, key in labelled_keys:
        print(f"MSE {label}: {evaluations[key]}")

CPU

In [8]:
device = torch.device("cpu")

# Place every split on the target device; targets become column vectors so
# they line up with the model's (N, 1) output.
X_train_dev, X_val_dev, X_test_dev = (
    features.to(device) for features in (X_train, X_val, X_test)
)
y_train_dev, y_val_dev, y_test_dev = (
    targets.to(device).view(-1, 1) for targets in (y_train, y_val, y_test)
)

model, seconds = train_and_measure_time(
    device,
    X_train_dev,
    y_train_dev,
    epochs=250,
    batch_size=512,
    learning_rate=0.01,
)

cpu_evaluations = eval_chosen_datasets(
    model,
    eval_train=True, eval_val=True, eval_test=True,
    X_train=X_train_dev, y_train=y_train_dev,
    X_val=X_val_dev, y_val=y_val_dev,
    X_test=X_test_dev, y_test=y_test_dev,
)

print_eval(cpu_evaluations)

# `cpu_results` is the same dict object as `cpu_evaluations`, extended with the timing.
cpu_evaluations["time"] = seconds
cpu_results = cpu_evaluations

MSE train: 131.12391662597656
MSE validation: 124.09395599365234
MSE test: 153.34840393066406


GPU

In [9]:
if torch.cuda.is_available():
    device = torch.device("cuda")

    # Copy every split to the GPU; targets become column vectors so they line
    # up with the model's (N, 1) output.
    X_train_dev = X_train.to(device)
    y_train_dev = y_train.to(device).view(-1,1)
    X_val_dev = X_val.to(device)
    y_val_dev = y_val.to(device).view(-1,1)
    X_test_dev = X_test.to(device)
    y_test_dev = y_test.to(device).view(-1,1)

    model, seconds = train_and_measure_time(device, X_train_dev, y_train_dev, epochs=250, batch_size=512, learning_rate=0.01)
    gpu_evaluations = eval_chosen_datasets(
        model,
        eval_train=True, X_train=X_train_dev, y_train=y_train_dev,
        eval_val=True, X_val=X_val_dev, y_val=y_val_dev,
        eval_test=True, X_test=X_test_dev, y_test=y_test_dev,
    )

    print_eval(gpu_evaluations)
else:
    print("Not available")
    # No GPU run took place: record NaN rather than a fake 0 MSE, and reset
    # `seconds` so the CPU run's timing is not reported as the GPU time.
    gpu_evaluations = {"train": float("nan"), "val": float("nan"), "test": float("nan")}
    seconds = float("nan")

gpu_results = gpu_evaluations
gpu_results["time"] = seconds

MSE train: 131.10179138183594
MSE validation: 123.68314361572266
MSE test: 153.0789031982422


In [10]:
# One row per device; columns are the keys of the result dicts.
df = pd.DataFrame(
    data=[cpu_results, gpu_results],
    index=["CPU", "GPU"],
)
df

Unnamed: 0,train,val,test,time
CPU,131.123917,124.093956,153.348404,8.32
GPU,131.101791,123.683144,153.078903,6.71


In [11]:
def look_for_best_params(candidate_params=None):
    """Grid-search training hyperparameters on the CPU, scoring on validation MSE.

    Reads the notebook-level `X_train`, `y_train`, `X_val` and `y_val` tensors,
    trains one model per combination and prints a line for each run.

    Args:
        candidate_params: Optional dict with the keys "epochs", "batch_size"
            and "learning_rate", each mapping to a list of values to try.
            Defaults to the built-in grid below.

    Returns:
        List of dicts, one per combination, with the keys "Time (s)",
        "Eval MSE", "Epochs", "Batch size" and "Learning rate".
    """
    CANDIDATE_PARAMS = {
        "epochs": [100, 250, 500, 750],
        "batch_size": [64, 128, 196, 256, 512],
        "learning_rate": [0.001, 0.01]
    }
    if candidate_params is not None:
        CANDIDATE_PARAMS = candidate_params

    device = torch.device("cpu")

    X_train_dev = X_train.to(device)
    y_train_dev = y_train.to(device).view(-1,1)
    X_val_dev = X_val.to(device)
    y_val_dev = y_val.to(device).view(-1,1)

    results = []

    for epochs in CANDIDATE_PARAMS["epochs"]:
        for batch_size in CANDIDATE_PARAMS["batch_size"]:
            for learning_rate in CANDIDATE_PARAMS["learning_rate"]:
                model, seconds = train_and_measure_time(device, X_train_dev, y_train_dev, epochs=epochs, batch_size=batch_size, learning_rate=learning_rate)
                evaluation = eval_chosen_datasets(model, X_val=X_val_dev, y_val=y_val_dev, eval_val=True)["val"]
                results.append({
                    "Time (s)": seconds,
                    "Eval MSE": evaluation,
                    "Epochs": epochs,
                    "Batch size": batch_size,
                    "Learning rate": learning_rate
                })
                print(f"Time: {seconds} s     Eval: {evaluation:.5f}     Epochs: {epochs}     Batch size: {batch_size}      Learning rate: {learning_rate}")

    # Previously the collected rows were discarded; hand them back to the caller.
    return results
       
# Takes about 6-7 minutes to run.
# RESULT
#   epochs - 250
#   batch_size - 512
#   learning_rate - 0.01
# look_for_best_params()