# Linear regression with PyTorch

In [21]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path
# (once), so modules living one level above the notebook can be imported.
project_path = str(Path.cwd().parent.resolve())
already_registered = project_path in sys.path
if not already_registered:
    sys.path.append(project_path)

# imports
import time
from common.utils import get_data, split

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
from torch.utils.data import DataLoader, TensorDataset

In [22]:
# Load the dataset through the project helper and preview the first rows.
# NOTE(review): `only_numeric=True` presumably restricts the frame to numeric
# columns -- confirm against common.utils.get_data.
data = get_data(only_numeric=True)
data.head()

Unnamed: 0,Previous qualification (grade),Admission grade,Age at enrollment,Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (approved),Curricular units 1st sem (grade),Curricular units 1st sem (without evaluations),Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP
0,122.0,127.3,20,0,0,0,0,0.0,0,0,0,0,0,0.0,0,10.8,1.4,1.74
1,160.0,142.5,19,0,6,6,6,14.0,0,0,6,6,6,13.666667,0,13.9,-0.3,0.79
2,122.0,124.8,19,0,6,0,0,0.0,0,0,6,0,0,0.0,0,10.8,1.4,1.74
3,122.0,119.6,20,0,6,8,6,13.428571,0,0,6,10,5,12.4,0,9.4,-0.8,-3.12
4,100.0,141.5,45,0,6,9,5,12.333333,0,0,6,6,6,13.0,0,13.9,-0.3,0.79


In [23]:
# Features: every column except the regression target, standardized and
# converted to a float32 tensor.
# NOTE(review): the scaler is fitted on the full dataset *before* the split,
# so validation/test rows influence the scaling statistics. Consider fitting
# it on the training split only.
X = data.drop("Admission grade", axis=1)
X = StandardScaler().fit_transform(X)       # normalize
X = torch.tensor(X, dtype=torch.float32)

# Target: go through NumPy explicitly instead of handing the pandas Series to
# torch.tensor(), which would depend on the Series' label-based element access
# (fragile when the index is not the default 0..n-1 range).
y = data["Admission grade"].to_numpy()
y = torch.tensor(y, dtype=torch.float32)

X_train, X_val, X_test, y_train, y_val, y_test = split(X, y)

In [24]:
class LinearRegression(torch.nn.Module):
    """Single-output linear model: y = w1*x1 + w2*x2 + ... + b."""

    def __init__(self, inputSize):
        """Create the one linear layer mapping ``inputSize`` features to 1 output."""
        super(LinearRegression, self).__init__()
        self.linear = torch.nn.Linear(in_features=inputSize, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.linear(x)
        return prediction
    

def train_and_measure_time(device, X_train, y_train, epochs = 200, batch_size=128, learning_rate=0.001, should_print_time=False):
    """Train a fresh ``LinearRegression`` with mini-batch SGD and time the run.

    Args:
        device: ``torch.device`` (or device string) the model is trained on.
        X_train: 2-D feature tensor; ``X_train.shape[1]`` sets the input size.
        y_train: Target tensor with one value per row of ``X_train``. Both
            ``(N,)`` and ``(N, 1)`` layouts are accepted.
        epochs: Number of full passes over the training data.
        batch_size: Mini-batch size handed to the ``DataLoader``.
        learning_rate: SGD step size.
        should_print_time: When true, print the elapsed training time.

    Returns:
        tuple: ``(model, seconds)`` where ``seconds`` is the elapsed wall-clock
        time rounded to two decimals. The timed span covers model creation,
        loader setup and all epochs.
    """
    device = torch.device(device)

    start = time.perf_counter()

    model = LinearRegression(X_train.shape[1]).to(device)
    criterion = torch.nn.MSELoss()                                          # MSE
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)       # optimized with params

    dataset = TensorDataset(X_train, y_train)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for _ in range(epochs):
        for X_batch, y_batch in loader:
            # No-op when the data already lives on `device`; otherwise this
            # avoids a device-mismatch error between model and batch.
            X_batch = X_batch.to(device)

            y_pred = model(X_batch)                     # X * W^T + b

            # Give the target the same shape as the prediction so that MSELoss
            # never broadcasts an (N,) target against (N, 1) predictions.
            y_batch = y_batch.to(device).reshape(y_pred.shape)
            loss = criterion(y_pred, y_batch)           # 1/n * sum((ytrue - ypred)^2)   (MSE)

            optimizer.zero_grad()                       # clear gradients from the previous step
            loss.backward()                             # back propagation - gradients of the loss
            optimizer.step()                            # w = w - learning_rate * (dloss / dw)

    # CUDA kernels are launched asynchronously; wait for them to finish so the
    # measured time includes the GPU work that was actually queued.
    if device.type == "cuda":
        torch.cuda.synchronize(device)

    end = time.perf_counter()
    elapsed = end - start

    if should_print_time:
        print(f"Training time: {elapsed:.2f} seconds\n")

    return model, round(elapsed, 2)

In [None]:
def eval_chosen_datasets(model, eval_train=False, eval_val=False, eval_test=False,
         X_train=None, y_train=None, X_val=None, y_val=None, X_test=None, y_test=None):
    """Compute the MSE of ``model`` on whichever splits are switched on.

    The model is put into eval mode and predictions are made without gradient
    tracking. For each enabled split the corresponding ``X_*`` / ``y_*`` pair
    must be supplied.

    Returns:
        dict: MSE per evaluated split under the keys ``"train"``, ``"val"``
        and ``"test"``; splits that were not requested are absent.
    """
    requested = (
        ("train", eval_train, X_train, y_train),
        ("val", eval_val, X_val, y_val),
        ("test", eval_test, X_test, y_test),
    )

    model.eval()                                    # switch to eval mode
    evals = {}

    with torch.no_grad():                           # no gradients needed for evaluation
        for split_name, enabled, features, targets in requested:
            if not enabled:
                continue
            predictions = model(features).cpu().numpy()
            evals[split_name] = mean_squared_error(targets.cpu().numpy(), predictions)

    return evals

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split


def eval_pytorch(model, X, y):
    """Return the mean squared error of ``model``'s predictions for ``X`` against ``y``.

    The model is switched to eval mode and the forward pass runs without
    gradient tracking; both predictions and targets are brought to the CPU as
    NumPy arrays before the metric is computed.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(X)
    predictions_np = predictions.cpu().numpy()
    targets_np = y.cpu().numpy()
    return mean_squared_error(targets_np, predictions_np)

def get_custom_dataframe_pytorch(device, X_train_val, y_train_val, X_test, y_test, cv=5, random_states=None,
                                 epochs=200, batch_size=128, learning_rate=0.001):
    """Repeat train/validation splitting ``cv`` times and tabulate time and MSEs.

    Each round splits ``(X_train_val, y_train_val)`` 80/20 into train and
    validation parts, trains a new model on the train part and evaluates it on
    the train, validation and test data.

    Args:
        device: Device the models are trained and evaluated on.
        X_train_val, y_train_val: Tensors that are re-split every round.
        X_test, y_test: Held-out tensors evaluated in every round.
        cv: Number of rounds.
        random_states: Optional sequence with one split seed per round; when
            falsy, every split is unseeded.
        epochs, batch_size, learning_rate: Training hyper-parameters passed on
            to ``train_and_measure_time``.

    Returns:
        pandas.DataFrame: Rows ``Time``, ``Train``, ``Validation``, ``Test``;
        one column per round.
    """
    # Make sure the held-out data sits on the same device as the model.
    X_test = X_test.to(device)
    y_test = y_test.to(device)

    rows = []

    for i in range(cv):
        random_state = random_states[i] if random_states else None
        X_train, X_val, y_train, y_val = (
            part.to(device)
            for part in train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=random_state)
        )

        # `seconds` rather than `time`, so the `time` module is not shadowed.
        model, seconds = train_and_measure_time(
            device, X_train, y_train,
            epochs=epochs, batch_size=batch_size, learning_rate=learning_rate,
            should_print_time=False,
        )

        # eval_pytorch already returns the MSE; it must not be passed through
        # mean_squared_error a second time.
        rows.append([
            seconds,
            eval_pytorch(model, X_train, y_train),
            eval_pytorch(model, X_val, y_val),
            eval_pytorch(model, X_test, y_test),
        ])

    # reshape keeps the (0, 4) layout when cv == 0.
    results = np.array(rows, dtype=float).reshape(-1, 4)
    result_df = pd.DataFrame(results.T, index=["Time", "Train", "Validation", "Test"])
    return result_df

In [26]:
def print_eval(evaluations: dict):
    """Print the train, validation and test MSE stored in ``evaluations``.

    Expects the keys ``"train"``, ``"val"`` and ``"test"``.
    """
    labelled_keys = (("train", "train"), ("validation", "val"), ("test", "test"))
    for label, key in labelled_keys:
        print(f"MSE {label}: {evaluations[key]}")

In [None]:
device = torch.device("cpu")

# Device-resident copies; targets are reshaped to a column (N, 1).
X_train_dev = X_train.to(device)
y_train_dev = y_train.to(device).view(-1,1)
X_val_dev = X_val.to(device)
y_val_dev = y_val.to(device).view(-1,1)
X_test_dev = X_test.to(device)
y_test_dev = y_test.to(device).view(-1,1)

# Use the prepared *_dev test tensors (previously the raw X_test / y_test were
# passed and the copies created above went unused).
get_custom_dataframe_pytorch(device, X_train_dev, y_train_dev, X_test_dev, y_test_dev)

In [None]:
if torch.cuda.is_available():
    device = torch.device("cuda")

    # Device-resident copies; targets are reshaped to a column (N, 1).
    X_train_dev = X_train.to(device)
    y_train_dev = y_train.to(device).view(-1,1)
    X_val_dev = X_val.to(device)
    y_val_dev = y_val.to(device).view(-1,1)
    X_test_dev = X_test.to(device)
    y_test_dev = y_test.to(device).view(-1,1)

    # The test tensors must be the CUDA copies: the model lives on the GPU, so
    # the original X_test / y_test would not be on the model's device.
    gpu_cv_results = get_custom_dataframe_pytorch(device, X_train_dev, y_train_dev, X_test_dev, y_test_dev)

else:
    print("GPU not available")
    gpu_cv_results = None

# A bare expression inside the `if` block is not rendered by the notebook;
# placing it last at top level displays the table (nothing is shown for None).
gpu_cv_results
    

CPU

In [27]:
device = torch.device("cpu")

# Move every split to the device; targets become column vectors (N, 1).
X_train_dev, X_val_dev, X_test_dev = (
    features.to(device) for features in (X_train, X_val, X_test)
)
y_train_dev, y_val_dev, y_test_dev = (
    targets.to(device).view(-1, 1) for targets in (y_train, y_val, y_test)
)

# Train on the CPU and keep the measured wall-clock time.
model, seconds = train_and_measure_time(
    device, X_train_dev, y_train_dev,
    epochs=100, batch_size=64, learning_rate=0.001,
)

# MSE on all three splits.
cpu_evaluations = eval_chosen_datasets(
    model,
    eval_train=True, eval_val=True, eval_test=True,
    X_train=X_train_dev, y_train=y_train_dev,
    X_val=X_val_dev, y_val=y_val_dev,
    X_test=X_test_dev, y_test=y_test_dev,
)
print_eval(cpu_evaluations)

# cpu_results refers to the same dict as cpu_evaluations, extended by the timing.
cpu_results = cpu_evaluations
cpu_results["time"] = seconds

MSE train: 131.06732177734375
MSE validation: 123.8985366821289
MSE test: 152.20675659179688


GPU

In [28]:
if torch.cuda.is_available():
    device = torch.device("cuda")

    # Device copies go into separate *_dev variables so the original
    # X_train / y_train / ... tensors are left untouched.
    X_train_dev = X_train.to(device)
    y_train_dev = y_train.to(device).view(-1,1)
    X_val_dev = X_val.to(device)
    y_val_dev = y_val.to(device).view(-1,1)
    X_test_dev = X_test.to(device)
    y_test_dev = y_test.to(device).view(-1,1)

    model, seconds = train_and_measure_time(device, X_train_dev, y_train_dev, epochs=250, batch_size=512, learning_rate=0.01)
    gpu_evaluations = eval_chosen_datasets(
        model,
        eval_train=True, X_train=X_train_dev, y_train=y_train_dev,
        eval_val=True, X_val=X_val_dev, y_val=y_val_dev,
        eval_test=True, X_test=X_test_dev, y_test=y_test_dev,
    )

    print_eval(gpu_evaluations)
else:
    print("Not available")
    # Placeholders for a machine without a GPU. `seconds` has to be reset as
    # well, otherwise the timing left over from the CPU run would be stored
    # as the GPU time below.
    gpu_evaluations = {"train": 0, "val": 0, "test": 0}
    seconds = 0

# gpu_results refers to the same dict as gpu_evaluations, extended by the timing.
gpu_results = gpu_evaluations
gpu_results["time"] = seconds

MSE train: 132.399169921875
MSE validation: 125.33018493652344
MSE test: 156.39425659179688


In [29]:
# One row per device: the MSE per split plus the measured training time.
df = pd.DataFrame(
    data=[cpu_results, gpu_results],
    index=["CPU", "GPU"],
)
df

Unnamed: 0,train,val,test,time
CPU,131.067322,123.898537,152.206757,3.14
GPU,132.39917,125.330185,156.394257,7.03


In [30]:
def look_for_best_params(candidate_params=None):
    """Grid-search epochs, batch size and learning rate on the CPU.

    A model is trained on the training split for every combination and scored
    by its MSE on the validation split. One progress line is printed per
    combination.

    Args:
        candidate_params: Optional dict with the keys ``"epochs"``,
            ``"batch_size"`` and ``"learning_rate"``, each mapping to a list of
            values to try. Defaults to the built-in grid below.

    Returns:
        pandas.DataFrame: One row per combination with the columns
        ``Time (s)``, ``Eval MSE``, ``Epochs``, ``Batch size`` and
        ``Learning rate``.
    """
    if candidate_params is None:
        candidate_params = {
            "epochs": [100, 250, 500],
            "batch_size": [64, 128, 196, 256, 512],
            "learning_rate": [0.001, 0.005]
        }

    device = torch.device("cpu")

    X_train_dev = X_train.to(device)
    y_train_dev = y_train.to(device).view(-1,1)
    X_val_dev = X_val.to(device)
    y_val_dev = y_val.to(device).view(-1,1)

    results = []

    for epochs in candidate_params["epochs"]:
        for batch_size in candidate_params["batch_size"]:
            for learning_rate in candidate_params["learning_rate"]:
                model, seconds = train_and_measure_time(device, X_train_dev, y_train_dev, epochs=epochs, batch_size=batch_size, learning_rate=learning_rate)
                evaluation = eval_chosen_datasets(model, X_val=X_val_dev, y_val=y_val_dev, eval_val=True)["val"]
                results.append({
                    "Time (s)": seconds,
                    "Eval MSE": evaluation,
                    "Epochs": epochs,
                    "Batch size": batch_size,
                    "Learning rate": learning_rate
                })
                print(f"Time: {seconds} s     Eval: {evaluation:.5f}     Epochs: {epochs}     Batch size: {batch_size}      Learning rate: {learning_rate}")

    # Return the collected rows (previously they were built and then dropped).
    # Explicit columns keep the frame well-formed even for an empty grid.
    return pd.DataFrame(results, columns=["Time (s)", "Eval MSE", "Epochs", "Batch size", "Learning rate"])


# Takes roughly 6-7 minutes.
# Recorded result:
#   epochs        - 250
#   batch_size    - 512
#   learning_rate - 0.01
# NOTE(review): 0.01 is not part of the learning-rate grid above -- presumably
# this was recorded with an earlier grid; confirm.
search_results = look_for_best_params()
search_results.sort_values("Eval MSE").head()

Time: 3.09 s     Eval: 123.97095     Epochs: 100     Batch size: 64      Learning rate: 0.001
Time: 2.96 s     Eval: 124.36595     Epochs: 100     Batch size: 64      Learning rate: 0.005
Time: 2.34 s     Eval: 123.64854     Epochs: 100     Batch size: 128      Learning rate: 0.001
Time: 2.39 s     Eval: 124.06177     Epochs: 100     Batch size: 128      Learning rate: 0.005
Time: 2.25 s     Eval: 146.99533     Epochs: 100     Batch size: 196      Learning rate: 0.001
Time: 2.37 s     Eval: 124.00691     Epochs: 100     Batch size: 196      Learning rate: 0.005
Time: 2.21 s     Eval: 206.74171     Epochs: 100     Batch size: 256      Learning rate: 0.001
Time: 2.31 s     Eval: 123.72475     Epochs: 100     Batch size: 256      Learning rate: 0.005
Time: 2.02 s     Eval: 1093.17920     Epochs: 100     Batch size: 512      Learning rate: 0.001
Time: 2.07 s     Eval: 123.45286     Epochs: 100     Batch size: 512      Learning rate: 0.005
Time: 7.24 s     Eval: 124.12977     Epochs: 250   