# Linear regression with PyTorch

In [101]:
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path
# (presumably so the project-local `common` package imported below resolves).
project_path = str(Path.cwd().parent.resolve())
already_registered = project_path in sys.path
if not already_registered:
    sys.path.append(project_path)

# imports
import time
from common.utils import get_data

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
from torch.utils.data import DataLoader, TensorDataset

In [102]:
# Load the dataset through the project helper.
# NOTE(review): `only_numeric=True` presumably restricts the result to numeric
# columns — confirm against common.utils.get_data.
data = get_data(only_numeric=True)
# Last expression of the cell: preview the first rows as the cell output.
data.head()

Unnamed: 0,Previous qualification (grade),Admission grade,Age at enrollment,Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (approved),Curricular units 1st sem (grade),Curricular units 1st sem (without evaluations),Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP
0,122.0,127.3,20,0,0,0,0,0.0,0,0,0,0,0,0.0,0,10.8,1.4,1.74
1,160.0,142.5,19,0,6,6,6,14.0,0,0,6,6,6,13.666667,0,13.9,-0.3,0.79
2,122.0,124.8,19,0,6,0,0,0.0,0,0,6,0,0,0.0,0,10.8,1.4,1.74
3,122.0,119.6,20,0,6,8,6,13.428571,0,0,6,10,5,12.4,0,9.4,-0.8,-3.12
4,100.0,141.5,45,0,6,9,5,12.333333,0,0,6,6,6,13.0,0,13.9,-0.3,0.79


In [103]:
RANDOM_STATES = [16, 6, 316, 50, 24]    # one seed per train/validation repetition
TEST_SPLIT_SEED = 42                    # fixed seed so the held-out test set is the same on every run

# Features are every column except the regression target "Admission grade".
features = data.drop("Admission grade", axis=1).to_numpy()
target = data["Admission grade"].to_numpy()

# Split BEFORE scaling: the scaler must learn its mean/std from the training
# portion only, otherwise test-set statistics leak into the training data.
features_train_val, features_test, target_train_val, target_test = train_test_split(
    features, target, test_size=0.2, random_state=TEST_SPLIT_SEED
)

scaler = StandardScaler().fit(features_train_val)       # normalize (statistics from train/val only)

X_train_val = torch.tensor(scaler.transform(features_train_val), dtype=torch.float32)
X_test = torch.tensor(scaler.transform(features_test), dtype=torch.float32)
y_train_val = torch.tensor(target_train_val, dtype=torch.float32)
y_test = torch.tensor(target_test, dtype=torch.float32)

# Full-dataset tensors, kept under their original names for any later cell that reads them.
X = torch.tensor(scaler.transform(features), dtype=torch.float32)
y = torch.tensor(target, dtype=torch.float32)

In [104]:
class LinearRegression(torch.nn.Module):
    """Single-output linear model: y = w1*x1 + w2*x2 + ... + b."""

    def __init__(self, inputSize):
        """Build the model.

        Args:
            inputSize: number of input features per sample.
        """
        super().__init__()
        # One fully connected layer mapping `inputSize` features to one scalar.
        self.linear = torch.nn.Linear(in_features=inputSize, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        prediction = self.linear(x)
        return prediction
    

def train_and_measure_time(device, X_train, y_train, epochs = 200, batch_size=128, learning_rate=0.001, should_print_time=False):
    """Train a `LinearRegression` model with mini-batch SGD and time the run.

    Args:
        device: torch device (or device string) on which to train.
        X_train: 2-D feature tensor, one row per sample.
        y_train: target tensor. A 1-D tensor is reshaped to a column so it
            lines up with the model's `(batch, 1)` output.
        epochs: number of full passes over the training data.
        batch_size: number of samples per SGD step.
        learning_rate: SGD step size.
        should_print_time: when True, print the elapsed training time.

    Returns:
        Tuple `(model, seconds)`: the trained model and the wall-clock
        training time rounded to two decimals.
    """
    device = torch.device(device)
    # CUDA kernels are launched asynchronously, so the clock must only be read
    # after the device has finished its queued work.
    uses_cuda = device.type == "cuda"

    # No-ops when the tensors already live on `device`.
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    if y_train.dim() == 1:
        # Without this, MSELoss would broadcast (batch, 1) against (batch,)
        # into a (batch, batch) matrix and optimise the wrong objective.
        y_train = y_train.reshape(-1, 1)

    if uses_cuda:
        torch.cuda.synchronize(device)
    start = time.perf_counter()

    model = LinearRegression(X_train.shape[1]).to(device)
    criterion = torch.nn.MSELoss()                                          # MSE
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)       # optimizer over the model params

    dataset = TensorDataset(X_train, y_train)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for _ in range(epochs):
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)                     # X * W^T + b
            loss = criterion(y_pred, y_batch)           # 1/n ∑ (ytrue - ypred)^2       (MSE)

            optimizer.zero_grad()                       # clear gradients from the previous step
            loss.backward()                             # back propagation - calculates gradients of the loss

            optimizer.step()                            # w = w - learning_rate * (∂loss / ∂w)

    if uses_cuda:
        torch.cuda.synchronize(device)
    end = time.perf_counter()

    if should_print_time:
        print(f"Training time: {end - start:.2f} seconds\n")

    return model, (round(end - start, 2))

In [105]:
def eval_pytorch(model, X, y):
    """Return the mean squared error of `model`'s predictions on `X` against `y`.

    The model is switched to evaluation mode and the forward pass runs with
    gradient tracking disabled.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(X)
    # scikit-learn works on host arrays, so both tensors are copied to the CPU.
    targets_np = y.cpu().numpy()
    predictions_np = predictions.cpu().numpy()
    return mean_squared_error(targets_np, predictions_np)

def get_custom_dataframe_pytorch(device, X_train_val, y_train_val, X_test, y_test, cv=5, random_states=None,
                                 epochs=200, batch_size=128, learning_rate=0.001):
    """Repeat train/validation splitting and training, and tabulate the results.

    For each of `cv` repetitions the train/validation data is split 80/20, a
    model is trained on the training part, and its MSE is measured on the
    training, validation and test sets.

    Args:
        device: torch device to move the data to and train on.
        X_train_val, y_train_val: features and targets used for the
            train/validation splits.
        X_test, y_test: held-out features and targets.
        cv: number of repetitions.
        random_states: optional sequence of split seeds, one per repetition.
            When empty or None the splits are unseeded.
        epochs, batch_size, learning_rate: training hyperparameters forwarded
            to `train_and_measure_time`.

    Returns:
        DataFrame with rows "Time", "Train", "Validation", "Test" and one
        column per repetition.

    Raises:
        ValueError: if `random_states` is given but has fewer than `cv` entries.
    """
    # Validate the seeds up front so a short list fails before any training is done.
    if random_states:
        seeds = list(random_states)
        if len(seeds) < cv:
            raise ValueError(
                f"random_states has {len(seeds)} entries but cv={cv} repetitions were requested"
            )
    else:
        seeds = [None] * cv

    X_train_val = X_train_val.to(device)
    y_train_val = y_train_val.to(device).view(-1, 1)    # column vector, matching the model output
    X_test = X_test.to(device)
    y_test = y_test.to(device).view(-1, 1)

    rows = []
    for seed in seeds[:cv]:
        X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=seed)

        # Named `elapsed` rather than `time` to avoid shadowing the `time` module.
        model, elapsed = train_and_measure_time(device, X_train, y_train, epochs=epochs, batch_size=batch_size,
                                                learning_rate=learning_rate, should_print_time=False)

        rows.append([
            round(elapsed, 2),
            eval_pytorch(model, X_train, y_train),
            eval_pytorch(model, X_val, y_val),
            eval_pytorch(model, X_test, y_test),
        ])

    # Build the array once; reshape keeps the (0, 4) shape when there are no repetitions.
    results = np.array(rows, dtype=float).reshape(-1, 4)
    result_df = pd.DataFrame(results.T, index=["Time", "Train", "Validation", "Test"])
    return result_df

CPU

In [106]:
# Run the repeated train/validation benchmark on the CPU; the resulting
# table is the cell output.
device = torch.device("cpu")
get_custom_dataframe_pytorch(
    device,
    X_train_val,
    y_train_val,
    X_test,
    y_test,
    random_states=RANDOM_STATES,
)

Unnamed: 0,0,1,2,3,4
Time,3.98,3.92,3.9,3.89,3.88
Train,133.465561,136.576492,137.662384,138.182816,138.057678
Validation,151.281281,138.67276,135.242599,132.157883,132.651184
Test,121.787094,123.202599,124.061874,122.410927,122.258476


GPU

In [107]:
# Same benchmark on the GPU; falls back to a message when CUDA is unavailable.
if not torch.cuda.is_available():
    result = "GPU not available"
else:
    device = torch.device("cuda")
    result = get_custom_dataframe_pytorch(
        device,
        X_train_val,
        y_train_val,
        X_test,
        y_test,
        random_states=RANDOM_STATES,
    )


result

Unnamed: 0,0,1,2,3,4
Time,6.13,6.03,5.93,5.94,5.97
Train,133.503296,136.579559,137.608017,138.175613,138.040924
Validation,151.162277,138.644592,134.946014,132.284073,132.542114
Test,121.610085,123.358261,123.904434,122.527168,122.279869
