# PeDistance Linear Regression

### Initial settings

In [2]:
import numpy as np
import torch
import pandas as pd
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# File name of the checkpoint that save_state writes and load_state reads back
model_file_name="reg_state.pth"

# Pick the torch device according to the available hardware
def get_device():
    """Return the first CUDA device when CUDA is available, otherwise the CPU."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

# Turn a pandas object into a tensor usable by pytorch
def df_to_tensor(df):
    """Convert ``df`` to a float tensor placed on the device from get_device().

    ``df`` only needs to provide ``to_numpy()`` (a DataFrame or a Series).
    """
    values = df.to_numpy()
    as_float = torch.tensor(values).float()
    return as_float.to(get_device())

### Import Dataset

In [3]:
# Columns kept when reading the dataset csv file: the target distance plus
# the X/Y coordinates of the four bounding-box corners
fields = [
    'Distance',
    'PedTopLeftX', 'PedTopLeftY',
    'PedTopRightX', 'PedTopRightY',
    'PedBottomLeftX', 'PedBottomLeftY',
    'PedBottomRightX', 'PedBottomRightY',
]

df = pd.read_csv(r'./Dataset.csv', usecols=fields)
print(df.shape)

(689, 9)


### Splitting dataset


In [4]:
# sklearn function to split the pandas dataframe into train (70%) and test (30%) sets.
# The seed is fixed so the split is identical on every run: with a random split,
# a model restored through load_state could be scored on a "test" set that
# contains rows it was trained on in a previous session.
train, test = train_test_split(df, test_size=0.3, random_state=42)

In [5]:
# Regression targets: the Distance column of each split as a pytorch tensor
Y_training, Y_testing = (df_to_tensor(part['Distance']) for part in (train, test))

for targets in (Y_training, Y_testing):
    print(targets.shape)

torch.Size([482])
torch.Size([207])


#### Getting width and height from raw 2D screen points

In [6]:
# Build the (width, height) features from the bounding-box corner columns.
# One shared, vectorized helper replaces the row-wise lambda that was
# duplicated for the train and test splits.
def _bbox_size(frame):
    """Return a DataFrame with the absolute 'width' and 'height' of each row's box.

    width  = |PedTopRightX - PedTopLeftX|
    height = |PedBottomLeftY - PedTopLeftY|
    """
    width = (frame['PedTopRightX'] - frame['PedTopLeftX']).abs()
    height = (frame['PedBottomLeftY'] - frame['PedTopLeftY']).abs()
    return pd.DataFrame({'width': width, 'height': height})

X_training = df_to_tensor(_bbox_size(train))
X_testing = df_to_tensor(_bbox_size(test))

print(X_training.shape)
print(X_testing.shape)

torch.Size([482, 2])
torch.Size([207, 2])


### Linear Regression Module

In [20]:
from pathlib import Path

class LinearRegressor(nn.Module):
    """Single linear layer trained with full-batch SGD on an MSE loss.

    Inputs are standardized with the per-feature mean and standard deviation
    of the training data. Both statistics are stored on the instance by
    ``fit`` (or restored by ``load_state``) and reused by ``predict``.
    """

    def __init__(self, in_size, out_size):
        super(LinearRegressor, self).__init__()
        self.criterion = nn.MSELoss()
        self.linear = nn.Linear(in_size, out_size)

    # used to train linear regressor
    def fit(self, X_training, Y_training, lr=0.001, epochs=200000, momentum=0.0, reset_parameters=False):
        """Train the model on ``X_training`` / ``Y_training``.

        Stores the normalization statistics of ``X_training``, creates the
        SGD optimizer and a tensorboard writer, then runs ``epochs``
        full-batch updates. When ``reset_parameters`` is true the layer
        parameters are re-initialized before training.
        """
        self.reset_parameters(reset_parameters)

        self.mean = X_training.mean(0)
        self.stds = X_training.std(0)
        X_training_norm = self._data_normalization(X_training)

        # Optimize the parameters of *this* instance rather than those of
        # whatever object happens to be bound to a global name.
        self.optimizer = torch.optim.SGD(self.parameters(), lr=lr, momentum=momentum)
        self.writer = SummaryWriter('logs/linear_regressor_lr' + str(lr) + '_momentum' + str(momentum) + '_epochs' + str(epochs))

        try:
            self._training(X_training_norm, Y_training, lr, epochs, momentum)
        finally:
            # Flush and release the event file even if training is interrupted.
            self.writer.close()

    def _training(self, X_training_norm, Y_training, l, epochs, momentum):
        """Run ``epochs`` full-batch optimization steps on normalized inputs.

        ``l`` (the learning rate) and ``momentum`` are accepted for
        signature compatibility only: the optimizer stored in
        ``self.optimizer`` is already configured with them. The training
        loss of every epoch is logged to ``self.writer`` as 'loss/train'.
        No test loss is logged here; use ``score`` after fitting.
        """
        self.train()
        for e in range(epochs):
            preds_train = self.forward(X_training_norm)
            loss = self.criterion(preds_train.view(-1), Y_training)

            self.writer.add_scalar('loss/train', loss.item(), global_step=e)

            loss.backward()

            self.optimizer.step()
            self.optimizer.zero_grad()

    # used to get prediction based on trained linear regressor
    def predict(self, X):
        """Normalize raw features ``X`` with the stored statistics and return the predictions."""
        X_norm = self._data_normalization(X)
        return self.linear(X_norm)

    # used to get final score tensor and predicted tensor based on train or test features and labels
    def score(self, X, Y):
        """Return ``(loss, predictions)`` for raw features ``X`` and labels ``Y``.

        ``loss`` is the criterion evaluated on the flattened predictions.
        """
        Y_preds = self.predict(X)
        loss = self.criterion(Y_preds.view(-1), Y)
        return loss, Y_preds

    # used to reset parameters before training
    def reset_parameters(self, reset_parameters=False):
        """Re-initialize every child layer that defines ``reset_parameters`` when the flag is true."""
        if reset_parameters:
            for layer in self.children():
                if hasattr(layer, 'reset_parameters'):
                    layer.reset_parameters()

    # used to load parameters and model state to make predictions or continue training
    def load_state(self, path):
        """Restore normalization statistics and parameters from ``path``.

        Does nothing when ``path`` does not exist.
        """
        if Path(path).exists():
            # Map the stored tensors onto the device this model lives on, so a
            # checkpoint saved on a GPU can be loaded on a CPU-only machine.
            device = next(self.parameters()).device
            checkpoint = torch.load(path, map_location=device)
            self.mean = checkpoint["mean"]
            self.stds = checkpoint["stds"]
            self.load_state_dict(checkpoint["model_state_dict"])

    # used to save trained model state
    def save_state(self, path):
        """Save the normalization statistics and the model state dict to ``path``."""
        torch.save({
            'mean': self.mean,
            'stds': self.stds,
            'model_state_dict': self.state_dict(),
        }, path)

    def _data_normalization(self, data):
        """Standardize ``data`` with the stored per-feature mean and standard deviation."""
        return (data - self.mean) / self.stds

    def forward(self, x):
        """Apply the linear layer to already-normalized inputs."""
        return self.linear(x)

### Modular Linear Regression

In [21]:
# Two input features (width, height) and one output (distance); the model is moved to the device returned by get_device()
reg = LinearRegressor(2,1).to(get_device())

In [14]:
# Restore the saved parameters and normalization statistics; load_state does nothing when the file does not exist
reg.load_state(model_file_name)

In [17]:
# Train with the default hyperparameters of fit, then save the resulting state to model_file_name
reg.fit(X_training, Y_training)
reg.save_state(model_file_name)

In [15]:
# Score the model on the training set, then print each [target, prediction] pair
loss_train, Y_train_preds = reg.score(X_training, Y_training)
print(f"Loss train: {loss_train.item()}")
for target, predicted in zip(Y_training, Y_train_preds):
    pair = [target.item(), predicted.item()]
    print(pair)

Loss train: 3.14652681350708
[8.32291030883789, 8.460108757019043]
[5.0554304122924805, 0.6458368301391602]
[10.745719909667969, 13.33796501159668]
[9.095480918884277, 11.239663124084473]
[18.3211727142334, 16.9089412689209]
[18.8629207611084, 17.17870330810547]
[7.16493034362793, 6.312450408935547]
[5.186280727386475, 0.9428377151489258]
[12.050487518310547, 13.471766471862793]
[6.642959117889404, 5.992827892303467]
[18.772342681884766, 16.9089412689209]
[10.841412544250488, 12.324480056762695]
[9.012786865234375, 10.833433151245117]
[10.583412170410156, 12.924891471862793]
[17.75496482849121, 16.30694580078125]
[20.433074951171875, 16.973499298095703]
[17.374425888061523, 16.38829231262207]
[12.489852905273438, 14.393385887145996]
[13.587333679199219, 14.758689880371094]
[6.479678630828857, 4.646100997924805]
[13.669398307800293, 14.721942901611328]
[7.251957893371582, 8.033415794372559]
[20.085269927978516, 17.766002655029297]
[19.03744125366211, 17.058521270751953]
[9.4630250930786

In [16]:
# Score the model on the test set, then print each [target, prediction] pair
loss_testing, Y_testing_preds = reg.score(X_testing, Y_testing)
print(f"Loss testing: {loss_testing.item()}")
for target, predicted in zip(Y_testing, Y_testing_preds):
    pair = [target.item(), predicted.item()]
    print(pair)

Loss testing: 2.606541156768799
[6.190547943115234, 4.590981483459473]
[14.537843704223633, 15.664024353027344]
[10.52283763885498, 10.890642166137695]
[5.2639241218566895, 1.5521831512451172]
[10.358003616333008, 11.432764053344727]
[5.964507579803467, 3.836306571960449]
[10.81712818145752, 12.538619041442871]
[13.462961196899414, 14.424872398376465]
[7.010646820068359, 7.701183795928955]
[17.45148468017578, 17.001312255859375]
[7.5683746337890625, 7.250857830047607]
[15.371305465698242, 15.627278327941895]
[16.31889533996582, 15.850855827331543]
[5.03120756149292, 0.4069852828979492]
[19.537919998168945, 16.11485481262207]
[7.905259132385254, 9.262048721313477]
[12.0525541305542, 14.337760925292969]
[14.909192085266113, 14.424872398376465]
[18.79692268371582, 16.966150283813477]
[13.712966918945312, 15.207935333251953]
[21.625946044921875, 17.090007781982422]
[14.892918586730957, 16.05923080444336]
[10.568982124328613, 12.265181541442871]
[10.061871528625488, 11.277419090270996]
[19.

### Imperative Linear Regression

In [24]:
# Hyperparameters of the hand-written training loop
lr = 0.0001
momentum = 0.99
epochs = 200000

writer = SummaryWriter('logs/linear_regressor_lr' + str(lr) + '_momentum' + str(momentum) + '_epochs' + str(epochs))

# Data normalization: both splits are standardized with the statistics of
# the training set only
mean = X_training.mean(0)
stds = X_training.std(0)

X_training_norm = (X_training-mean)/stds
X_testing_norm = (X_testing-mean)/stds

reg = LinearRegressor(2,1).to(get_device())
criterion = nn.MSELoss()

optimizer = torch.optim.SGD(reg.parameters(),lr=lr, momentum=momentum)

try:
    for e in range(epochs):
        # Full-batch training step, logged as 'loss/train'
        reg.train()
        preds_train = reg(X_training_norm)
        l = criterion(preds_train.view(-1),Y_training)

        writer.add_scalar('loss/train', l.item(), global_step=e)

        l.backward()

        optimizer.step()
        optimizer.zero_grad()

        # Evaluation on the test split without tracking gradients,
        # logged as 'loss/test'
        reg.eval()

        with torch.no_grad():
            preds_test = reg(X_testing_norm)
            l = criterion(preds_test.view(-1),Y_testing)
            writer.add_scalar('loss/test', l.item(), global_step=e)
finally:
    # Flush and close the event file even when the long loop is interrupted,
    # so the scalars logged so far are not lost.
    writer.close()