In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
from IPython.core.debugger import Pdb

# Definition of parameters

In [3]:
import os
import shutil

device = 'cuda'
print(device)
learning_rate = 0.00001
batch_size = 10_000
experiment_name = 'test_experiment'
# the directory experiment_name exists, delete it
# if os.path.exists(experiment_name):
#     shutil.rmtree(experiment_name)


cuda


# Data loading

In [7]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, csv):
        # read the csv file
        self.df = pd.read_csv(csv, sep='\s+')
        # self.df = self.df.dropna(axis=0)
        # save cols
        self.input_cols = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
        self.output_cols = ['MEDV']
        


    def __len__(self):
        # TODO: here i will return the number of samples in the dataset
        return len(self.df)


    def __getitem__(self, idx):
        # read row, split in input and output and convert in tensors
        cur_sample = self.df.iloc[idx]
        # split the current sample in input and output (ground truth)
        cur_sample_x = cur_sample[self.input_cols]
        cur_sample_y = cur_sample[self.output_cols]
        # convert to tensor (torch format)
        #cur_sample_x = torch.tensor(cur_sample_x.tolist(), dtype=torch.float32, device=device)
        #cur_sample_y = torch.tensor(cur_sample_y.tolist(), dtype=torch.float32, device=device)
        cur_sample_x = torch.tensor(cur_sample_x.tolist(), dtype=torch.float32)
        cur_sample_y = torch.tensor(cur_sample_y.tolist(), dtype=torch.float32)
        # return the sample
        return cur_sample_x, cur_sample_y


  self.df = pd.read_csv(csv, sep='\s+')


In [8]:
# try to use the dataset
ds = Dataset('../datasets/BostonHousingDataset/train.csv')
# get first item
xx,yy = ds.__getitem__(0)
# print shapes
print(xx.shape)
print(yy.shape)

torch.Size([13])
torch.Size([1])


In [None]:
# create train and validation datasets
train_ds = Dataset('../datasets/BostonHousingDataset/train.csv')
val_ds =  Dataset('../datasets/BostonHousingDataset/val.csv')

In [None]:
# create train dataloader
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size = batch_size,
    drop_last = True,
    shuffle = True,
    num_workers = 4
)
# create validation dataloader
val_dl = torch.utils.data.DataLoader(
    val_ds,
    batch_size = batch_size,
    drop_last = False,
    shuffle = False,
    num_workers = 4
)

train_ds.__getitem__(1)

(tensor([-0.4176,  1.8728, -1.0734, -0.2726, -0.6107,  0.5861, -0.4329,  0.9208,
         -0.5230, -0.2271, -0.3956,  0.4411, -0.7672]),
 tensor([0.1488]))

# Network definition

In [None]:
# define network

class Net(nn.Module):

    def __init__(self):
        # initialize super class
        super(Net, self).__init__()
        self.layer1 = nn.Linear(13,128)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(128,64)
        self.layer4 = nn.ReLU()
        self.layer5 = nn.Linear(64,32)
        self.layer6 = nn.ReLU()
        self.layer7 = nn.Linear(32,16)
        self.layer8 = nn.ReLU()
        self.layer9 = nn.Linear(16, 1)


    def forward(self, x):
        # apply layers in cascade
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.layer9(x)
        # return output
        return x


In [None]:
# let's test the network
net = Net()

# define random batch of 10 elements
inp = torch.rand(10, 13)

# forward
out = net(inp)

# let's print the shape
print(' Input shape is', inp.shape)
print('Output shape is', out.shape)

 Input shape is torch.Size([10, 13])
Output shape is torch.Size([10, 1])


In [None]:
# let's move the network in GPU
net.to(device)

# define random batch of 10 elements
inp = torch.rand(10, 13)

# move the batch in GPU
inp = inp.to(device)

# get the output
out = net(inp)

# let's print the shape
print(' Input shape is', inp.shape)
print('Output shape is', out.shape)

 Input shape is torch.Size([10, 13])
Output shape is torch.Size([10, 1])


# Define validation routine

In [None]:
# create validation routine
def validate(net, dl):
    # get final score
    score = 0
    # set network in eval mode
    net.eval()
    # at the end of epoch, validate model
    for inp, gt in dl:
        # move batch to gpu
        inp = inp.to(device)
        gt = gt.to(device)
        # get output
        with torch.no_grad():
            out = net(inp)
        # compare with gt
        cur_score = F.l1_loss(out, gt)
        # append
        score += cur_score 
    # at the end, average over batches
    score /= len(dl)
    # set network in training mode
    net.train()
    # return score
    return score
        
        

# Train

In [None]:
import shutil
# %load_ext tensorboard
#%reload_ext tensorboard
#%tensorboard --logdir={experiment_name}

In [None]:
from tqdm import tqdm
# define optimizer
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)

# define summary writer
writer = SummaryWriter(experiment_name)

# initialize iteration number
n_iter = 0

# define best validation value
best_val = None

# for each epoch
for cur_epoch in tqdm(range(250)):
    # plot current epoch
    writer.add_scalar("epoch", cur_epoch, n_iter)
    # for each batch
    for inp, gt in train_dl:
        # move batch to gpu
        inp = inp.to(device)
        gt = gt.to(device)
        # reset gradients
        optimizer.zero_grad()
        # get output
        out = net(inp)
        # compute loss
        loss = F.l1_loss(out, gt)
        # compute backward
        loss.backward()
        # update weights
        optimizer.step()
        # plot
        writer.add_scalar("loss", loss.item(), n_iter)
        n_iter += 1
        
    # at the end, validate model
    cur_val = validate(net, val_dl)
    # plot validation
    writer.add_scalar("val", loss.item(), n_iter)
    # check if it is the best model so far
    if best_val is None or cur_val > best_val:
        # define new best val
        best_val = cur_val
        # save current model as best
        torch.save({
            'net': net.state_dict(),
            'opt': optimizer.state_dict(),
            'epoch': cur_epoch
        }, experiment_name + '_best.pth')
        # save last model
        torch.save({
            'net': net.state_dict(),
            'opt': optimizer.state_dict(),
            'epoch': cur_epoch
        }, experiment_name + '_last.pth')
    

  3%|▎         | 7/250 [00:01<00:43,  5.58it/s]

100%|██████████| 250/250 [00:43<00:00,  5.69it/s]


# Test

In [None]:
# create test dataset
test_ds =  Dataset('../datasets/BostonHousingDataset/test.csv')

# create dataloader
test_dl = torch.utils.data.DataLoader(
    test_ds,
    batch_size = batch_size,
    drop_last = False,
    shuffle = False
)

In [None]:
# TODO: load best network

state = torch.load(experiment_name + '_best.pth')
net.load_state_dict(state['data'])
optimizer.load_state_dict(state['opt'])
best_epoch = state['epoch']


KeyError: 'data'

In [None]:
test_value = validate(net, test_dl).item()

In [None]:
# see how many values of the test dataset are predicted correctly
correct = 0
total = 0
for inp, gt in test_dl:
    inp = inp.to(device)
    gt = gt.to(device)
    out = net(inp)
    total += gt.size(0)
    correct += (torch.abs(out - gt) < 1).sum().item()

print('Accuracy of the network on the test dataset: %d %%' % (100 * correct / total))

In [None]:
print(f'The model scored a MAE of {test_value:0.04f} over the testset.')

In [None]:
import timeit

a_cpu = torch.rand(1000, device='cpu')
b_cpu = torch.rand((1000, 1000), device='cpu')
a_mps = torch.rand(1000, device='cuda')
b_mps = torch.rand((1000, 1000), device='cuda')
 
print('cpu', timeit.timeit(lambda: a_cpu @ b_cpu, number=100_000))
print('mps', timeit.timeit(lambda: a_mps @ b_mps, number=100_000))

print('cpu', timeit.timeit(lambda: b_cpu @ b_cpu, number=10_000))
print('mps', timeit.timeit(lambda: b_mps @ b_mps, number=10_000))


b_cpu = torch.rand((10000, 10000), device='cpu')
b_mps = torch.rand((10000, 10000), device='cuda')

print('cpu', timeit.timeit(lambda: b_cpu @ b_cpu, number=100))
print('mps', timeit.timeit(lambda: b_mps @ b_mps, number=100))

In [None]:

def test_mps():
    a_mps = torch.rand(1000, device='cuda')
    b_mps = torch.rand((1000, 1000), device='cuda')
    a_mps @ b_mps

print('mps', timeit.timeit(lambda: test_mps(), number=100_0000))
# print('cpu', timeit.timeit(lambda: test_cpu(), number=100_000))

mps 25.301743762000115
