In [1]:
# Preprocessing the DataSet

from tabular_data import load_airbnb
import numpy as np

X, y = load_airbnb()
X.drop(532, axis=0, inplace=True)
y.drop(532, axis=0, inplace=True)
X['guests'] = X['guests'].str.replace('\'','').astype(np.float64)
X['bedrooms'] = X['bedrooms'].str.replace('\'','').astype(np.float64)

In [2]:
 # Task 1

import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch.nn.functional as F

# DataSet Class
class AirbnbNightlyPriceImageDataset(torch.utils.data.Dataset):
    def __init__(self):
        super().__init__()
        self.X, self.y = X , y
    # Not dependent on index
    def __getitem__(self, index):
        features = torch.tensor(self.X.iloc[index])
        label = torch.tensor(self.y.iloc[index])
        return (features, label)

    def __len__(self):
        return len(self.X)

dataset = AirbnbNightlyPriceImageDataset()
print(dataset[10])
print(len(dataset))

batch_size = 16

# Split the data 
train_dataset, validation_dataset, test_dataset = random_split(dataset, [0.7, 0.15, 0.15], generator=torch.Generator().manual_seed(42))

# Create DataLoaders
train_loader=DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader=DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
test_loader=DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

len(train_dataset), len(validation_dataset), len(test_dataset)

(tensor([2.0000, 1.0000, 1.0000, 5.0000, 5.0000, 4.8000, 5.0000, 5.0000, 4.8000,
        8.0000, 1.0000], dtype=torch.float64), tensor(126))
889


(623, 133, 133)

In [3]:
# Task 2

# Linear Model
class LinearRegression(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        self.linear_layer = torch.nn.Linear(11,1) # 11 features, 1 label

    def forward(self, features):
        # Use the layers to process the features
        return self.linear_layer(features)

model = LinearRegression()

# Train function
def train(model, dataloader, epochs=10):
    for epoch in range(epochs):
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = F.mse_loss(prediction, labels)
            loss.backward()
            print(loss)
            
    return

train(model, test_dataset)

tensor(6225.5342, grad_fn=<MseLossBackward0>)
tensor(40599.8750, grad_fn=<MseLossBackward0>)
tensor(80.5699, grad_fn=<MseLossBackward0>)
tensor(731.9358, grad_fn=<MseLossBackward0>)
tensor(41494.6133, grad_fn=<MseLossBackward0>)
tensor(32702.2871, grad_fn=<MseLossBackward0>)
tensor(181890.0469, grad_fn=<MseLossBackward0>)
tensor(8752.2373, grad_fn=<MseLossBackward0>)
tensor(146546.1562, grad_fn=<MseLossBackward0>)
tensor(130453.2109, grad_fn=<MseLossBackward0>)
tensor(5220.3545, grad_fn=<MseLossBackward0>)
tensor(23076.0938, grad_fn=<MseLossBackward0>)
tensor(18732.2891, grad_fn=<MseLossBackward0>)
tensor(19343.8711, grad_fn=<MseLossBackward0>)
tensor(51325.4844, grad_fn=<MseLossBackward0>)
tensor(12476.6387, grad_fn=<MseLossBackward0>)
tensor(3886.6960, grad_fn=<MseLossBackward0>)
tensor(89861.2031, grad_fn=<MseLossBackward0>)
tensor(9649.5410, grad_fn=<MseLossBackward0>)
tensor(5787.6255, grad_fn=<MseLossBackward0>)
tensor(132155.5312, grad_fn=<MseLossBackward0>)
tensor(4180.6704, gr

  loss = F.mse_loss(prediction, labels)


tensor(5888.6338, grad_fn=<MseLossBackward0>)
tensor(13799.2939, grad_fn=<MseLossBackward0>)
tensor(25275.5469, grad_fn=<MseLossBackward0>)
tensor(5046.0801, grad_fn=<MseLossBackward0>)
tensor(439753.2188, grad_fn=<MseLossBackward0>)
tensor(15485.1318, grad_fn=<MseLossBackward0>)
tensor(7429.0400, grad_fn=<MseLossBackward0>)
tensor(3400.3315, grad_fn=<MseLossBackward0>)
tensor(333561.8750, grad_fn=<MseLossBackward0>)
tensor(2855.0193, grad_fn=<MseLossBackward0>)
tensor(9756.7783, grad_fn=<MseLossBackward0>)
tensor(30870.2637, grad_fn=<MseLossBackward0>)
tensor(40510.9766, grad_fn=<MseLossBackward0>)
tensor(21659.0938, grad_fn=<MseLossBackward0>)
tensor(16545.8398, grad_fn=<MseLossBackward0>)
tensor(7708.3584, grad_fn=<MseLossBackward0>)
tensor(6225.5342, grad_fn=<MseLossBackward0>)
tensor(40599.8750, grad_fn=<MseLossBackward0>)
tensor(80.5699, grad_fn=<MseLossBackward0>)
tensor(731.9358, grad_fn=<MseLossBackward0>)
tensor(41494.6133, grad_fn=<MseLossBackward0>)
tensor(32702.2871, grad_

In [4]:
# Task 3 


loss_fn = torch.nn.MSELoss() # This Loss function is better

# Train function with optimiser
def train(model, dataloader, epochs=10):

    optimiser = torch.optim.SGD(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction, labels)
            loss.backward() # What does this do? Populates the gradients?
            print(loss)
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
            
    return

train(model,train_loader)

tensor(45433.6406, grad_fn=<MseLossBackward0>)
tensor(2.4612e+11, grad_fn=<MseLossBackward0>)
tensor(3.6975e+11, grad_fn=<MseLossBackward0>)
tensor(1.1447e+12, grad_fn=<MseLossBackward0>)
tensor(6.6102e+12, grad_fn=<MseLossBackward0>)
tensor(3.3460e+13, grad_fn=<MseLossBackward0>)
tensor(2.2160e+14, grad_fn=<MseLossBackward0>)
tensor(1.0976e+15, grad_fn=<MseLossBackward0>)
tensor(3.6695e+15, grad_fn=<MseLossBackward0>)
tensor(1.8985e+16, grad_fn=<MseLossBackward0>)
tensor(1.2945e+17, grad_fn=<MseLossBackward0>)
tensor(1.2299e+18, grad_fn=<MseLossBackward0>)
tensor(7.7375e+18, grad_fn=<MseLossBackward0>)
tensor(3.3610e+19, grad_fn=<MseLossBackward0>)
tensor(1.9154e+20, grad_fn=<MseLossBackward0>)
tensor(1.0331e+21, grad_fn=<MseLossBackward0>)
tensor(4.2848e+21, grad_fn=<MseLossBackward0>)
tensor(1.8198e+22, grad_fn=<MseLossBackward0>)
tensor(8.4980e+22, grad_fn=<MseLossBackward0>)
tensor(2.1974e+23, grad_fn=<MseLossBackward0>)
tensor(1.0577e+24, grad_fn=<MseLossBackward0>)
tensor(4.3810

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)


In [25]:
# Task 4
from torch.utils.tensorboard import SummaryWriter

# Neural Networks Model - Updated with more Layers
class NeuralNetwork(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        self.layers = torch.nn.Sequential( # Update Model with more Layers
        torch.nn.Linear(11, 20),
        torch.nn.ReLU(),
        torch.nn.Linear(20, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
        )

    def forward(self, features):
        # Use the layers to process the features
        return self.layers(features)

model = NeuralNetwork()
loss_fn = torch.nn.MSELoss()

# Train function with Tensorboard
def train(model, dataloader, epochs=15):

    optimiser = torch.optim.SGD(model.parameters(), lr=0.0001)

    writer = SummaryWriter()

    for epoch in range(epochs):
        batch_idx = 0
        current_loss = 0.0
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction,labels)
            loss.backward() 
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
            ls = loss.item()
            print("Loss", ls)
            batch_idx += 1
            current_loss = current_loss + ls
        
        # print (f"currentnt loss {current_loss} and batch index {batch_idx}")
        # print(f'Loss after mini-batch  ({epoch + 1} : {current_loss // batch_idx}')
            writer.add_scalar('loss',current_loss / batch_idx , epoch)
        
train(model,train_loader)

# The Plotting does not seem okay
# Do we visualize the 

Loss 32874.046875
Loss 33385.96484375
Loss 18677.79296875
Loss 6260.33251953125
Loss 29972.0390625
Loss 152446.515625
Loss 51624.62890625
Loss 13705.05859375
Loss 17883.490234375
Loss 38810.6171875
Loss 33240.375
Loss 21057.158203125
Loss 21708.265625
Loss 14642.0244140625
Loss 55883.1796875
Loss 50960.8046875
Loss 23199.65625
Loss 21984.55859375
Loss 42962.0625
Loss 65052.875
Loss 44503.12890625
Loss 27910.4453125
Loss 51964.23828125
Loss 94222.703125
Loss 61028.0078125
Loss 23614.3125
Loss 16019.2275390625
Loss 35824.36328125
Loss 14794.5458984375
Loss 49328.046875
Loss 18435.94140625
Loss 31823.076171875
Loss 27371.291015625
Loss 74436.7890625
Loss 16412.806640625
Loss 34907.51953125
Loss 21182.865234375
Loss 48917.1640625
Loss 21511.837890625
Loss 35571.33984375
Loss 15856.921875
Loss 52315.2734375
Loss 36915.86328125
Loss 16096.279296875
Loss 45267.6015625
Loss 22541.26171875
Loss 38462.34765625
Loss 16142.451171875
Loss 14949.314453125
Loss 28321.900390625
Loss 87435.9140625
Loss

In [30]:
# Task 5 

# yaml file
''' 
optimiser: SGD
lr: 0.001
hidden_layer_width: 32
depth: 5
'''
# Define function get_nn_config()
import yaml
def get_nn_config():
    with open('nn_config.yaml', 'r') as stream:
    # Converts yaml document to python object
        dictionary=yaml.safe_load(stream)
    return dictionary

# Retrieve config dictionary
nn_config = get_nn_config()

# Redefine NeuralNetwork to include the custom numbers of hidden layers (depth) and hidden layers width
class NeuralNetwork(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        #self.linear_layer = torch.nn.Linear(11,1) # 11 features, 1 label

        self.layers = torch.nn.Sequential()
        self.layers.add_module("Input Layer", torch.nn.Linear(11, nn_config['hidden_layer_width'])) # Input layer
        self.layers.add_module("ReLU", torch.nn.ReLU())
        for i in range(nn_config['depth'] - 2): #  The input and the first linear layer are already taken into account
            self.layers.add_module("Hidden Layer", torch.nn.Linear(nn_config['hidden_layer_width'], nn_config['hidden_layer_width'])) # Hidden Layer
            self.layers.add_module("Hidden ReLU", torch.nn.ReLU())
        self.layers.add_module("Output Layer", torch.nn.Linear(nn_config['hidden_layer_width'], 1))# output layer
    

    def forward(self, features):
        # Use the layers to process the features
        return self.layers(features)

model = NeuralNetwork()

# Train function with config 
def train(model, dataloader, nn_config, epochs=15):

    # Set optimiser with lr from nn_config
    if nn_config['optimiser'] == "SGD":
        optimiser = torch.optim.SGD(model.parameters(), lr=nn_config['lr'])

    elif nn_config['optimiser'] == "Adam":
        optimiser = torch.optim.Adam(model.parameters(), lr=nn_config['lr'])

    elif nn_config['optimiser'] == "Adagrad":
        optimiser = torch.optim.Adagrad(model.parameters(), lr=nn_config['lr'])

    writer = SummaryWriter()

    batch_idx = 0

    for epoch in range(epochs):
        batch_idx = 0
        current_loss = 0.0
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction,labels)
            loss.backward() 
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
            ls = loss.item()
            print("Loss", ls)
            batch_idx += 1
            current_loss = current_loss + ls
        
        # print (f"currentnt loss {current_loss} and batch index {batch_idx}")
        # print(f'Loss after mini-batch  ({epoch + 1} : {current_loss // batch_idx}')
            writer.add_scalar('loss',current_loss / batch_idx , epoch)

train(model,train_loader,nn_config)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Loss 18310.115234375
Loss 22052.578125
Loss 1733978752.0
Loss 59853812.0
Loss 16750.033203125
Loss 15768.2998046875
Loss 10666.501953125
Loss 3181.186279296875
Loss 6162.72802734375
Loss 3274.758056640625
Loss 6310.32373046875
Loss 15342.03125
Loss 15253.5
Loss 26386.3984375
Loss 10649.123046875
Loss 31295.318359375
Loss 25279.4453125
Loss 5948.83251953125
Loss 8617.21875
Loss 7330.173828125
Loss 39545.16015625
Loss 22756.107421875
Loss 4493.6162109375
Loss 3573.541748046875
Loss 26726.978515625
Loss 5459.2275390625
Loss 3902.8896484375
Loss 11376.8916015625
Loss 6706.4072265625
Loss 17096.822265625
Loss 11234.052734375
Loss 35898.1640625
Loss 23220.462890625
Loss 14199.22265625
Loss 4454.2490234375
Loss 21123.3828125
Loss 9465.3984375
Loss 27822.3125
Loss 8875.0322265625
Loss 6949.73291015625
Loss 12910.408203125
Loss 7499.80126953125
Loss 5606.97314453125
Loss 2400.288330078125
Loss 19259.853515625
Loss 18634.17578125
Loss 9307.646484375
Loss 21439.77734375
Loss 3400.17626953125
Loss

In [32]:
# Task 6

import os
import time
import json

def save_model(best_model, best_hyperparameters, best_metrics):
    '''
        Creates a models folder, then within the models' folder creates a regression folder and finally creates a last neural networks folder where it stores the model, a dictionary of its hyperparameters and a dictionary of its metrics
        
        Parameters
        ----------
        folder_name: str
            A string used to name the folder to be created
        
        best_model: pytorch model
            A model from pythorch
        
        best_hyperparameters: dict
            A dictionary containing the optimal hyperparameters configuration
        
        best_metrics: dict 
            A dictionary containing the test metrics obtained using the best model   

        Returns
        -------
        None       
    '''

    # Create Models folder
    models_dir = 'airbnb-property-listings/models'
    current_dir = os.path.dirname(os.getcwd())
    models_path = os.path.join(current_dir, models_dir)
    if os.path.exists(models_path) == False:
        os.mkdir(models_path)

    # Create regression folder
    regression_dir = 'airbnb-property-listings/models/regression'
    current_dir = os.path.dirname(os.getcwd())
    regression_path = os.path.join(current_dir, regression_dir)
    if os.path.exists(regression_path) == False:
        os.mkdir(regression_path)

    # Create neural_networks folder
    nn_name_dir = os.path.join(regression_path,'neural_networks') # Create the neural network folder
    current_dir = os.path.dirname(os.getcwd())
    nn_name_path = os.path.join(current_dir, nn_name_dir)
    if os.path.exists(nn_name_path) == False:
        os.mkdir(nn_name_path)

    # Create a Timestamp folder
    timestamp_dir = os.path.join(nn_name_dir,time.strftime("%Y-%m-%d_%H:%M:%S")) # Create the timestamp folder
    current_dir = os.path.dirname(os.getcwd())
    timestamp_path = os.path.join(current_dir, timestamp_dir)
    if os.path.exists(timestamp_path) == False:
        os.mkdir(timestamp_path)

    # Save the model in a file called model.pt
    torch.save(best_model, os.path.join(timestamp_path, 'model.pt')) 
   
    # Save the hyperparameters in a file called hyperparameters.json
    with open(os.path.join(timestamp_path, 'hyperparameters.json'), 'w') as fp: 
            json.dump(best_hyperparameters, fp)

    # Save the metrics in a file called metrics.json
    with open(os.path.join(timestamp_path, 'metrics.json'), 'w') as fp:
            json.dump(best_metrics, fp)

    return


# Define the model

model = NeuralNetwork()

best_model = train(model,train_loader,nn_config)

# Define the hyperparemeters

best_hyperparameters = get_nn_config()

# Calculate the metrics

''' 
The RMSE loss of your model under a key called RMSE_loss for training, validation, and test sets
The R^2 score of your model under a key called R_squared for training, validation, and test sets
The time taken to train the model under a key called training_duration
The average time taken to make a prediction under a key called inference_latency

'''
best_metrics = {

    'RMSE_loss' : 0, #[training, validation, test], # Need to Calculate the metrics
    'R^2' : 0, #[training, validation, test],
    'training_duration' : 0,
    'inference_latency' : 0,
}

save_model(best_model, best_hyperparameters, best_metrics)

Loss 46977.3359375
Loss 11844.857421875
Loss 87655.125
Loss 13715.0302734375
Loss 94273416.0
Loss 32588.1640625
Loss 33342.234375
Loss 14352.0283203125
Loss 35434.6171875
Loss 64561.578125
Loss 27883.984375
Loss 26150260736.0
Loss 4.908231502803763e+16
Loss inf
Loss inf
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan


In [None]:
# Task 7


