In [1]:
# Preprocessing the DataSet

from tabular_data import load_airbnb
import numpy as np

X, y = load_airbnb()
X.drop(532, axis=0, inplace=True)
y.drop(532, axis=0, inplace=True)
X['guests'] = X['guests'].str.replace('\'','').astype(np.float64)
X['bedrooms'] = X['bedrooms'].str.replace('\'','').astype(np.float64)

In [2]:
 # Task 1

import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch.nn.functional as F


# DataSet Class
class AirbnbNightlyPriceImageDataset(torch.utils.data.Dataset):
    def __init__(self):
        super().__init__()
        self.X, self.y = X , y
    # Not dependent on index
    def __getitem__(self, index):
        features = torch.tensor(self.X.iloc[index])
        label = torch.tensor(self.y.iloc[index])
        return (features, label)

    def __len__(self):
        return len(self.X)

dataset = AirbnbNightlyPriceImageDataset()
print(dataset[10])
print(len(dataset))

batch_size = 16

# Split the data 
train_dataset, validation_dataset, test_dataset = random_split(dataset, [0.7, 0.15, 0.15], generator=torch.Generator().manual_seed(42))

# Create DataLoaders
train_loader=DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader=DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
test_loader=DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

features, label = next(iter(train_loader))

len(train_dataset), len(validation_dataset), len(test_dataset)

(tensor([2.0000, 1.0000, 1.0000, 5.0000, 5.0000, 4.8000, 5.0000, 5.0000, 4.8000,
        8.0000, 1.0000], dtype=torch.float64), tensor(126))
889


(623, 133, 133)

In [3]:
# Task 2

# Linear Model
class LinearRegression(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        self.linear_layer = torch.nn.Linear(11,1) # 11 features, 1 label

    def forward(self, features):
        # Use the layers to process the features
        return self.linear_layer(features)

model = LinearRegression()

# Train function
def train(model, dataloader, epochs=10):
    for epoch in range(epochs):
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = F.mse_loss(prediction, labels)
            loss.backward()
            print(loss.item())
            
    return

train(model, test_dataset)

8976.1923828125
41681.20703125
144.17495727539062
1080.975830078125
46574.7734375
36606.34765625
189930.9375
9511.267578125
165432.3125
142303.0625
8160.90283203125
28981.203125
25553.189453125
25264.111328125
58445.6015625
15704.583984375
4660.96630859375
100728.015625
11439.498046875
6745.86279296875
138074.40625
5712.9130859375
33724.27734375
37384.95703125
75917.65625
6645.6953125
1690.5792236328125
28790.171875
14152.037109375
10111.798828125
8197.2958984375
36691.36328125
39970.93359375
5218.03515625
9230.552734375
191564.46875
13009.775390625
87863.5234375
1968.2587890625
16388.71875
20189.626953125
31201.05859375
15193.8759765625
15467.3564453125
4577.37353515625
5488.25341796875
11573.484375
17792.314453125
18158.97265625
1293882.375
23547.060546875
16719.0859375
60729.82421875
305.7115173339844
4744.9384765625
15762.4560546875
5627.78369140625
66758.0
8378.21875
11769.4951171875
22177.70703125
6933.27880859375
15309.0556640625
1366.8486328125
19752.498046875
5868.55908203125


  loss = F.mse_loss(prediction, labels)


In [4]:
# Task 3 

model = LinearRegression()
loss_fn = torch.nn.MSELoss() # This Loss function is better

# Train function with optimiser
def train(model, dataloader, epochs=100):

    optimiser = torch.optim.SGD(model.parameters(), lr=0.0001)

    for epoch in range(epochs):
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction, labels)
            loss.backward() # What does this do? Populates the gradients?
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
        print(loss.item())   
    return

train(model,train_loader)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


18846.064453125
5568.08984375
32218.1015625
9180.0224609375
22692.2734375
35139.41796875
30036.26171875
8397.1875
16225.44921875
5025.37109375
13364.57421875
24371.115234375
5245.7021484375
3320.156494140625
42973.484375
6852.39501953125
5323.072265625
6540.71728515625
6123.32958984375
5283.3291015625
29783.53125
5297.20947265625
13820.828125
9111.0693359375
39330.5625
28421.916015625
8168.74951171875
8301.4892578125
4798.0419921875
5464.75244140625
6723.8271484375
16827.951171875
7520.798828125
9887.7626953125
2329.098388671875
3342.658935546875
4518.271484375
2227.049560546875
10585.759765625
4580.3095703125
1735.8590087890625
3961.9833984375
4267.80322265625
2776.515869140625
16631.537109375
8746.634765625
4288.75048828125
7566.67236328125
36799.6015625
9039.3701171875
16539.017578125
5241.47998046875
19978.546875
5470.1650390625
2831.311767578125
12642.9228515625
22870.111328125
23769.658203125
2752.163818359375
24665.65625
15770.7958984375
3930.99072265625
17506.7578125
15089.2841

In [6]:
# Task 4
from torch.utils.tensorboard import SummaryWriter
torch.manual_seed(10)

# Neural Networks Model - Updated with more Layers
class NeuralNetwork(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        self.layers = torch.nn.Sequential( # Update Model with more Layers
        torch.nn.Linear(11, 512),
        torch.nn.ReLU(),
        torch.nn.Linear(512, 256),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 1)
        )

    def forward(self, features):
        # Use the layers to process the features
        return self.layers(features)

model = NeuralNetwork()
loss_fn = torch.nn.MSELoss()

# Train function with Tensorboard
def train(model, dataloader, epochs=20):

    optimiser = torch.optim.Adam(model.parameters(), lr=0.001)

    writer = SummaryWriter()

    for epoch in range(epochs):
        batch_idx = 0
        current_loss = 0.0
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction,labels)
            loss.backward() 
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
            ls = loss.item()
            batch_idx += 1
            current_loss = current_loss + ls
            # writer.add_scalar("Loss - Task 4",ls, epoch)
        
        # print (f"currentnt loss {current_loss} and batch index {batch_idx}")
        # print(f'Loss after mini-batch  ({epoch + 1} : {current_loss // batch_idx}')
        writer.add_scalar('loss',current_loss / batch_idx , epoch)
        print("Loss", current_loss / batch_idx)
            
        
train(model,train_loader)

# The Plotting does not seem okay
# Do we visualize the 

Loss 18883.622314453125
Loss 14252.867807241586
Loss 13720.902005709135
Loss 13595.596905048076
Loss 13638.107553335336
Loss 13656.456214317908
Loss 13469.92507386819
Loss 13545.59088291266
Loss 13731.53598883213
Loss 13769.813038361379
Loss 13649.937318459535
Loss 13513.16968399439
Loss 13622.061172876603
Loss 13554.998904497195
Loss 13568.152087089344
Loss 13661.958408453525
Loss 13621.658065404647
Loss 13620.61389473157
Loss 13538.938254331932
Loss 13760.293150290465


In [None]:
import numpy

# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        yhat = model(inputs)
        # retrieve numpy array
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        # round to class values
        yhat = yhat.round()
        # store
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate accuracy
    acc = accuracy_score(actuals, predictions)
    return acc

In [6]:
# Task 5 

torch.manual_seed(10) 

# yaml file content
''' 
optimiser: SGD
lr: 0.001
hidden_layer_width: 32
depth: 5
'''
# Define function get_nn_config()
import yaml
def get_nn_config():
    with open('nn_config.yaml', 'r') as stream:
    # Converts yaml document to python object
        dictionary=yaml.safe_load(stream)
    return dictionary

# Retrieve config dictionary
nn_config = get_nn_config()

# Redefine NeuralNetwork to include the custom numbers of hidden layers (depth) and hidden layers width
class NeuralNetwork(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # Initialise the Parameters
        #self.linear_layer = torch.nn.Linear(11,1) # 11 features, 1 label

        self.layers = torch.nn.Sequential()
        self.layers.add_module("Input Layer", torch.nn.Linear(11, nn_config['hidden_layer_width'])) # Input layer
        self.layers.add_module("ReLU", torch.nn.ReLU())
        for i in range(nn_config['depth'] - 2): #  The input and the first linear layer are already taken into account
            self.layers.add_module("Hidden Layer", torch.nn.Linear(nn_config['hidden_layer_width'], nn_config['hidden_layer_width'])) # Hidden Layer
            self.layers.add_module("Hidden ReLU", torch.nn.ReLU())
        self.layers.add_module("Output Layer", torch.nn.Linear(nn_config['hidden_layer_width'], 1))# output layer
    

    def forward(self, features):
        # Use the layers to process the features
        return self.layers(features)

model = NeuralNetwork()

# Train function with config 
def train(model, dataloader, nn_config, epochs=15):

    # Set optimiser with lr from nn_config
    if nn_config['optimiser'] == "SGD":
        optimiser = torch.optim.SGD(model.parameters(), lr=nn_config['lr'])

    elif nn_config['optimiser'] == "Adam":
        optimiser = torch.optim.Adam(model.parameters(), lr=nn_config['lr'])

    elif nn_config['optimiser'] == "Adagrad":
        optimiser = torch.optim.Adagrad(model.parameters(), lr=nn_config['lr'])

    writer = SummaryWriter()

    batch_idx = 0

    for epoch in range(epochs):
        batch_idx = 0
        current_loss = 0.0
        for batch in dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction,labels)
            loss.backward() 
            optimiser.step() # Optimiser step
            optimiser.zero_grad()
            ls = loss.item()
            print("Loss", ls)
            batch_idx += 1
            current_loss = current_loss + ls
        writer.add_scalar('loss - Task 5',current_loss / batch_idx , epoch)
        for batch in val_dataloader:
            features, labels = batch
            features = features.to(torch.float32) # Convert torch into the right format
            labels = labels.to(torch.float32) # Convert torch into the right format
            prediction = model(features)
            loss = loss_fn(prediction,labels)
            #loss.backward() 
            #optimiser.step() # Optimiser step
            #optimiser.zero_grad()
            ls = loss.item()
            print("Loss", ls)
            batch_idx += 1
            current_loss = current_loss + ls
        # print (f"currentnt loss {current_loss} and batch index {batch_idx}")
        # print(f'Loss after mini-batch  ({epoch + 1} : {current_loss // batch_idx}')
            writer.add_scalar('loss - Task 5',current_loss / batch_idx , epoch)

train(model,train_loader,nn_config)

Loss 25287.65234375
Loss 32571.419921875
Loss 184456896.0
Loss 38007.43359375
Loss 1116664758272.0
Loss 4792146.0
Loss 4797858.0
Loss 4038146.25
Loss 7114414.5
Loss 4686036.0
Loss 4604442.0
Loss 4813102.0
Loss 4839096.5
Loss 4513332.0
Loss 4675208.0
Loss 4933570.0
Loss 4901843.0
Loss 4764189.0
Loss 4529743.0
Loss 4559428.5
Loss 4611916.5
Loss 4440800.5
Loss 4452646.5
Loss 4380575.0
Loss 4492293.0
Loss 4428891.0
Loss 4324718.0
Loss 4525574.5
Loss 4354404.0
Loss 4382444.0
Loss 4297299.0
Loss 4310760.0
Loss 27395100770304.0
Loss inf
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss nan
Loss 

In [7]:
# Task 6

import os
import time
import json

def save_model(best_model, best_hyperparameters, best_metrics):
    '''
        Creates a models folder, then within the models' folder creates a regression folder and finally creates a last neural networks folder where it stores the model, a dictionary of its hyperparameters and a dictionary of its metrics
        
        Parameters
        ----------
        folder_name: str
            A string used to name the folder to be created
        
        best_model: pytorch model
            A model from pythorch
        
        best_hyperparameters: dict
            A dictionary containing the optimal hyperparameters configuration
        
        best_metrics: dict 
            A dictionary containing the test metrics obtained using the best model   

        Returns
        -------
        None       
    '''

    # Create Models folder
    models_dir = 'airbnb-property-listings/models'
    current_dir = os.path.dirname(os.getcwd())
    models_path = os.path.join(current_dir, models_dir)
    if os.path.exists(models_path) == False:
        os.mkdir(models_path)

    # Create regression folder
    regression_dir = 'airbnb-property-listings/models/regression'
    current_dir = os.path.dirname(os.getcwd())
    regression_path = os.path.join(current_dir, regression_dir)
    if os.path.exists(regression_path) == False:
        os.mkdir(regression_path)

    # Create neural_networks folder
    nn_name_dir = os.path.join(regression_path,'neural_networks') # Create the neural network folder
    current_dir = os.path.dirname(os.getcwd())
    nn_name_path = os.path.join(current_dir, nn_name_dir)
    if os.path.exists(nn_name_path) == False:
        os.mkdir(nn_name_path)

    # Create a Timestamp folder
    timestamp_dir = os.path.join(nn_name_dir,time.strftime("%Y-%m-%d_%H:%M:%S")) # Create the timestamp folder
    current_dir = os.path.dirname(os.getcwd())
    timestamp_path = os.path.join(current_dir, timestamp_dir)
    if os.path.exists(timestamp_path) == False:
        os.mkdir(timestamp_path)

    # Save the model in a file called model.pt
    torch.save(best_model, os.path.join(timestamp_path, 'model.pt')) 

    # Save the hyperparameters in a file called hyperparameters.json
    with open(os.path.join(timestamp_path, 'hyperparameters.json'), 'w') as fp: 
            json.dump(best_hyperparameters, fp)

    # Save the metrics in a file called metrics.json
    with open(os.path.join(timestamp_path, 'metrics.json'), 'w') as fp:
            json.dump(best_metrics, fp)

    return


# Define the model

model = NeuralNetwork()

best_model = train(model,train_loader,nn_config)

# Define the hyperparemeters

best_hyperparameters = get_nn_config()

# Calculate the metrics

''' 
The RMSE loss of your model under a key called RMSE_loss for training, validation, and test sets
The R^2 score of your model under a key called R_squared for training, validation, and test sets
The time taken to train the model under a key called training_duration
The average time taken to make a prediction under a key called inference_latency

'''
best_metrics = {

    'RMSE_loss' : 0, #[training, validation, test], # Need to Calculate the metrics
    'R^2' : 0, #[training, validation, test],
    'training_duration' : 0,
    'inference_latency' : 0,
}

save_model(best_model, best_hyperparameters, best_metrics)



Loss 17071.94921875
Loss 96078.3671875
Loss 15029968896.0
Loss 131251.5
Loss 159014.4375
Loss 201944.171875
Loss 121386.9765625
Loss 152356.1875
Loss 2328529404428288.0
Loss 9.941703480716933e+29
Loss 3.976681929732662e+24
Loss 3.9607906361739133e+24
Loss 3.9449630415282944e+24
Loss 3.9291982811046765e+24
Loss 3.9134977960549405e+24
Loss 3.897859568766453e+24
Loss 3.8822835992392147e+24
Loss 3.866769887473225e+24
Loss 3.851318433468484e+24
Loss 3.8359289489946156e+24
Loss 3.8206008575908676e+24
Loss 3.805333871026864e+24
Loss 3.790127701072228e+24
Loss 3.7749829241877123e+24
Loss 3.7598980992214364e+24
Loss 3.7448732261734e+24
Loss 3.729908593273979e+24
Loss 3.715003912292798e+24
Loss 3.7001588949994803e+24
Loss 3.68537325316365e+24
Loss 3.670646122094178e+24
Loss 3.6559783664821936e+24
Loss 3.641369121636568e+24
Loss 3.626818675787677e+24
Loss 3.6123255877836407e+24
Loss 3.5978901458548345e+24
Loss 3.583513214692387e+24
Loss 3.56919392960517e+24
Loss 3.554931714132431e+24
Loss 3.54072

In [8]:
# Task 7

import itertools
# Define a fucntion wich creates many config dictionaries for your network 

def generate_nn_configs():

    # Parameters to change are: Optimiser, lr, hidden_layer_width and depth
    combinations_dict = {
        'Optimisers':['SGD', 'Adam', 'Adagrad'],
        'lr':[0.001, 0.0001],
        'hidden_layer_width':[32, 64, 128, 256],
        'depth':[3,5,10]
    }

    config_dict_list = []
    # For every possible combination of the combinations_dict create a custom dictionary that is later stored in config_dict_list
    for iteration in itertools.product(*combinations_dict.values()):
        config_dict = {
            'optimiser': iteration[0],
            'lr': iteration[1],
            'hidden_layer_width': iteration[2],
            'depth': iteration[3]
        }
        config_dict_list.append(config_dict)

    return config_dict_list

def find_best_nn():
    # Call the previous function to get the list of config dictionaries
    config_dict_list = generate_nn_configs()

    # For each configuration, redefine the nn_model and the training function
    for nn_config in config_dict_list():

        # Redefine NeuralNetwork to include the custom numbers of hidden layers (depth) and hidden layers width
        class NeuralNetwork(torch.nn.Module):

            def __init__(self):
                super().__init__()
                # Initialise the Parameters
                #self.linear_layer = torch.nn.Linear(11,1) # 11 features, 1 label

                self.layers = torch.nn.Sequential()
                self.layers.add_module("Input Layer", torch.nn.Linear(11, nn_config['hidden_layer_width'])) # Input layer
                self.layers.add_module("ReLU", torch.nn.ReLU())
                for i in range(nn_config['depth'] - 2): #  The input and the first linear layer are already taken into account
                    self.layers.add_module("Hidden Layer", torch.nn.Linear(nn_config['hidden_layer_width'], nn_config['hidden_layer_width'])) # Hidden Layer
                    self.layers.add_module("Hidden ReLU", torch.nn.ReLU())
                self.layers.add_module("Output Layer", torch.nn.Linear(nn_config['hidden_layer_width'], 1))# output layer
            
            def forward(self, features):
                # Use the layers to process the features
                return self.layers(features)

        model = NeuralNetwork()

        # Train function with config 
        def train(model, dataloader, nn_config, epochs=12):

            # Set optimiser with lr from nn_config
            if nn_config['optimiser'] == "SGD":
                optimiser = torch.optim.SGD(model.parameters(), lr=nn_config['lr'])

            elif nn_config['optimiser'] == "Adam":
                optimiser = torch.optim.Adam(model.parameters(), lr=nn_config['lr'])

            elif nn_config['optimiser'] == "Adagrad":
                optimiser = torch.optim.Adagrad(model.parameters(), lr=nn_config['lr'])

            writer = SummaryWriter()

            batch_idx = 0

            for epoch in range(epochs):
                batch_idx = 0
                current_loss = 0.0
                for batch in dataloader:
                    features, labels = batch
                    features = features.to(torch.float32) # Convert torch into the right format
                    labels = labels.to(torch.float32) # Convert torch into the right format
                    prediction = model(features)
                    loss = loss_fn(prediction,labels)
                    loss.backward() 
                    optimiser.step() # Optimiser step
                    optimiser.zero_grad()
                    ls = loss.item()
                    print("Loss", ls)
                    batch_idx += 1
                    current_loss = current_loss + ls
                
                # print (f"currentnt loss {current_loss} and batch index {batch_idx}")
                # print(f'Loss after mini-batch  ({epoch + 1} : {current_loss // batch_idx}')
                    writer.add_scalar('Loss - Task 7',current_loss / batch_idx , epoch)

        train(model,train_loader,nn_config)

        # Calculate metrics

        # ''' 
        # The RMSE loss of your model under a key called RMSE_loss for training, validation, and test sets
        # The R^2 score of your model under a key called R_squared for training, validation, and test sets
        # The time taken to train the model under a key called training_duration
        # The average time taken to make a prediction under a key called inference_latency

        # '''
        # best_metrics = {

        #     'RMSE_loss' : 0, #[training, validation, test], # Need to Calculate the metrics
        #     'R^2' : 0, #[training, validation, test],
        #     'training_duration' : 0,
        #     'inference_latency' : 0,
        # }

        # STore the metrics, config, and model:

        # if metrics are best than they were:  WHAT SHOULD I LOOK AT (RMSE)
        #     best_model = best_model
        #     best_hyperparameters = best_hyperparameters
        #     best_metrics = best_metrics

    # save_model(best_model, best_hyperparameters, best_metrics)=
        
    return 

# find_best_nn()

In [9]:
list = generate_nn_configs()


In [10]:
len(list)

72

In [11]:
model = NeuralNetwork()

In [12]:
type(NeuralNetwork())

__main__.NeuralNetwork

In [13]:
if type(model) == type(NeuralNetwork()):
    print('yesss')


yesss
