In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import os
from matplotlib import pyplot as plt
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [2]:
def get_file_names(folder):
    #Listing entries present in given folder
    entries = os.listdir(folder)
    for i in entries:
        if 'csv' not in i:
            entries.remove(i)
    return sorted(entries, reverse=True)[0:12]

#Function for normalizing data 
def nomarlize_price(my_dataframe):
    my_dataframe['price'] = (my_dataframe['price'] - 
                             my_dataframe['price'].min())/(my_dataframe['price'].max() - 
                                                         my_dataframe['price'].min())
    return my_dataframe


train_path = "../split_datasets/train/"
test_path = "../split_datasets/test/"
val_path = "../split_datasets/validation/"

train_files = get_file_names(train_path)
test_files = get_file_names(test_path)
val_files = get_file_names(val_path)

frames = []
for i in train_files:
    frames.append(pd.read_csv(train_path+i))
train_df = pd.concat(frames, sort = True)
train_df = train_df.drop(columns = 'id')
train_df = train_df.drop(columns = 'last_scraped')
train_df = train_df.fillna(0)
train_df = nomarlize_price(train_df)
train_df = train_df.reset_index(drop=True)

frames = []
for i in test_files:
    frames.append(pd.read_csv(test_path+i))
test_df = pd.concat(frames, sort = True)
test_df = test_df.drop(columns = 'id')
test_df = test_df.drop(columns = 'last_scraped')
test_df = test_df.fillna(0)
test_df = nomarlize_price(test_df)
test_df = test_df.reset_index(drop=True)

frames = []
for i in val_files:
    frames.append(pd.read_csv(val_path+i))
val_df = pd.concat(frames, sort = True)
val_df = val_df.drop(columns = 'id')
val_df = val_df.drop(columns = 'last_scraped')
val_df = val_df.fillna(0)
val_df = nomarlize_price(val_df)
val_df = val_df.reset_index(drop=True)

In [3]:
class airbnb_dataset (Dataset):
    def __init__(self, df, purpose):
        self.df = df
        self.price = self.df["price"]
        self.df = self.df.drop(columns="price")
        self.purpose = purpose
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        # Returns specific sample as a dict
        if torch.is_tensor(index):
            index = index.tolist()
        sample = torch.tensor(self.df.iloc[index]).float()
        label = torch.tensor([self.price.iloc[index]]).float()
        return {"sample": sample, "label": label}

In [10]:
train_ds = airbnb_dataset(train_df, "train")
test_ds = airbnb_dataset(test_df, "test")
val_ds = airbnb_dataset(val_df, "test")

train_loader = DataLoader(train_ds, batch_size=1000, num_workers=0)
test_loader = DataLoader(test_ds, batch_size=1000, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=1000, num_workers=0)

In [12]:
class airbnb_net (nn.Module):
    
    def __init__(self):
        super(airbnb_net, self).__init__()
        self.layer1 = nn.Linear(264, 100)
        self.layer2 = nn.Linear(100, 50)
        self.layer3 = nn.Linear(50, 15)
        self.layer4 = nn.Linear(15, 1)
    
    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = torch.relu(self.layer3(x))
        return self.layer4(x)

In [21]:
def train(net, train_loader, val_loader):
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum = 0.9)
    
    print("Starting Training...")
    start_time = time.time()
    epochs, losses = [], []
    
    for epoch in range(75):
        epoch_loss = 0
        epoch_time = time.time()
        batch_loss = 0
        batch_time = time.time()
        for i, data in enumerate(train_loader):
            sample = data["sample"]
            label = data["label"]
            output = net(sample)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            batch_loss += loss
            epoch_loss += loss
            break
            
            if i % 10 == 0:
                batch_loss = batch_loss/10
                print("---[ITER %d] loss: %.3f  time: %.3f" % (i, batch_loss, time.time()-batch_time))
                batch_loss = 0
                batch_time = time.time()
        epoch_loss /= len(train_loader)
        
        epochs.append(epoch)
        losses.append(epoch_loss)
        
        print("[EPOCH %d] loss: %.3f  time: %.3f" % (epoch+1, epoch_loss, time.time()-epoch_time))
    
    print("=========================================")
    print("Training Completed...")
    print("[FINAL] loss: %.3f  time: %.3f" % (epoch_loss, time.time()-start_time))
          
        
    # plotting
    plt.title("Training Curve")
    plt.plot(losses, label="Train")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()
    
'''
    plt.title("Training Curve")
    plt.plot(epochs, train_acc, label="Train")
    plt.plot(epochs, valid_acc, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(loc='best')
    plt.show()
'''            

'\n    plt.title("Training Curve")\n    plt.plot(epochs, train_acc, label="Train")\n    plt.plot(epochs, valid_acc, label="Validation")\n    plt.xlabel("Epoch")\n    plt.ylabel("Accuracy")\n    plt.legend(loc=\'best\')\n    plt.show()\n'

In [7]:
def get_error(model, train_loader, val_loader):
    train_samples = 0
    train_error = 0
    for i, data in enumerate(train_loader):
        sample = data["sample"]
        label = data["label"]
        output = net(sample)
        error = abs(label-output)
        accurate = (error < 10)
        return accurate[0:10].sum()
        train_samples += len(label)
        train_error += error.sum().item()
    val_samples = 0
    val_error = 0
    for i, data in enumerate(val_loader):
        print("hello")
        

In [22]:
net = airbnb_net()

In [None]:
train(net, train_loader, val_loader)

Starting Training...
[EPOCH 1] loss: 0.007  time: 17.014
[EPOCH 2] loss: 0.003  time: 18.342
[EPOCH 3] loss: 0.007  time: 16.629
[EPOCH 4] loss: 0.001  time: 12.986
[EPOCH 5] loss: 0.006  time: 13.060
[EPOCH 6] loss: 0.003  time: 11.519
[EPOCH 7] loss: 0.001  time: 11.981
[EPOCH 8] loss: 0.004  time: 11.772
[EPOCH 9] loss: 0.002  time: 12.385
[EPOCH 10] loss: 0.002  time: 11.351
[EPOCH 11] loss: 0.003  time: 11.371
[EPOCH 12] loss: 0.001  time: 11.101
[EPOCH 13] loss: 0.002  time: 11.019
[EPOCH 14] loss: 0.002  time: 11.644
[EPOCH 15] loss: 0.001  time: 11.821
[EPOCH 16] loss: 0.001  time: 13.966
[EPOCH 17] loss: 0.002  time: 11.137
[EPOCH 18] loss: 0.001  time: 13.813
[EPOCH 19] loss: 0.001  time: 15.790
[EPOCH 20] loss: 0.002  time: 11.974
[EPOCH 21] loss: 0.001  time: 11.414
[EPOCH 22] loss: 0.001  time: 21.190
[EPOCH 23] loss: 0.001  time: 14.489
[EPOCH 24] loss: 0.001  time: 11.845
[EPOCH 25] loss: 0.001  time: 16.501
[EPOCH 26] loss: 0.001  time: 15.074
[EPOCH 27] loss: 0.001  ti

In [8]:
train(net, train_loader, val_loader)

Starting Training...
[EPOCH 1] loss: 251.382  time: 2.180
[EPOCH 2] loss: 251.355  time: 2.901
[EPOCH 3] loss: 251.306  time: 1.802
[EPOCH 4] loss: 251.236  time: 1.707
[EPOCH 5] loss: 251.149  time: 1.702
[EPOCH 6] loss: 251.050  time: 1.689
[EPOCH 7] loss: 250.942  time: 1.688
[EPOCH 8] loss: 250.827  time: 2.249
[EPOCH 9] loss: 250.707  time: 2.291
[EPOCH 10] loss: 250.586  time: 1.739
[EPOCH 11] loss: 250.469  time: 1.735
[EPOCH 12] loss: 250.353  time: 1.723
[EPOCH 13] loss: 250.227  time: 1.728
[EPOCH 14] loss: 250.090  time: 2.126
[EPOCH 15] loss: 249.941  time: 2.104
[EPOCH 16] loss: 249.780  time: 1.878
[EPOCH 17] loss: 249.606  time: 1.937
[EPOCH 18] loss: 249.416  time: 1.971
[EPOCH 19] loss: 249.210  time: 1.973
[EPOCH 20] loss: 248.987  time: 2.150
[EPOCH 21] loss: 248.744  time: 1.819
[EPOCH 22] loss: 248.480  time: 2.010
[EPOCH 23] loss: 248.194  time: 2.034
[EPOCH 24] loss: 247.882  time: 1.997
[EPOCH 25] loss: 247.544  time: 1.831
[EPOCH 26] loss: 247.176  time: 1.983


TypeError: 'float' object is not callable

In [34]:
get_error(net, train_loader, val_loader)

tensor(3)

In [9]:
accum = 0
accum1 = 0
for i in range(1000):
    error = net(train_ds[i]["sample"]).item() - train_ds[i]["label"].item()
    accum += error
    accum1 += abs(error)
print("Error:", accum/1000)
print("Abs Error:", accum1/1000)

Error: -1.5149315567016601
Abs Error: 52.31066065979004


In [25]:
for i, data in enumerate(train_loader):
    print(len(data["label"]))
    break

1000


In [11]:
train_ds[4]["label"].item()


135.0