In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
import numpy as np

In [2]:
class FCGDataset(Dataset):
    def __init__(self, file_dir):
        data = pd.read_csv(file_dir)
        used_columns = ["areaSqm", "rentDetail", "propertyType", "furnish", "internet", "roommates", "shower", "toilet", "kitchen", "living", "pets", "smokingInside", "matchCapacity", "rent"]
        one_hot_columns = ["rentDetail", "propertyType", "furnish", "internet", "roommates", "shower", "toilet", "kitchen", "living", "pets", "smokingInside", "matchCapacity"]
        data = data[used_columns]
        ohe_df = pd.get_dummies(data[one_hot_columns], drop_first=True)
        num_df = data.drop(one_hot_columns, axis=1)
        data = pd.concat([ohe_df, num_df], axis=1)
        feature_values = data.drop("rent", axis=1).values.astype("float32")
        self.features = torch.from_numpy(feature_values)
        self.target = torch.from_numpy(data["rent"].values.astype("float32"))

    def __len__(self):
        return len(self.target)

    def __getitem__(self, idx):
        return self.features[idx], self.target[idx]

In [3]:
# neural network class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # First fully connected layer
        # input features: areaSqm (1) + rentDetail (2) + propertyType (5) + furnish (4) + internet (3) + roommates (11) + shower (4) + toilet (4) + kitchen (4) + living (4) + pets (4) + smokingInside (4) + matchCapacity (8) (+ matchStatus (4)) = 58
        self.fc1 = nn.Linear(41, 512)
        self.fc2 = nn.Linear(512, 512)
        self.dropout1 = nn.Dropout()
        self.fc3 = nn.Linear(512, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)

        x = self.fc2(x)
        x = F.relu(x)

        x = self.dropout1(x)

        x = self.fc3(x)
        x = F.relu(x)

        # x = self.dropout2(x)
        x = self.fc4(x)

        output = x
        return output

In [4]:
def train_loop(dataset, model, loss_fn, optimizer, batch_size):
    size = len(dataset)
    for batch in range(size // batch_size):
        X, y = dataset[batch * batch_size : (batch + 1) * batch_size]
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [1]:
def test_loop(dataset, model, loss_fn, batch_size):
    size = len(dataset)
    num_batches = size // batch_size
    test_loss, correct = 0, 0

    with torch.no_grad():
        X, y = dataset.features, dataset.target
        pred = model(X)
        test_loss += loss_fn(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [5]:
learning_rate = 1e-3
batch_size = 64
epochs = 20

In [6]:
loss_fn = torch.nn.MSELoss(reduction='sum')
net = Net()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
train_data = FCGDataset("train.csv")
test

In [7]:
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_data, net, loss_fn, optimizer, batch_size)
    test_loop()
print("Done!")

Epoch 1
-------------------------------
loss: 2300557312.000000  [    0/27915]


  return F.mse_loss(input, target, reduction=self.reduction)


loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [19200/27915]
loss:     nan  [25600/27915]
Epoch 2
-------------------------------
loss:     nan  [    0/27915]
loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [19200/27915]
loss:     nan  [25600/27915]
Epoch 3
-------------------------------
loss:     nan  [    0/27915]
loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [19200/27915]
loss:     nan  [25600/27915]
Epoch 4
-------------------------------
loss:     nan  [    0/27915]
loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [19200/27915]
loss:     nan  [25600/27915]
Epoch 5
-------------------------------
loss:     nan  [    0/27915]
loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [19200/27915]
loss:     nan  [25600/27915]
Epoch 6
-------------------------------
loss:     nan  [    0/27915]
loss:     nan  [ 6400/27915]
loss:     nan  [12800/27915]
loss:     nan  [1

In [8]:
train_data[0]

(tensor([ 0.,  1.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,
          0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 14.]),
 tensor(500.))