In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision

In [32]:
# Load the datasets
train_df = pd.read_csv('../processed_data/train_processed_with_dummies.csv')
test_df = pd.read_csv('../processed_data/valid_processed_with_dummies.csv')

train_X, train_y = train_df.iloc[1:, 0:-1], train_df.iloc[1:, -1:]
train_X = torch.Tensor(train_X.to_numpy(dtype=float))
train_y = torch.Tensor(train_y.to_numpy(dtype=float))

test_X, test_y = test_df.iloc[:, 0:-1], test_df.iloc[:, -1:]
test_X = torch.Tensor(test_X.to_numpy(dtype=float))
test_y = torch.Tensor(test_y.to_numpy(dtype=float))

train = TensorDataset(train_X, train_y)
train_iter = DataLoader(train, batch_size = 32)
test = TensorDataset(test_X, test_y)
test_iter = DataLoader(test, batch_size = 32)

In [3]:
# Regression Model
features = train_X.shape[1]

regression = nn.Sequential(nn.Linear(features, 1))

In [4]:
# Multi-layer Proceptron
MLP = nn.Sequential(nn.Flatten(),
                    nn.Linear(features, 256), nn.ReLU(),
                    nn.Linear(256, 1))

In [5]:
# Training
def train(net, train_iter, epochs, loss, lr, device):
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0)

    for epoch in range(epochs):
        for _,(X ,y) in enumerate(train_iter):
            X = X.to(device)
            y = y.to(device)

            y_hat = net(X)
            l = loss(y_hat, y)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        
        log_rmse = torch.sqrt(loss(torch.log(y_hat),
                           torch.log(y)))
        print(f'epoch: {epoch}: loss: {log_rmse.item()}')

In [7]:
epochs, lr = 100, 0.01
loss = nn.MSELoss()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
train(regression, train_iter, epochs, loss, lr, device)

epoch: 0: loss: 3.5894737243652344
epoch: 1: loss: 2.9247424602508545
epoch: 2: loss: 2.5380711555480957
epoch: 3: loss: 2.266207456588745
epoch: 4: loss: 2.057600498199463
epoch: 5: loss: 1.8892208337783813
epoch: 6: loss: 1.748758316040039
epoch: 7: loss: 1.6288528442382812
epoch: 8: loss: 1.5247514247894287
epoch: 9: loss: 1.4332029819488525
epoch: 10: loss: 1.3518857955932617
epoch: 11: loss: 1.2790825366973877
epoch: 12: loss: 1.2134844064712524
epoch: 13: loss: 1.154070496559143
epoch: 14: loss: 1.100027084350586
epoch: 15: loss: 1.0506930351257324
epoch: 16: loss: 1.0055229663848877
epoch: 17: loss: 0.9640594124794006
epoch: 18: loss: 0.9259145855903625
epoch: 19: loss: 0.8907546997070312
epoch: 20: loss: 0.8582903742790222
epoch: 21: loss: 0.8282676935195923
epoch: 22: loss: 0.8004627823829651
epoch: 23: loss: 0.774675726890564
epoch: 24: loss: 0.750728964805603
epoch: 25: loss: 0.728461742401123
epoch: 26: loss: 0.7077298760414124
epoch: 27: loss: 0.6884030103683472
epoch: 28:

In [8]:
train(MLP, train_iter, epochs, loss, lr, device)

epoch: 0: loss: 0.4524247944355011
epoch: 1: loss: 0.36306145787239075
epoch: 2: loss: 0.3155585825443268
epoch: 3: loss: 0.2929476499557495
epoch: 4: loss: 0.2790246307849884
epoch: 5: loss: 0.26892992854118347
epoch: 6: loss: 0.2608376741409302
epoch: 7: loss: 0.2542244493961334
epoch: 8: loss: 0.2488587200641632
epoch: 9: loss: 0.24453699588775635
epoch: 10: loss: 0.2411893904209137
epoch: 11: loss: 0.23869915306568146
epoch: 12: loss: 0.23686587810516357
epoch: 13: loss: 0.23560167849063873
epoch: 14: loss: 0.2348906695842743
epoch: 15: loss: 0.23457086086273193
epoch: 16: loss: 0.23446178436279297
epoch: 17: loss: 0.2344427853822708
epoch: 18: loss: 0.23448419570922852
epoch: 19: loss: 0.23433725535869598
epoch: 20: loss: 0.23455367982387543
epoch: 21: loss: 0.23501937091350555
epoch: 22: loss: 0.23519417643547058
epoch: 23: loss: 0.23568545281887054
epoch: 24: loss: 0.23623397946357727
epoch: 25: loss: 0.2361423671245575
epoch: 26: loss: 0.2365499883890152
epoch: 27: loss: 0.2364

In [29]:
# Validation
def valid(net, test_iter, device, loss):
    net.eval()
    with torch.no_grad():
        net = net.to(device)
        l = 0
        for X ,y in test_iter:
            X = X.to(device)
            y = y.to(device)

            y_hat = net(X)
            log_rmse = torch.sqrt(loss(torch.log(y_hat),
                           torch.log(y)))
            l = l + log_rmse.item()
        
    return l / len(list(test_iter))


In [34]:
regression_valid = valid(regression, test_iter, device, loss)
MLP_valid = valid(MLP, test_iter, device, loss)

print(f'Regression Validation Loss: {regression_valid}')
print(f'MLP Validation Loss: {MLP_valid}')

Regression Validation Loss: 0.26052657089063097
MLP Validation Loss: 0.17533192144972937
