In [241]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score, mean_absolute_error

In [242]:
train_X = pd.read_csv('../../data/curated/business_interruption/train_X.csv')
train_Y = pd.read_csv('../../data/curated/business_interruption/train_Y.csv')
val_X = pd.read_csv('../../data/curated/business_interruption/val_X.csv')
val_Y = pd.read_csv('../../data/curated/business_interruption/val_Y.csv')
test_X = pd.read_csv('../../data/curated/business_interruption/test_X.csv')
test_Y = pd.read_csv('../../data/curated/business_interruption/test_Y.csv')

In [243]:
train_X = train_X.to_numpy()
train_Y = train_Y.to_numpy()
val_X = val_X.to_numpy()
val_Y = val_Y.to_numpy()
test_X = test_X.to_numpy()
test_Y = test_Y.to_numpy()

In [244]:
train_X = torch.tensor(train_X, dtype=torch.float32)
train_Y = torch.tensor(train_Y, dtype=torch.float32)
val_X = torch.tensor(val_X, dtype=torch.float32)
val_Y = torch.tensor(val_Y, dtype=torch.float32)
test_X = torch.tensor(test_X, dtype=torch.float32)
test_Y = torch.tensor(test_Y, dtype=torch.float32)

In [245]:
class NN(torch.nn.Module):
    def __init__(self):
        super(NN, self).__init__()
        self.fc1 = torch.nn.Linear(in_features=52, out_features=128)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.fc2 = torch.nn.Linear(in_features=128, out_features=128)
        self.fc3 = torch.nn.Linear(in_features=128, out_features=64)
        self.fc4 = torch.nn.Linear(in_features=64, out_features=32)
        self.fc5 = torch.nn.Linear(in_features=32, out_features=1)


    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        
        return x

In [246]:
model = NN()

In [247]:
optimizer = optim.Adam(params=model.parameters(), lr=0.0001)
criterion = torch.nn.L1Loss()
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

In [248]:
num_epoches = 30
batch_size = 64
for epoch in range(num_epoches):
    model.train()
    for i in range(0, train_X.shape[0], batch_size):
        batch_X = train_X[i:i+batch_size]
        batch_Y = train_Y[i:i+batch_size]
        optimizer.zero_grad()
        output = model(train_X)
        train_loss = criterion(output, train_Y)
        train_loss.backward()
        optimizer.step()

    scheduler.step()

    model.eval()
    with torch.no_grad():
        pred = model(val_X)
    val_loss = criterion(pred, val_Y)
    print(f'Epoch: {epoch+1}/{num_epoches}, Train Loss: {round(train_loss.item(), 4)}, Val Loss: {round(val_loss.item(), 4)}')

Epoch: 1/30, Train Loss: 19.1261, Val Loss: 19.0408
Epoch: 2/30, Train Loss: 13.9743, Val Loss: 13.6738
Epoch: 3/30, Train Loss: 11.2943, Val Loss: 10.8507
Epoch: 4/30, Train Loss: 9.814, Val Loss: 9.442
Epoch: 5/30, Train Loss: 9.2083, Val Loss: 8.9298
Epoch: 6/30, Train Loss: 8.9222, Val Loss: 8.6215
Epoch: 7/30, Train Loss: 8.7774, Val Loss: 8.4563
Epoch: 8/30, Train Loss: 8.657, Val Loss: 8.4047
Epoch: 9/30, Train Loss: 8.589, Val Loss: 8.3987
Epoch: 10/30, Train Loss: 8.5218, Val Loss: 8.4029
Epoch: 11/30, Train Loss: 8.5345, Val Loss: 8.4022
Epoch: 12/30, Train Loss: 8.5229, Val Loss: 8.4036


KeyboardInterrupt: 

In [None]:
model.eval()
with torch.no_grad():
    test_pred = model(test_X)
test_pred = test_pred.numpy()
test_Y_numpy = test_Y.numpy()
r2_score(y_pred=test_pred, y_true=test_Y_numpy), mean_absolute_error(y_pred=test_pred, y_true=test_Y)

(0.6467835823753154, 8.340291)