In [15]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd

In [16]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [17]:
# Seed the global NumPy and PyTorch random number generators with the same
# value so that the random split, shuffling and weight init are repeatable.
SEED = 445
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x16eb81422f0>

## Model

In [18]:
class SimplePredictor(nn.Module):
    """Feed-forward network with a single hidden layer.

    Layer pipeline: Linear -> BatchNorm1d -> LeakyReLU -> Linear.

    Args:
        num_inputs: size of the last dimension of the input tensor.
        num_hidden: width of the hidden layer (also the BatchNorm1d feature count).
        num_outputs: size of the last dimension of the output tensor.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        # Attribute names and creation order are part of the contract:
        # they determine the state_dict keys and the printed module repr.
        self.linear1 = nn.Linear(in_features=num_inputs, out_features=num_hidden)
        self.bn1 = nn.BatchNorm1d(num_features=num_hidden)
        self.fn1 = nn.LeakyReLU()
        self.linear2 = nn.Linear(in_features=num_hidden, out_features=num_outputs)

    def forward(self, x):
        """Run ``x`` through the network and return the raw (unactivated) outputs."""
        hidden = self.fn1(self.bn1(self.linear1(x)))
        return self.linear2(hidden)

In [19]:
def get_accuracy(model, loader, pct_close):
    """Return the fraction of samples predicted within a relative tolerance.

    A sample counts as correct when
    ``|round(prediction) - label| < |pct_close * label|``.
    The comparison is strict, so a label of exactly 0 is never counted as
    correct.

    Args:
        model: ``nn.Module`` to evaluate. It is switched to eval mode and is
            left in eval mode when the function returns.
        loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        pct_close: relative tolerance, e.g. ``0.2`` for 20 % of the label.

    Returns:
        ``correct / total`` as a float, or ``0.0`` if the loader yields no
        samples.
    """
    correct = 0
    total = 0
    model.eval()
    # Evaluate on the device the model actually lives on instead of relying on
    # a notebook-level global; a parameter-free model leaves tensors in place.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for x, labels in loader:
            if model_device is not None:
                x, labels = x.to(model_device), labels.to(model_device)
            pred = torch.round(model(x))
            # Give the labels the same shape as the predictions so the
            # comparison is element-wise rather than broadcast.
            target = labels.view_as(pred)
            within_tolerance = torch.abs(pred - target) < torch.abs(pct_close * target)
            correct += within_tolerance.sum().item()
            total += x.shape[0]
    # Guard against an empty loader instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

## Data

In [20]:
# Load the processed table from disk. The next cell uses every column except
# the last as a feature and the last column as the label.
data06 = pd.read_csv("../data/processed/target06.csv")

In [21]:
# Features are all columns but the last; the last column is the label.
raw_features = data06.values[:, :-1]
targets = data06.values[:, -1]

# Min-max scale every feature column to [0, 1].
# np.ptp() replaces the ndarray.ptp() method, which was removed in NumPy 2.0.
feature_min = raw_features.min(0)
feature_range = np.ptp(raw_features, axis=0)
# A constant column has zero range; divide by 1 there so it maps to 0 rather than NaN.
feature_range = np.where(feature_range == 0, 1, feature_range)
# NOTE(review): the scaling statistics are computed over the whole table, so they
# also see the rows that end up in the test/validation splits — confirm this is intended.
all_data = data.TensorDataset(
    torch.from_numpy((raw_features - feature_min) / feature_range).float(),
    torch.from_numpy(targets).float(),
)  # with normalization

# 70 % train / 20 % test / 10 % validation.
# The validation size is the remainder: three independently rounded sizes do not
# always add up to len(all_data) (e.g. 8 rows -> 6 + 2 + 1 = 9), which makes
# random_split raise a ValueError.
n_train = round(0.7 * len(all_data))
n_test = round(0.2 * len(all_data))
n_valid = len(all_data) - n_train - n_test
train_dataset, test_dataset, valid_dataset = torch.utils.data.random_split(
    all_data, (n_train, n_test, n_valid)
)

In [22]:
# Training loader: small batches, reshuffled on every pass, trailing partial
# batch dropped.
data_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    drop_last=True,
)
# Evaluation loaders: larger batches, fixed order, every sample kept.
test_data_loader = data.DataLoader(
    dataset=test_dataset,
    batch_size=256,
    shuffle=False,
    drop_last=False,
)
valid_data_loader = data.DataLoader(
    dataset=valid_dataset,
    batch_size=256,
    shuffle=False,
    drop_last=False,
)

## Prepare model

In [23]:
# Build the network (8 inputs, 500 hidden units, 1 output) and place it on
# `device`. nn.Module.to() moves the parameters in place and returns the
# module itself, so the call can be chained.
model = SimplePredictor(num_inputs=8, num_hidden=500, num_outputs=1).to(device)
print(model)

SimplePredictor(
  (linear1): Linear(in_features=8, out_features=500, bias=True)
  (bn1): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fn1): LeakyReLU(negative_slope=0.01)
  (linear2): Linear(in_features=500, out_features=1, bias=True)
)


In [24]:
# Training hyper-parameters.
epochs = 300  # number of passes over the training loader
loss_fun = nn.MSELoss()  # mean squared error between predictions and labels
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)

## Training

In [25]:
# NOTE: `data_labels`, `preds` and `loss` intentionally keep these names; they
# remain defined after the loop and hold the values of the last mini-batch.
for epoch in range(epochs):
    # get_accuracy() puts the model into eval mode, so training mode has to be
    # re-enabled at the start of every epoch.
    model.train()
    for data_inputs, data_labels in data_loader:
        data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Drop dimension 1 of the model output so it lines up with the labels.
        preds = model(data_inputs).squeeze(dim=1)
        loss = loss_fun(preds, data_labels)
        loss.backward()
        optimizer.step()
    if epoch % 10 != 0:
        continue
    # Progress report every 10th epoch. `loss` is the loss of the last
    # mini-batch of the epoch, not an average over the epoch.
    print(f"Epoch: {epoch}, loss: {loss.item():.3}, acc: {get_accuracy(model, valid_data_loader, 0.2)}")

Epoch: 0, loss: 5.23e+02, acc: 0.0
Epoch: 10, loss: 3.77e+02, acc: 0.028901734104046242
Epoch: 20, loss: 1.58e+02, acc: 0.09132947976878612
Epoch: 30, loss: 1.84e+02, acc: 0.19884393063583816
Epoch: 40, loss: 2.49e+02, acc: 0.2682080924855491
Epoch: 50, loss: 1.83e+02, acc: 0.2832369942196532
Epoch: 60, loss: 1.38e+02, acc: 0.28670520231213875
Epoch: 70, loss: 47.2, acc: 0.30057803468208094
Epoch: 80, loss: 50.5, acc: 0.3063583815028902
Epoch: 90, loss: 1.69e+02, acc: 0.31445086705202313
Epoch: 100, loss: 85.1, acc: 0.3202312138728324
Epoch: 110, loss: 1.38e+02, acc: 0.3179190751445087
Epoch: 120, loss: 1.53e+02, acc: 0.3179190751445087
Epoch: 130, loss: 1.74e+02, acc: 0.3225433526011561
Epoch: 140, loss: 45.3, acc: 0.32947976878612717
Epoch: 150, loss: 93.8, acc: 0.3421965317919075
Epoch: 160, loss: 98.5, acc: 0.3352601156069364
Epoch: 170, loss: 1.23e+02, acc: 0.33872832369942196
Epoch: 180, loss: 67.9, acc: 0.3421965317919075
Epoch: 190, loss: 58.1, acc: 0.3398843930635838
Epoch: 20

In [26]:
# Final check on the test split: share of predictions within 20 % of the label.
percent = 0.2
model.eval()  # explicit switch to eval mode before the evaluation call
get_accuracy(model, loader=test_data_loader, pct_close=percent)

0.37745664739884394

In [27]:
# Inspection only: labels of the last mini-batch processed by the training loop
# (the loop variable is still defined after the loop ends).
data_labels

tensor([40.0500,  9.5800, 22.6400, 22.3900, 23.3000, 23.6900,  7.2200, 41.8800,
        19.8900, 24.8700, 26.8000, 40.5800,  8.7200,  6.8800, 22.7300,  8.4400,
         3.5300, 10.3800,  9.7800, 19.7100, 83.8100, 10.5500, 15.8000, 31.9000,
        38.6200, 13.6400, 38.6200, 18.3900, 38.8800,  4.8600, 32.4600, 19.3700],
       device='cuda:0')

In [28]:
# Inspection only: model outputs for the last mini-batch processed by the
# training loop, for side-by-side comparison with `data_labels`.
preds

tensor([ 23.2516,  13.0853,  24.1150,  16.7987,  14.6763,  15.1937,  10.3775,
         37.3490,  16.2988,   8.0150,  14.8764,  25.1121,   7.1874,   8.6581,
         28.2685,  17.6605,  15.7314,  13.8994,  17.3974,  13.7495, 106.7835,
         15.5437,  18.8969,  11.7963,  29.6159,  19.3711,  27.8761,  13.9124,
         17.4818,   7.0095,  22.2469,  15.4811], device='cuda:0',
       grad_fn=<SqueezeBackward1>)