In [379]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd

In [380]:
device = torch.device("cuda")
device

device(type='cuda')

In [381]:
# setting seeds
np.random.seed(445)
torch.manual_seed(445)

<torch._C.Generator at 0x22a9b1912f0>

## Model

In [382]:
class SimplePredictor(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.bn1 = nn.BatchNorm1d(num_hidden)
        self.fn1 = nn.LeakyReLU()
        self.d1 = nn.Dropout(0.3)
        self.linear2 = nn.Linear(num_hidden, num_hidden)
        self.bn2 = nn.BatchNorm1d(num_hidden)
        self.fn2 = nn.LeakyReLU()
        self.d2 = nn.Dropout(0.3)
        self.linear3 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        x = self.linear1(x)
        x = self.bn1(x)
        x = self.fn1(x)
        x = self.d1(x)
        x = self.linear2(x)
        x = self.bn2(x)
        x = self.fn2(x)
        x = self.d2(x)
        x = self.linear3(x)
        return x

In [383]:
def get_accuracy(model, loader, pct_close):
    correct = 0
    total = 0
    model.eval()
    for x, labels in loader:
        x, labels = x.to(device), labels.to(device)
        output = model(x)
        pred = torch.round(output)
        correct += (torch.abs(pred-labels.view_as(pred)) < torch.abs(pct_close * labels.view_as(pred))).sum().item()
        # correct += (torch.abs(pred-labels.view_as(pred)) < pct_close).sum().item()
        total += x.shape[0]
    return correct / total

## Data

In [384]:
data06 = pd.read_csv("../data/processed/target06.csv")

In [385]:
all_data = data.TensorDataset(torch.from_numpy((data06.values[:,:-1] - data06.values[:,:-1].min(0)) / data06.values[:,:-1].ptp(0)).float(), torch.from_numpy(data06.values[:,-1]).float())  # with normalization
train_dataset, test_dataset, valid_dataset = torch.utils.data.random_split(all_data, (round(0.7 * len(all_data)), round(0.2 * len(all_data)), round(0.1 * len(all_data))))

In [386]:
data_loader = data.DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)
test_data_loader = data.DataLoader(test_dataset, batch_size=256, shuffle=False, drop_last=False)
valid_data_loader = data.DataLoader(valid_dataset, batch_size=256, shuffle=False, drop_last=False)

## Prepare model

In [387]:
model = SimplePredictor(num_inputs=8, num_hidden=300, num_outputs=1)
model.to(device)
print(model)

SimplePredictor(
  (linear1): Linear(in_features=8, out_features=300, bias=True)
  (bn1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fn1): LeakyReLU(negative_slope=0.01)
  (d1): Dropout(p=0.3, inplace=False)
  (linear2): Linear(in_features=300, out_features=300, bias=True)
  (bn2): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fn2): LeakyReLU(negative_slope=0.01)
  (d2): Dropout(p=0.3, inplace=False)
  (linear3): Linear(in_features=300, out_features=1, bias=True)
)


In [388]:
loss_fun = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
epochs = 300

## Training

In [389]:
for epoch in range(epochs):
    model.train()  # reset in get_accuracy
    for data_inputs, data_labels in data_loader:
        data_inputs = data_inputs.to(device)
        data_labels = data_labels.to(device)
        preds = model(data_inputs).squeeze(dim=1)
        loss = loss_fun(preds, data_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}, acc: {get_accuracy(model, valid_data_loader, 0.2)}")

Epoch: 0, loss: 7.36e+02, acc: 0.0
Epoch: 10, loss: 8.34e+02, acc: 0.0
Epoch: 20, loss: 4.62e+02, acc: 0.0011560693641618498
Epoch: 30, loss: 7.85e+02, acc: 0.003468208092485549
Epoch: 40, loss: 3.44e+02, acc: 0.006936416184971098
Epoch: 50, loss: 3.23e+02, acc: 0.015028901734104046
Epoch: 60, loss: 2.4e+02, acc: 0.025433526011560695
Epoch: 70, loss: 2.28e+02, acc: 0.03468208092485549
Epoch: 80, loss: 1.68e+02, acc: 0.06242774566473988
Epoch: 90, loss: 1.7e+02, acc: 0.06358381502890173
Epoch: 100, loss: 1.32e+02, acc: 0.07745664739884393
Epoch: 110, loss: 1.4e+02, acc: 0.08323699421965318
Epoch: 120, loss: 1.19e+02, acc: 0.13641618497109825
Epoch: 130, loss: 82.7, acc: 0.19653179190751446
Epoch: 140, loss: 1.75e+02, acc: 0.2543352601156069
Epoch: 150, loss: 1.22e+02, acc: 0.30289017341040464
Epoch: 160, loss: 57.0, acc: 0.3190751445086705
Epoch: 170, loss: 80.6, acc: 0.3514450867052023
Epoch: 180, loss: 89.6, acc: 0.3791907514450867
Epoch: 190, loss: 68.4, acc: 0.3872832369942196
Epoch

In [390]:
percent = 0.2
model.eval()
get_accuracy(model, test_data_loader, percent)

0.4092485549132948

In [391]:
data_labels

tensor([24.9500, 21.5200,  8.2900,  7.9000, 23.4700, 22.2500, 56.1500, 16.1700,
        14.7100, 66.9300, 21.7900, 15.4800,  9.3500, 21.7100, 16.5300, 38.6200,
        10.1500, 20.4600, 17.9100, 29.2800, 41.2500, 16.3300, 10.4500,  9.1900,
        12.0900,  3.6900, 14.9000, 22.7200, 15.6800, 11.9000, 36.4800, 12.2200,
        12.6400, 13.5600, 29.5500, 32.6500, 29.1600, 24.2900, 14.5900, 29.3300,
        20.3000, 23.9300, 15.9400, 12.4500, 17.3900,  7.0200, 15.2800,  7.0300,
        18.8200, 34.2000, 32.3900, 51.0200, 16.9500, 12.5400, 16.5100,  9.7100,
        13.2500, 25.3200, 14.3000,  9.0700,  9.6500, 15.8900,  4.2600, 12.0600],
       device='cuda:0')

In [392]:
preds

tensor([12.8636, 26.4139, 14.2402, 12.4019, 32.3715, 20.2916, 61.6947, 33.9478,
        13.5699, 44.9695, 16.5993, 15.7461, 11.2462, 18.4349, 18.4429, 33.6172,
        15.8223, 20.7246, 13.6811, 22.8553, 39.9642, 14.7579, 14.9323, 14.3362,
        25.5350, 18.4485, 18.1879, 29.4806, 31.1633,  8.8487, 20.3193, 24.5834,
        13.7796, 27.0540, 20.7070, 25.0987, 46.6343, 17.1021, 10.9134, 21.4790,
        26.6901, 21.4482, 19.2879, 12.8897, 20.9707,  8.9411, 10.1038, 25.2689,
        14.0907, 27.4160, 16.7024, 34.7234, 16.5905, 13.4571, 39.3282, 18.0339,
        19.5916, 34.8895, 19.0875,  9.0359, 17.9265, 18.8102,  3.9590, 14.2117],
       device='cuda:0', grad_fn=<SqueezeBackward1>)