In [393]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd

In [394]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [395]:
# Seed NumPy and PyTorch with one shared value so that runs are repeatable.
SEED = 445
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x22a9b1912f0>

## Model

In [396]:
class SimplePredictor(nn.Module):
    """Fully connected network with two hidden layers.

    Each hidden layer is ``Linear -> BatchNorm1d -> LeakyReLU -> Dropout``.
    A final ``Linear`` layer maps to ``num_outputs`` with no activation.

    Args:
        num_inputs: Number of input features per sample.
        num_hidden: Width of both hidden layers.
        num_outputs: Number of values produced per sample.
        dropout: Drop probability shared by both dropout layers. Defaults to
            0.3, the value that used to be hard-coded.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs, dropout=0.3):
        super().__init__()
        # Sub-modules keep their original names and creation order, so
        # state_dict keys, the printed repr and seeded initialisation are
        # unchanged.
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.bn1 = nn.BatchNorm1d(num_hidden)
        self.fn1 = nn.LeakyReLU()
        self.d1 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(num_hidden, num_hidden)
        self.bn2 = nn.BatchNorm1d(num_hidden)
        self.fn2 = nn.LeakyReLU()
        self.d2 = nn.Dropout(dropout)
        self.linear3 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        """Run ``x`` through both hidden layers and the output layer.

        Args:
            x: Input tensor whose last dimension is ``num_inputs``.

        Returns:
            Tensor whose last dimension is ``num_outputs``.
        """
        # First hidden layer.
        x = self.d1(self.fn1(self.bn1(self.linear1(x))))
        # Second hidden layer.
        x = self.d2(self.fn2(self.bn2(self.linear2(x))))
        # Output layer, no activation.
        return self.linear3(x)

In [397]:
def get_accuracy(model, loader, pct_close):
    """Return the share of samples predicted within a relative tolerance.

    A sample counts as correct when
    ``|round(prediction) - label| < |pct_close * label|``. The comparison is
    strict, so a sample whose label is exactly 0 is never counted as correct.

    Args:
        model: Module to evaluate. It is switched to eval mode and left in
            that mode; call ``model.train()`` before resuming training.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        pct_close: Relative tolerance, e.g. ``0.2`` for 20 %.

    Returns:
        ``correct / total`` as a float, or ``0.0`` if ``loader`` yields no
        samples.
    """
    model.eval()
    # Evaluate on the device the model lives on instead of a notebook global.
    # A parameter-free model leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    # Gradients are not needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for x, labels in loader:
            if target_device is not None:
                x, labels = x.to(target_device), labels.to(target_device)
            pred = torch.round(model(x))
            labels = labels.view_as(pred)
            within_tolerance = torch.abs(pred - labels) < torch.abs(pct_close * labels)
            correct += within_tolerance.sum().item()
            total += x.shape[0]
    # Guard against an empty loader instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

## Data

In [398]:
# Read the preprocessed table from disk.
target06_csv = "../data/processed/target06.csv"
data06 = pd.read_csv(target06_csv)

In [399]:
# Features are every column except the last; the last column is the target.
feature_values = data06.values[:, :-1]
target_values = data06.values[:, -1]

# Min-max scale each feature column to [0, 1].
# NOTE(review): the min and range are computed on the full table, before the
# split, so validation and test rows influence the normalisation.
feature_min = feature_values.min(0)
# np.ptp() replaces ndarray.ptp(), which was removed in NumPy 2.0.
feature_range = np.ptp(feature_values, axis=0)
# A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
feature_range = np.where(feature_range == 0, 1, feature_range)

all_data = data.TensorDataset(
    torch.from_numpy((feature_values - feature_min) / feature_range).float(),
    torch.from_numpy(target_values).float(),
)

# 70 / 20 / 10 split. The last part takes the remainder so the three lengths
# always sum to len(all_data); rounding all three independently can be off by
# one, which makes random_split raise.
num_train = round(0.7 * len(all_data))
num_test = round(0.2 * len(all_data))
num_valid = len(all_data) - num_train - num_test
train_dataset, test_dataset, valid_dataset = torch.utils.data.random_split(
    all_data, (num_train, num_test, num_valid)
)

In [400]:
# Training batches are shuffled and an incomplete final batch is dropped.
# The evaluation loaders keep every sample, in a fixed order.
eval_loader_options = {"batch_size": 256, "shuffle": False, "drop_last": False}
data_loader = data.DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)
test_data_loader = data.DataLoader(test_dataset, **eval_loader_options)
valid_data_loader = data.DataLoader(valid_dataset, **eval_loader_options)

## Prepare model

In [401]:
# Build the network (8 inputs, 500 hidden units, 1 output) and move it to the
# selected device; Module.to() returns the module itself, so it can be chained.
model = SimplePredictor(num_inputs=8, num_hidden=500, num_outputs=1).to(device)
print(model)

SimplePredictor(
  (linear1): Linear(in_features=8, out_features=500, bias=True)
  (bn1): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fn1): LeakyReLU(negative_slope=0.01)
  (d1): Dropout(p=0.3, inplace=False)
  (linear2): Linear(in_features=500, out_features=500, bias=True)
  (bn2): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fn2): LeakyReLU(negative_slope=0.01)
  (d2): Dropout(p=0.3, inplace=False)
  (linear3): Linear(in_features=500, out_features=1, bias=True)
)


In [402]:
# Training configuration: mean-squared-error loss, Adam optimizer.
epochs = 300
learning_rate = 1e-5
loss_fun = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training

In [409]:
# Train for `epochs` passes over the training loader. Every 10th epoch, print
# the mean training loss of that epoch and the validation accuracy at a 20 %
# relative tolerance.
for epoch in range(epochs):
    # get_accuracy() switches the model to eval mode, so re-enable training here.
    model.train()
    loss_sum = 0.0
    num_batches = 0
    for data_inputs, data_labels in data_loader:
        data_inputs = data_inputs.to(device)
        data_labels = data_labels.to(device)
        # The model returns shape (batch, 1); squeeze it to match the 1-D labels.
        preds = model(data_inputs).squeeze(dim=1)
        loss = loss_fun(preds, data_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate the detached loss as a tensor; it is converted to a Python
        # float only when logging, which avoids a device sync on every batch.
        loss_sum = loss_sum + loss.detach()
        num_batches += 1
    if epoch % 10 == 0:
        # Report the epoch mean rather than the last mini-batch, which is noisy
        # and would be undefined if the loader produced no batches.
        mean_loss = float(loss_sum) / num_batches if num_batches else float("nan")
        valid_acc = get_accuracy(model, valid_data_loader, 0.2)
        print(f"Epoch: {epoch}, loss: {mean_loss:.3}, acc: {valid_acc}")

KeyboardInterrupt: 

In [404]:
# Accuracy on the test split: share of rounded predictions within 20 % of the label.
percent = 0.2
model.eval()
get_accuracy(model, test_data_loader, pct_close=percent)

0.41502890173410406

In [405]:
# Labels of the most recent training mini-batch; `data_labels` is a variable
# left over from the training loop.
data_labels

tensor([  7.3100,  23.8300,  17.0900,  16.7200,  17.6200,   4.7100,  24.1400,
         60.7700,  24.0200,  15.0500,   6.7800,  13.0900,  29.9500,  24.4800,
          9.4600,   9.2800,  30.8900,  29.0800,  12.0500,   9.4300,   9.1100,
         56.6800,  16.2800,  17.7600,  26.7300,  31.9300,  35.3100,  13.4300,
         64.2500,  23.5900,  55.9300,  28.3900,   5.7700,  14.0900,  11.6300,
         20.8500,  18.6600,  35.7100,  15.2500,  43.5600,  16.8000,  22.3600,
         45.5500,  29.6800,  45.1900,   7.0200,   6.7100,   7.7900,  32.2300,
         16.5200,  16.0700,  29.5300,  17.7600,  16.9000,  22.6300,  15.6400,
         23.5400,  17.0200, 107.0300,  34.4500,  14.7100,  17.0200,  33.2800,
          8.0300], device='cuda:0')

In [406]:
# Model outputs for the most recent training mini-batch; `preds` is a variable
# left over from the training loop.
preds

tensor([11.3600, 17.8969, 18.0133, 19.0191, 15.2848, 14.5450, 20.0980, 36.6738,
         9.6147, 13.2605, 21.8079, 10.2801, 23.8805, 16.0375, 11.5407, 14.6885,
        37.4205, 21.9232, 15.9340, 12.8984, 11.0779, 25.0718, 11.2050, 16.2207,
        18.6796, 18.7991, 27.1358, 16.7870, 71.8349, 21.0481, 46.3770, 13.9490,
        10.8034, 11.6345, 12.5956, 23.8274, 21.6561, 18.2344, 14.5048, 48.8154,
        19.9559, 17.4760, 54.2418, 11.8113, 46.1624,  8.8858, 13.2907, 10.2789,
        16.0052, 14.8832, 16.4630, 23.4398, 19.0115, 18.7728, 21.8953, 11.1997,
        20.9393, 14.9818, 84.7230, 29.2692, 12.1623, 15.5643, 18.3759, 14.1447],
       device='cuda:0', grad_fn=<SqueezeBackward1>)