In [1]:
import copy
import math

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the first CUDA device when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Load the pre-transformed frame once at module level; the dataset class defined
# below reads this `dataa` object directly instead of opening a file itself.
# NOTE(review): unpickling can execute arbitrary code -- only load files you trust.
dataa = pd.read_pickle("transformed_dataset")

In [33]:
class AmericanExpressProfileTimeSeriesDataset(Dataset):
    """Row-wise view over the module-level ``dataa`` frame as ``(features, label)`` tensors.

    Each item is built from one DataFrame row: every column except
    ``customer_ID`` and ``target`` becomes the float32 feature tensor, and the
    ``target`` value becomes a one-element float32 label tensor. Both tensors
    are moved to the module-level ``device`` before being returned.

    NOTE(review): ``dataset_file`` is accepted but never read -- the data always
    comes from the global ``dataa`` (the ``pd.read_pickle(dataset_file)`` call is
    disabled). Every instance therefore serves the same frame regardless of
    the path passed in.
    """

    def __init__(self, dataset_file, transformation=False):
        """Bind the dataset to ``dataa``.

        Args:
            dataset_file: Currently unused (see the class note).
            transformation: Optional callable applied to the feature tensor in
                ``__getitem__``; any falsy value disables it.
        """
        self.dataset = dataa  # pd.read_pickle(dataset_file)
        self.transformation = transformation

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(data, label)`` tensors for the row at integer position ``idx``."""
        row = self.dataset.iloc[[idx]]
        label = torch.tensor(row["target"].values, dtype=torch.float32)

        # Going through a Python list re-packs the row into one dense float32 array
        # (presumably needed when cells hold per-feature sequences -- TODO confirm).
        features = row.drop(['customer_ID', 'target'], axis=1).values[0].tolist()
        features = np.array(features, dtype=np.float32)

        # Inputs are plain data and must NOT require grad: otherwise every backward
        # pass also computes gradients with respect to the input batch.
        data = torch.from_numpy(features)

        label = label.to(device)
        data = data.to(device)
        if self.transformation:
            data = self.transformation(data)
        return data, label

In [34]:
# Build the dataset and hold out 20% of the rows for evaluation.
full_dataset = AmericanExpressProfileTimeSeriesDataset("transformed_dataset")

train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# Seed the split so the same rows land in the hold-out set after a kernel restart;
# with an unseeded split, a reloaded model can be evaluated on rows it was trained on.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# Reshuffle the training rows every epoch (random_split only permutes once);
# evaluation order does not matter, so the test loader stays sequential.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64)

In [None]:
# Show the shape of the feature tensor of the first training sample.
train_dataset[0][0].shape

In [92]:
class FullyConnectedNetwork(nn.Module):
    """Dense branch: maps a flat 2561-value input to 10 non-negative features."""

    def __init__(self):
        super().__init__()

        # Layer widths from input to output; every Linear is followed by a ReLU,
        # including the last one.
        widths = (2561, 3000, 1000, 500, 10)
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Linear(fan_in, fan_out))
            stages.append(nn.ReLU())
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` (last dimension 2561) through the stack and return the result."""
        return self.lin(x)

class ConvolutionalNetwork(nn.Module):
    """Conv1d branch over 197-channel inputs; the result is flattened per sample.

    For an input of shape ``(batch, 197, 13)`` the sequence length shrinks
    13 -> 11 -> 4 -> 2 -> 1, so the flattened output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()

        first = nn.Conv1d(197, 230, kernel_size=3, stride=1)
        second = nn.Conv1d(230, 100, kernel_size=4, stride=2)
        third = nn.Conv1d(100, 10, kernel_size=2, stride=2)
        self.conv = nn.Sequential(
            first,
            nn.ReLU(),
            second,
            nn.ReLU(),
            third,  # the last convolution feeds the pooling layer directly
            nn.AvgPool1d(kernel_size=2, stride=1),
        )

    def forward(self, x):
        """Apply the convolution stack and collapse everything after the batch axis."""
        features = self.conv(x)
        batch = features.shape[0]
        return features.view(batch, -1)

class PredictionNetwork(nn.Module):
    """Joins the convolutional and dense branches and emits one sigmoid score per sample."""

    def __init__(self):
        super().__init__()

        self.conv = ConvolutionalNetwork()
        self.lin = FullyConnectedNetwork()

        # Head over the concatenated branch outputs (20 values per sample).
        head = [
            nn.Linear(20, 10),
            nn.ReLU(),
            nn.Linear(10, 5),
            nn.ReLU(),
            nn.Linear(5, 1),
            nn.Sigmoid(),
        ]
        self.pred = nn.Sequential(*head)

    def forward(self, x):
        """Feed ``x`` to both branches, concatenate along dim 1, and score the result.

        The dense branch receives ``x`` flattened to ``197 * 13`` values per row.
        """
        conv_features = self.conv(x)
        flat_input = x.view(-1, 197 * 13)
        dense_features = self.lin(flat_input)
        merged = torch.cat((conv_features, dense_features), 1)
        return self.pred(merged)
        
# Instantiate the combined network and place its parameters on the selected device.
model = PredictionNetwork().to(device)

In [85]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            pred = torch.round(pred)
            correct += (pred == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss

In [86]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch: move it to the module-level ``device``, compute the loss,
    back-propagate, and take one optimizer step. The current loss is printed
    every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: ``nn.Module`` being trained (updated in place).
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    # Make sure the model is in training mode even if an earlier evaluation
    # pass (or a loaded checkpoint) left it in eval mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X = X.to(device)
        y = y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [9]:
# Registry filled by the training cell: one entry per epoch, keyed by the epoch index `t`.
models = {}

In [91]:
# NOTE: `batch_size` is not read below; the data loaders were already created
# with a literal batch size of 64.
batch_size = 64
epochs = 1000

loss_fn = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters())
#optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # Store an independent copy of the weights. Assigning `model` itself would
    # make every entry alias the same (latest) object, so `models[k]` would never
    # be the epoch-k state. Each copy holds a full set of parameters, so memory
    # grows with the number of epochs kept.
    models[t] = copy.deepcopy(model)
    test_loss = test_loop(test_dataloader, model, loss_fn)
    #scheduler.step(test_loss)
print("Done!")

Epoch 1
-------------------------------
loss: 0.704355  [    0/367130]
loss: 0.352601  [ 6400/367130]
loss: 0.347753  [12800/367130]
loss: 0.210369  [19200/367130]
loss: 0.202459  [25600/367130]
loss: 0.195823  [32000/367130]
loss: 0.179990  [38400/367130]
loss: 0.319682  [44800/367130]
loss: 0.246297  [51200/367130]
loss: 0.220392  [57600/367130]
loss: 0.227206  [64000/367130]
loss: 0.243464  [70400/367130]
loss: 0.238975  [76800/367130]
loss: 0.353398  [83200/367130]
loss: 0.244033  [89600/367130]
loss: 0.349914  [96000/367130]
loss: 0.298490  [102400/367130]
loss: 0.255954  [108800/367130]
loss: 0.181289  [115200/367130]
loss: 0.269248  [121600/367130]
loss: 0.294199  [128000/367130]
loss: 0.289939  [134400/367130]
loss: 0.267621  [140800/367130]
loss: 0.366261  [147200/367130]
loss: 0.317008  [153600/367130]
loss: 0.281931  [160000/367130]
loss: 0.255432  [166400/367130]
loss: 0.226527  [172800/367130]
loss: 0.246981  [179200/367130]
loss: 0.225174  [185600/367130]
loss: 0.180268  

KeyboardInterrupt: 

In [44]:
#torch.save(model, "./model")
#model = torch.load("./model")
#model.eval()

In [57]:
# Pick the entry stored under key 2 of `models` (the training cell writes `models[t]`
# with `t` starting at 0, so this is the third epoch's entry).
# NOTE(review): raises KeyError if training stopped before that key was written; also
# check whether the training cell stored a copy or a reference to `model` under this key.
model_ = models[2]

In [None]:
# Build a sequential loader over the dataset object created for "transformed_test_dataset".
# NOTE(review): the dataset class assigns the module-level `dataa` frame and does not read
# the path it is given, so this appears to iterate the same rows as `full_dataset` -- confirm.
full_validation_dataset = AmericanExpressProfileTimeSeriesDataset("transformed_test_dataset")
validation_dataloader = DataLoader(full_validation_dataset, batch_size=64)

In [None]:
def validation_loop(dataloader, model, out_file):
    """Write one ``customer_ID,prediction`` CSV row per sample to ``{out_file}.csv``.

    Args:
        dataloader: Iterable of ``(X, customer_ID)`` batches, where ``customer_ID``
            is a sequence with one identifier per row of ``X``.
        model: ``nn.Module`` producing one score per sample (e.g. shape
            ``(batch, 1)``); its train/eval mode is restored on exit.
        out_file: Output path without the ``.csv`` suffix.
    """
    was_training = model.training
    model.eval()
    try:
        # `with` closes the file even if a batch fails part-way through.
        with torch.no_grad(), open(f"{out_file}.csv", "w") as data_file:
            data_file.write("customer_ID,prediction\n")
            for X, customer_ID in dataloader:
                # Same device handling as train_loop; a no-op if already there.
                X = X.to(device)
                # Flatten (batch, 1) outputs so each row carries a bare number;
                # tolist() on the 2-D tensor would yield "[0.5]" style cells.
                scores = model(X).reshape(-1).tolist()
                for sample_id, score in zip(customer_ID, scores):
                    data_file.write(f"{sample_id},{score}\n")
    finally:
        model.train(was_training)

In [None]:
# Write predictions for every batch of `validation_dataloader` to ./test_data/test_labels.csv
# (validation_loop appends the ".csv" suffix itself).
# NOTE(review): this passes `model`, not the `model_` selected from `models` earlier -- confirm.
validation_loop(validation_dataloader, model, "./test_data/test_labels")