In [2]:
import copy
import math
import time

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# Compute device used by the cells below: the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
class AmericanExpressProfileTimeSeriesDataset(Dataset):
    """Labelled dataset whose samples are rows of a CSV file.

    Every row must provide a ``customer_ID`` and a ``target`` column. Of the
    remaining columns the first one is discarded unparsed (presumably a saved
    DataFrame index -- confirm against the file) and each of the others must
    hold a string of the form ``"[v1, v2, ...]"``.

    Args:
        dataset_file: Path (or buffer) handed to ``pandas.read_csv``.
        transformation: Optional callable applied to the sample tensor after
            it has been moved to ``device``; any falsy value disables it.
    """

    def __init__(self, dataset_file, transformation=False):
        self.dataset = pd.read_csv(dataset_file)
        self.transformation = transformation

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.dataset)

    @staticmethod
    def _parse_series(cell):
        """Turn one serialized list such as ``"[0.1, 0.2]"`` into a list of floats."""
        # Strip the surrounding brackets, then split on the ", " separator.
        return [float(item) for item in cell[1:-1].split(", ")]

    def __getitem__(self, idx):
        """Return ``(data, label)`` for row ``idx``, both on ``device``.

        ``data`` is a float32 tensor with one row per parsed column and
        ``label`` is a float32 tensor of shape ``(1,)`` holding ``target``.
        """
        row = self.dataset.iloc[[idx]]
        label = row["target"].values
        cells = row.drop(['customer_ID', 'target'], axis=1).values[0].tolist()

        # The first remaining column is skipped; all others are parsed.
        series = [self._parse_series(cell) for cell in cells[1:]]

        # Inputs are plain data: they must not require gradients. Tracking
        # them only adds autograd overhead and stops DataLoader worker
        # processes from serialising the samples.
        data = torch.tensor(np.array(series, dtype=np.float32), dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)
        label = label.to(device)
        data = data.to(device)
        if self.transformation:
            data = self.transformation(data)
        return data, label

In [87]:
class Block(nn.Module):
    """Residual block made of two ``Conv1d`` + ``BatchNorm1d`` stages.

    Both convolutions use ``kernel_size=3``, ``stride=1`` and zero padding of
    1, so the length of the last axis is preserved. The block input is added
    to the output of the second stage before the final LeakyReLU.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels produced by the block. When it
            differs from ``in_channels`` the skip connection goes through a
            1x1 convolution + batch norm so the addition is well defined;
            when both are equal the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.relu = nn.LeakyReLU()

        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=1, padding=1,  padding_mode='zeros'),
            nn.BatchNorm1d(out_channels),
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1,  padding_mode='zeros'),
            nn.BatchNorm1d(out_channels),
        )

        # Created after conv1/conv2 so the equal-channel case consumes the
        # random generator exactly as before and adds no state_dict entries.
        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1),
                nn.BatchNorm1d(out_channels),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Apply the block to ``x`` of shape ``(batch, in_channels, length)``."""
        identity = self.shortcut(x)
        out = self.relu(self.conv1(x))
        out = self.conv2(out)
        return self.relu(out + identity)

class ResNet(nn.Module):
    """Stack of residual ``Block`` layers followed by a small MLP head.

    The network expects input of shape ``(batch, 189, length)``. Four blocks
    are applied, the last axis is averaged away, and the resulting
    ``(batch, 189)`` features go through three linear layers ending in a
    sigmoid, so the output has shape ``(batch, 1)`` with values in ``[0, 1]``.

    NOTE(review): ``block5``..``block7`` and ``self.relu`` are created but
    never used by ``forward``. They are kept so parameter names and the order
    of random initialisation stay unchanged; wire them in or drop them
    deliberately.
    """

    def __init__(self):
        super().__init__()

        self.relu = nn.ReLU()

        self.block1 = Block(189, 189)
        self.block2 = Block(189, 189)
        self.block3 = Block(189, 189)
        self.block4 = Block(189, 189)
        self.block5 = Block(189, 189)
        self.block6 = Block(189, 189)
        self.block7 = Block(189, 189)

        self.linear = nn.Sequential(
            nn.Linear(189, 100),
            nn.LeakyReLU(),
            nn.Linear(100, 50),
            nn.LeakyReLU(),
            nn.Linear(50, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one sigmoid score per sample for ``x`` of shape ``(batch, 189, length)``."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)

        # Global average over the last axis. For length 13 this equals the
        # former AvgPool1d(13) + flatten, but it also works for other lengths
        # and no longer builds a pooling module on every call.
        x = x.mean(dim=-1)
        x = self.linear(x)
        return x

# Build the residual network and place its parameters on the selected device.
model = ResNet().to(device)

In [206]:
class ConvolutionalNetwork(nn.Module):
    """Fixed 1-D convolutional classifier ending in a sigmoid.

    The stack expects input with 197 channels. The ``Linear(1024, 256)`` that
    follows ``Flatten`` requires the last axis to have collapsed to length 1,
    which is the case for inputs of length 13. The output has shape
    ``(batch, 1)``.

    Args:
        convolution_layers, linear_layers, activation, final_activation:
            Accepted for backward compatibility only. The architecture is
            hard-coded and none of these values is read, so they are now
            optional and ``ConvolutionalNetwork()`` can be called directly.
    """

    def __init__(self, convolution_layers=None, linear_layers=None, activation=None, final_activation=None):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv1d(197, 256, 2, stride=1),
            nn.ReLU(),
            nn.Conv1d(256, 512, 2, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=1),
            nn.Conv1d(512, 1024, 2, stride=1),
            nn.ReLU(),
            nn.Conv1d(1024, 1024, 2, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=1),
            nn.Conv1d(1024, 1024, 2, stride=1),
            nn.ReLU(),
            nn.Conv1d(1024, 1024, 2, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(5, stride=1),
            nn.Flatten(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run ``x`` through the whole sequential stack and return the scores."""
        return self.conv(x)

#model = ConvolutionalNetwork()
#model = model.to(device)

In [24]:
class LSTM(nn.Module):
    """Multi-layer LSTM classifier with a sigmoid head.

    The final hidden states of the last two LSTM layers are concatenated and
    passed through three linear layers, giving one score in ``[0, 1]`` per
    sample.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers; must be at least 2 because
            two layers' hidden states are concatenated.

    Raises:
        ValueError: If ``num_layers`` is smaller than 2.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        if num_layers < 2:
            # forward() indexes hn[-2]; with a single layer that index does not exist.
            raise ValueError("LSTM needs num_layers >= 2 to concatenate the last two hidden states")
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        self.linear = nn.Sequential(
            # Two hidden states are concatenated, hence 2 * hidden_size
            # (this was hard-coded to 20, i.e. correct only for hidden_size=10).
            nn.Linear(2 * hidden_size, 10),
            nn.ReLU(),
            nn.Linear(10, 5),
            nn.ReLU(),
            nn.Linear(5, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return scores of shape ``(batch, 1)`` for ``x`` of shape ``(batch, seq, input_size)``."""
        # Zero initial states created next to the input, so no global device is needed.
        h_0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c_0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hidden = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
        return self.linear(hidden)

# Build the LSTM classifier (189 input features, hidden size 10, 10 layers) on the selected device.
model = LSTM(189, 10, 10).to(device)

In [6]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            pred = torch.round(pred)
            correct += (pred == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss

In [7]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    The model is put into training mode first, so a preceding evaluation (or
    a freshly loaded model left in eval mode) cannot leave dropout/BatchNorm
    layers frozen. Every 100 batches the current loss and the number of
    samples processed so far are printed.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: Module being optimised.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)

    # Train where the model lives; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        if model_device is not None:
            X, y = X.to(model_device), y.to(model_device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Separate names so the loss tensor is not shadowed by a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [8]:
# Registry filled by the training cell: maps the zero-based epoch index to the model stored for it.
models = {}

In [9]:
# Training configuration read by the data-loading and training cells below.
epochs = 10  # number of train/evaluate rounds run by the training cell
learning_rate = 0.001  # learning rate handed to the optimizer
batch_size = 16  # samples per batch for every DataLoader in this notebook
shuffle=True  # passed to both the train and the test DataLoader
train_test_ration = 0.8  # fraction of the dataset assigned to the training split ("ration" = ratio)

In [29]:
# Load the whole labelled dataset; the optional transposition is currently disabled (see trailing comment).
full_dataset = AmericanExpressProfileTimeSeriesDataset("transformed_dataset.csv")#, transformation=lambda data: data.T)

# Split sizes: `train_test_ration` of the rows for training, the remainder for testing.
train_size = int(train_test_ration * len(full_dataset))
test_size = len(full_dataset) - train_size
# NOTE(review): no generator/seed is passed here and none is set in the visible cells,
# so the split presumably differs between runs -- confirm whether that is intended.
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# Both loaders share the same batch size and shuffle flag.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle)

In [88]:
# Binary cross-entropy on the sigmoid outputs, optimised with AdamW.
loss_fn = nn.BCELoss()
#loss_fn = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # Store an independent snapshot. Assigning `model` itself would make every
    # entry of `models` point at the same object, i.e. at the final weights.
    models[t] = copy.deepcopy(model)
    test_loss = test_loop(test_dataloader, model, loss_fn)
    #scheduler.step(test_loss)
print("Done!")

Epoch 1
-------------------------------
loss: 0.672205  [    0/367130]
loss: 0.791440  [ 1600/367130]
loss: 0.379075  [ 3200/367130]
loss: 0.345529  [ 4800/367130]
loss: 0.276147  [ 6400/367130]
loss: 0.389461  [ 8000/367130]
loss: 0.458248  [ 9600/367130]
loss: 0.076126  [11200/367130]
loss: 0.160398  [12800/367130]
loss: 0.500907  [14400/367130]
loss: 0.256985  [16000/367130]
loss: 0.143625  [17600/367130]
loss: 0.513790  [19200/367130]
loss: 0.391611  [20800/367130]
loss: 0.453627  [22400/367130]
loss: 0.252253  [24000/367130]
loss: 0.299439  [25600/367130]
loss: 0.182512  [27200/367130]
loss: 0.291900  [28800/367130]
loss: 0.135493  [30400/367130]
loss: 0.139523  [32000/367130]
loss: 0.647722  [33600/367130]


In [130]:
# Serialize the model stored under key 1 (the second epoch, since keys are zero-based) to ./model.
torch.save(models[1], "./model")
# Disabled counterpart: reload the saved model and switch it to evaluation mode.
#model = torch.load("./model")
#model.eval()

In [None]:
# Pick the model stored under key 3 (the fourth epoch).
# NOTE(review): `model_` is not referenced by any later visible cell; the prediction
# cell passes `model` instead -- confirm which one is meant to be used.
model_ = models[3]

In [None]:
class AmericanExpressProfileTimeSeriesValidationDataset(Dataset):
    """Unlabelled dataset whose samples are rows of a CSV file.

    Every row must provide a ``customer_ID`` column. Of the remaining columns
    the first one is discarded unparsed (presumably a saved DataFrame index --
    confirm against the file) and each of the others must hold a string of
    the form ``"[v1, v2, ...]"``.

    Args:
        dataset_file: Path (or buffer) handed to ``pandas.read_csv``.
        transformation: Optional callable applied to the sample tensor after
            it has been moved to ``device``; any falsy value disables it.
        nrows: Number of rows to read from the file, forwarded to
            ``pandas.read_csv``. Defaults to 10, the value that used to be
            hard-coded. NOTE(review): 10 looks like a debugging leftover --
            pass ``nrows=None`` to read the whole file.
    """

    def __init__(self, dataset_file, transformation=False, nrows=10):
        self.dataset = pd.read_csv(dataset_file, nrows=nrows)
        self.transformation = transformation

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.dataset)

    @staticmethod
    def _parse_series(cell):
        """Turn one serialized list such as ``"[0.1, 0.2]"`` into a list of floats."""
        # Strip the surrounding brackets, then split on the ", " separator.
        return [float(item) for item in cell[1:-1].split(", ")]

    def __getitem__(self, idx):
        """Return the float32 sample tensor for row ``idx``, placed on ``device``."""
        row = self.dataset.iloc[[idx]]
        cells = row.drop(['customer_ID'], axis=1).values[0].tolist()

        # The first remaining column is skipped; all others are parsed.
        series = [self._parse_series(cell) for cell in cells[1:]]

        # Inference inputs never need gradients, so none are requested.
        data = torch.tensor(np.array(series, dtype=np.float32), dtype=torch.float32)
        data = data.to(device)
        if self.transformation:
            data = self.transformation(data)
        return data

In [None]:
# Unlabelled dataset used for the final predictions; every sample is transposed (`data.T`).
full_validation_dataset = AmericanExpressProfileTimeSeriesValidationDataset("transformed_test_dataset", transformation=lambda data: data.T)

# No shuffle argument is given, so batches follow the row order of the file.
validation_dataloader = DataLoader(full_validation_dataset, batch_size=batch_size)

In [None]:
# Sanity check: shape of the first sample after the transposition above.
full_validation_dataset[0].shape

In [None]:
def validation_loop(dataloader, model, out_file):
    """Write one ``customer_ID,prediction`` line per sample to ``{out_file}.csv``.

    The model runs in evaluation mode without gradient tracking and is put
    back into its previous mode afterwards. Each prediction is written as a
    bare number.

    Args:
        dataloader: Yields either ``(X, customer_ID)`` batches or ``X`` only.
            In the second case the IDs are read, in row order, from the
            ``customer_ID`` column of ``dataloader.dataset.dataset``; this is
            only correct for a loader that does not shuffle.
        model: Module producing one score per sample.
        out_file: Output path without the ``.csv`` extension.

    Raises:
        ValueError: If batches carry no IDs and they cannot be looked up.
    """
    # Predict where the model lives; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    try:
        # The context manager closes the file even when a batch fails.
        with torch.no_grad(), open(f"{out_file}.csv", "w") as data_file:
            data_file.write("customer_ID,prediction\n")
            rows_seen = 0
            for batch in dataloader:
                if isinstance(batch, (list, tuple)):
                    X, customer_ID = batch
                else:
                    X = batch
                    try:
                        id_column = dataloader.dataset.dataset["customer_ID"]
                    except (AttributeError, KeyError, TypeError) as err:
                        raise ValueError(
                            "batches carry no customer IDs and the dataset exposes no 'customer_ID' column"
                        ) from err
                    customer_ID = id_column.iloc[rows_seen:rows_seen + len(X)].tolist()
                rows_seen += len(X)

                if model_device is not None:
                    X = X.to(model_device)
                # Flatten (batch, 1) scores so each value is written without list brackets.
                scores = model(X).reshape(-1).tolist()
                for sample_id, score in zip(customer_ID, scores):
                    data_file.write(f"{sample_id},{score}\n")
    finally:
        model.train(was_training)

In [None]:
# Write the predictions of `model`; the function appends ".csv", so the file is ./test_data/test_labels.csv.
validation_loop(validation_dataloader, model, "./test_data/test_labels")