In [1]:
# import libraries
import numpy as np
import csv
import torch
import torch.nn as nn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from models.LSTM import LSTM
from models.RNN import RNN
from torch.utils.data import DataLoader
from dataset import Dataset
import os
from torchsummary import summary
from tools.adjust_learning_rate import adjust_learning_rate




In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load 3D numpy matrices (user, time, transaction type)
ns = np.load('data/neg_sequences.npy')
ps = np.load('data/pos_sequences.npy')

transaction_size = ns.shape[-1]

# Take a look at the given data with OHE (one-hot encodings)
print('Shape of negative sequence array (OHE): ', ns.shape)
print('Shape of positive sequence array (OHE): ', ps.shape)

# Convert one-hot encodings to integers:
ns = np.argmax(ns, axis=2)
ps = np.argmax(ps, axis=2)

# Take a look at the given data
print('Shape of negative sequence array: ', ns.shape)
print('Shape of positive sequence array: ', ps.shape)


Shape of negative sequence array (OHE):  (100000, 10, 312)
Shape of positive sequence array (OHE):  (11979, 10, 312)
Shape of negative sequence array:  (100000, 10)
Shape of positive sequence array:  (11979, 10)


In [3]:
# Label the data
ns_label = np.zeros_like(ns[:,0])
ps_label = np.ones_like(ps[:,0])

# Concetenate positive sequences with negative sequences regarding users with correponding labels (axis=0)
X = np.concatenate((ns, ps), axis=0)
y = np.concatenate((ns_label, ps_label), axis=0) 

# Shuffle data and labels, for reproductivity set random_state=0
# dataset, labels = shuffle(dataset, labels, random_state=42)

In [4]:
# Define train, test and validation ratios
train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1

# Split the data / Shuffle it and maintain class balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=42, shuffle=True)

# Further split train_data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=val_ratio, random_state=42, shuffle=True)


In [5]:
# Print train, test and validation dataset and label shapes
print('X_train.shape: ', X_train.shape, ' - y_train.shape: ', y_train.shape)
print('X_test.shape: ', X_test.shape, ' - y_test.shape: ', y_test.shape)
print('X_val.shape: ', X_val.shape, ' - y_val.shape: ', y_val.shape)

# Convert numpy arrays to torch.tensor
X_train, y_train = torch.from_numpy(X_train), torch.from_numpy(y_train)
X_train, y_train = X_train.to(device, dtype=torch.int32), y_train.to(device, dtype=torch.float32)
X_test, y_test = torch.from_numpy(X_test), torch.from_numpy(y_test)
X_test, y_test = X_test.to(device, dtype=torch.int32), y_test.to(device, dtype=torch.float32)
X_val, y_val = torch.from_numpy(X_val), torch.from_numpy(y_val)
X_val, y_val = X_val.to(device, dtype=torch.int32), y_val.to(device, dtype=torch.float32)

# Create a custom dataset
train_dataset = Dataset(X_train, y_train, device)
test_dataset = Dataset(X_test, y_test, device)
val_dataset = Dataset(X_val, y_val, device)


X_train.shape:  (90702, 10)  - y_train.shape:  (90702,)
X_test.shape:  (11198, 10)  - y_test.shape:  (11198,)
X_val.shape:  (10079, 10)  - y_val.shape:  (10079,)


In [6]:
batch_size = 100

# Divide train and test dataset into batches
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Check whether data is splitted correctly -> X_.shape: (batch, seq, encoding), y_.shape: (batch)
# for i, (X_, y_) in enumerate(train_loader): 
#     print(X_.shape, y_.shape)
#     print(X_[:10,:])
#     print(y_[:10])
#     break

In [7]:
# RNN settings
embedding_dim = 64
hidden_dim = 128
num_layers = 1

## parameter setting
num_epochs = 50
learning_rate = 0.01

model = RNN(transaction_size, embedding_dim, hidden_dim, num_layers, device, num_classes = 1, batch_first = True, fc_hidden_dim = 128)

# for i, (X_, y_) in enumerate(train_loader):
#     out = model(X_)
#     break
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         # print (name, param.data)
#         print (name)








In [8]:
# # LSTM settings
# embedding_dim = 64
# hidden_dim = 64
# transaction_size = 312
# num_layers = 2

# # parameter setting
# num_epochs = 50
# batch_size = 100
# use_gpu = (device.type == 'cuda')
# learning_rate = 0.01

# # Divide training dataset into batches
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

# # Check whether data is splitted correctly -> X_.shape: (batch, seq, encoding), y_.shape: (batch)
# for i, (X_, y_) in enumerate(train_loader): 
#     print(X_.shape, y_.shape)
#     print(i)

# model = LSTM(embedding_dim, hidden_dim, transaction_size, num_layers, batch_size, use_gpu)

# model.to(device)


In [9]:
# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [10]:
# Train the model
running_loss = 0.0
lr_update_step = 10
log_loss_step = 100

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    optimizer = adjust_learning_rate(optimizer, epoch, learning_rate, lr_update_step)
    for i, (X_, y_) in enumerate(train_loader):  
        X_ = X_.to(device)
        y_ = y_.to(device)
        
        # Forward pass
        preds_y_ = model(X_)
        loss = criterion(preds_y_, y_.view(-1, 1))
        # print("preds_y_: ", preds_y_)
        # print("preds_y_.shape", preds_y_.shape)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if (i+1) % log_loss_step == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {running_loss/log_loss_step:.4f}')
            running_loss = 0.0

learning_rate: 0.010000
Epoch [1/50], Step [100/908], Loss: 0.2919
Epoch [1/50], Step [200/908], Loss: 0.2504
Epoch [1/50], Step [300/908], Loss: 0.2443
Epoch [1/50], Step [400/908], Loss: 0.2455
Epoch [1/50], Step [500/908], Loss: 0.2490
Epoch [1/50], Step [600/908], Loss: 0.2369
Epoch [1/50], Step [700/908], Loss: 0.2391
Epoch [1/50], Step [800/908], Loss: 0.2339
Epoch [1/50], Step [900/908], Loss: 0.2452
learning_rate: 0.010000
Epoch [2/50], Step [100/908], Loss: 0.2650
Epoch [2/50], Step [200/908], Loss: 0.2298
Epoch [2/50], Step [300/908], Loss: 0.2338
Epoch [2/50], Step [400/908], Loss: 0.2402
Epoch [2/50], Step [500/908], Loss: 0.2456
Epoch [2/50], Step [600/908], Loss: 0.2294
Epoch [2/50], Step [700/908], Loss: 0.2346
Epoch [2/50], Step [800/908], Loss: 0.2301
Epoch [2/50], Step [900/908], Loss: 0.2430
learning_rate: 0.010000
Epoch [3/50], Step [100/908], Loss: 0.2615
Epoch [3/50], Step [200/908], Loss: 0.2293
Epoch [3/50], Step [300/908], Loss: 0.2301
Epoch [3/50], Step [400/9

In [11]:
# Test the model -> no need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for X_, y_ in test_loader:
        X_.to(device)
        y_.to(device)
        output = model(X_)
        # max returns (value ,index)
        preds_y_ = torch.round(output)
        print("preds_y_: ", torch.transpose(preds_y_[:10], 0, 1))
        print("y_: ", torch.transpose(y_.view(-1,1)[:10], 0, 1))
        # break
        n_samples += y_.shape[0]
        n_correct += (preds_y_ == y_.view(-1,1)).sum().item()
        print('n_correct: ', n_correct)
        print('n_samples: ', n_samples)

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

preds_y_:  tensor([[0., 0., 1., 1., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
y_:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
n_correct:  86
n_samples:  100
preds_y_:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
y_:  tensor([[0., 0., 1., 0., 0., 0., 0., 0., 1., 0.]], device='cuda:0')
n_correct:  178
n_samples:  200
preds_y_:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
y_:  tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
n_correct:  268
n_samples:  300
preds_y_:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
y_:  tensor([[0., 0., 1., 1., 0., 0., 0., 0., 0., 0.]], device='cuda:0')
n_correct:  355
n_samples:  400
preds_y_:  tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], device='cuda:0')
y_:  tensor([[0., 0., 0., 0., 0., 0., 1., 1., 0., 0.]], device='cuda:0')
n_correct:  444
n_samples:  500
preds_y_:  tensor([[0., 0., 0., 0., 0., 1., 1., 0., 0., 0.]], device='cuda:0')
y_

In [12]:
# epoch: 5 -> acc: 91.26%
# epoch: 50 -> acc: 90.7%
# epoch: 1000 -> acc: 90.8%