In [1]:
# import libraries
import numpy as np
import csv
import torch
import torch.nn as nn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from models.LSTM import LSTM
from models.RNN import RNN
from torch.utils.data import DataLoader
from dataset import Dataset
import os
from torchsummary import summary
from tools.adjust_learning_rate import adjust_learning_rate
from tools.train import train
from tools.test import test
from torch.utils.tensorboard import SummaryWriter
import wandb
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr"
torch.manual_seed(42)



<torch._C.Generator at 0x7f091ffb1cb0>

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load 3D numpy matrices (user, time, transaction type)
ns = np.load('data/neg_sequences.npy')
ps = np.load('data/pos_sequences.npy')

transaction_size = ns.shape[-1]

# Take a look at the given data with OHE (one-hot encodings)
print('Shape of negative sequence array (OHE): ', ns.shape)
print('Shape of positive sequence array (OHE): ', ps.shape)

# Convert one-hot encodings to integers:
ns = np.argmax(ns, axis=2)
ps = np.argmax(ps, axis=2)

# Take a look at the given data
print('Shape of negative sequence array: ', ns.shape)
print('Shape of positive sequence array: ', ps.shape)


Shape of negative sequence array (OHE):  (100000, 10, 312)
Shape of positive sequence array (OHE):  (11979, 10, 312)
Shape of negative sequence array:  (100000, 10)
Shape of positive sequence array:  (11979, 10)


In [3]:
# Label the data
ns_label = np.zeros_like(ns[:,0])
ps_label = np.ones_like(ps[:,0])

# Concetenate positive sequences with negative sequences regarding users with correponding labels (axis=0)
X = np.concatenate((ns, ps), axis=0)
y = np.concatenate((ns_label, ps_label), axis=0) 

# Shuffle data and labels, for reproductivity set random_state=0
# dataset, labels = shuffle(dataset, labels, random_state=42)

In [4]:
# Define train, test and validation ratios
train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1

# Split the data / Shuffle it and maintain class balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=42, shuffle=True)

# Further split train_data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=val_ratio, random_state=42, shuffle=True)


In [5]:
# Print train, test and validation dataset and label shapes
print('X_train.shape: ', X_train.shape, ' - y_train.shape: ', y_train.shape)
print('X_test.shape: ', X_test.shape, ' - y_test.shape: ', y_test.shape)
print('X_val.shape: ', X_val.shape, ' - y_val.shape: ', y_val.shape)

# Convert numpy arrays to torch.tensor
X_train, y_train = torch.from_numpy(X_train), torch.from_numpy(y_train)
X_train, y_train = X_train.to(device, dtype=torch.int32), y_train.to(device, dtype=torch.float32)
X_test, y_test = torch.from_numpy(X_test), torch.from_numpy(y_test)
X_test, y_test = X_test.to(device, dtype=torch.int32), y_test.to(device, dtype=torch.float32)
X_val, y_val = torch.from_numpy(X_val), torch.from_numpy(y_val)
X_val, y_val = X_val.to(device, dtype=torch.int32), y_val.to(device, dtype=torch.float32)

# Number of positive sequences in training data
num_pos_seq = torch.count_nonzero(y_train).view(1)
num_neg_seq = (y_train.shape[0] - num_pos_seq).view(1)
print('Training data -> num_pos_seq: ', num_pos_seq, ' num_neg_seq: ', num_neg_seq)

# Create a custom dataset
train_dataset = Dataset(X_train, y_train, device)
test_dataset = Dataset(X_test, y_test, device)
val_dataset = Dataset(X_val, y_val, device)


X_train.shape:  (90702, 10)  - y_train.shape:  (90702,)
X_test.shape:  (11198, 10)  - y_test.shape:  (11198,)
X_val.shape:  (10079, 10)  - y_val.shape:  (10079,)
Training data -> num_pos_seq:  tensor([9724], device='cuda:0')  num_neg_seq:  tensor([80978], device='cuda:0')


In [6]:
###############
##### RNN #####
###############

# Initialize W&B 
wandb.init(
    # set the wandb project where this run will be logged
    project='RNN-Classifier',

    # track hyperparameters and run metadata
    config= dict(
    batch_size = 50,
    transaction_size = transaction_size,
    embedding_dim = 32,
    hidden_dim = 64,
    num_layers = 1,
    device = device,
    batch_first = True,
    fc_hidden_dim = 64,
    num_classes = 1,
    num_epochs = 200,
    learning_rate = 5e-4,
    weight_decay = 0.0, 
    lr_update_step = None,
    log_step = 100,
    lr_step_decay = False,
    gamma = 0.85,
    )
)
# initialize config
config = wandb.config

# Divide train and test dataset into batches
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=X_test.shape[0], shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=X_val.shape[0], shuffle=True, drop_last=True)

# Check whether data is splitted correctly -> X_.shape: (batch, seq, encoding), y_.shape: (batch)
# for i, (X_, y_) in enumerate(train_loader): 
    # print(X_.shape, y_.shape)
    # print(X_[:10,:])
    # print(y_[:10])

model = RNN(config.transaction_size, config.embedding_dim, config.hidden_dim, config.num_layers, config.device,
            num_classes = config.num_classes, batch_first = config.batch_first, fc_hidden_dim = config.fc_hidden_dim)
model.to(device)

# for i, (X_, y_) in enumerate(train_loader):
#     out = model(X_)
#     break
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         # print (name, param.data)
#         print (name)

SyntaxError: invalid syntax (270467018.py, line 20)

In [None]:
# Observe initial performance of the model without any training
test(model, test_loader, device)

In [None]:
# # LSTM settings
# embedding_dim = 64
# hidden_dim = 64
# transaction_size = 312
# num_layers = 2

# # parameter setting
# num_epochs = 50
# batch_size = 100
# use_gpu = (device.type == 'cuda')
# learning_rate = 0.01

# # Divide training dataset into batches
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

# # Check whether data is splitted correctly -> X_.shape: (batch, seq, encoding), y_.shape: (batch)
# for i, (X_, y_) in enumerate(train_loader): 
#     print(X_.shape, y_.shape)
#     print(i)

# model = LSTM(embedding_dim, hidden_dim, transaction_size, num_layers, batch_size, use_gpu)

# model.to(device)


In [None]:
# Loss and optimizer
criterion = nn.BCEWithLogitsLoss(pos_weight = (num_neg_seq/num_pos_seq) * torch.ones([1]).to(config.device))
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, betas=(0.9, 0.999),
                             eps=1e-8, weight_decay=config.weight_decay, amsgrad=False)  
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=config.gamma)

In [None]:
# Train the model
train(model, train_loader, val_loader, criterion, optimizer, scheduler, config)

In [None]:
# Test the model -> no need to compute gradients (for memory efficiency)
test(model, test_loader, device)

In [None]:
wandb.finish()