## Model Training

With the dataset preprocessed, this notebook builds and trains an ANN to classify each transaction's category, aiming for a decent level of accuracy.

#### Import Libraries

I'll be using the PyTorch framework to build and train the ANN. 

In [40]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

#### Load & Prepare Data

In [41]:
# Read the preprocessed dataset from disk
df = pd.read_csv("../dataset/clean_embedding_bank_transaction.csv")

# Features are every column whose name does not begin with "category_"
feature_columns = df.columns[~df.columns.str.startswith("category_")].tolist()
X = df[feature_columns].to_numpy()

# Sanity check: matrix dimensions and the first five rows
print(X.shape)
print(X[:5])

(258522, 111)
[[ 2.15000183e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  4.00000000e+00
   2.90000000e+01  0.00000000e+00  0.00000000e+00 -1.21868499e-01
  -1.20761722e-01  1.97299421e-02 -9.86674726e-02 -4.38451320e-01
  -1.96335524e-01 -1.55773252e-01 -7.67301798e-01 -7.35667229e-01
  -1.11007714e+00  1.19635329e-01 -7.43274093e-01  3.03134739e-01
   4.27381575e-01 -1.39323413e-01 -3.96797895e-01  4.48687136e-01
  -6.73760056e-01 -4.56344426e-01 -5.13957322e-01 -5.98248661e-01
   3.68088007e-01 -5.38518801e-02  1.04132719e-01 -6.89168334e-01
  -1.41409896e-02 -1.17392147e+00 -7.85798132e-02  8.00346583e-03
  -4.28601593e-01 -6.50245667e-01  7.18181014e-01 -6.94372535e-01
   1.40027106e-01 -4.61310267e-01  4.64503802e-02  9.86701906e-01
  -2.60781109e-01 -8.11234191e-02 -1.72459960e-01 -2.04221606e-01
  -1.66166008e-01  3.84134650e-01 -2.31410652e-01 -5.04765809e-01
  -3.08833510e-01  1.29452646e-01  5.55279136e-01  1.06327973e

In [42]:
# Extract all category columns as the label matrix.
# Select on the "category_" *prefix* -- the same rule used to build
# feature_columns -- instead of df.filter(like=...), which matches the text
# anywhere in a column name and could put one column into both X and y.
y = df.loc[:, df.columns.str.startswith("category_")].to_numpy()
print(y.shape)
print(y[:5])

(258522, 33)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]]


In [43]:
# Train-test split (70/30): test_size=0.3 holds out 30% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize features using z-score normalization
scaler = StandardScaler()

# Apply the scaler to specific columns 7 - 9, ['day_of_week', 'day_of_month', 'hour'].
# The scaler is fitted on the training rows only and then re-used on the test rows,
# so no test-set statistics enter the training data.
X_train[:, 7:10] = scaler.fit_transform(X_train[:, 7:10])
X_test[:, 7:10] = scaler.transform(X_test[:, 7:10])

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)  # Use float for BCE loss
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoaders (batch_size is defined once and used by both loaders)
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training: drop the trailing partial batch so BatchNorm never receives a
# single-sample batch while in training mode.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Evaluation: keep every sample; dropping the last partial batch would silently
# exclude test rows from the reported metrics.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# Print dataset shapes
print("Shape of X_train:", X_train_tensor.shape)
print("Shape of X_test:", X_test_tensor.shape)
print("Shape of y_train:", y_train_tensor.shape)
print("Shape of y_test:", y_test_tensor.shape)


Shape of X_train: torch.Size([180965, 111])
Shape of X_test: torch.Size([77557, 111])
Shape of y_train: torch.Size([180965, 33])
Shape of y_test: torch.Size([77557, 33])


#### Define the ANN Architecture

Since this is a multi-class classification problem (one-hot encoded category labels):

- Use fully connected layers
- Apply Batch Normalization for stable training
- Use Dropout to prevent overfitting
- Use Sigmoid activation at the output

In [44]:
# Define the ANN model
class TransactionClassifier(nn.Module):
    """Feed-forward classifier with two hidden blocks and a sigmoid output.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout(0.3), with
    widths 256 and 128. The output layer maps to `num_classes` units and is
    passed through an element-wise sigmoid, so every output lies in [0, 1].

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output units (one per category column).
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # Hidden block 1: input_size -> 256
        self.fc1 = nn.Linear(input_size, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)

        # Hidden block 2: 256 -> 128
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)

        # Output head: 128 -> num_classes, squashed per unit by a sigmoid
        self.fc3 = nn.Linear(128, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-category sigmoid scores for a batch of feature rows."""
        hidden = self.dropout1(self.relu1(self.bn1(self.fc1(x))))
        hidden = self.dropout2(self.relu2(self.bn2(self.fc2(hidden))))
        return self.sigmoid(self.fc3(hidden))

#### Initialise the Model

- Use binary cross-entropy (`nn.BCELoss`) on the sigmoid outputs, scoring each one-hot category column as an independent binary target
- Use Adam optimiser with learning rate 0.001

In [45]:
# Seed PyTorch's global RNG so weight initialisation and batch shuffling start
# from a fixed state on every run (42 matches the random_state of the data split).
torch.manual_seed(42)

# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define input size and output size
input_size = X_train.shape[1]  # Total number of input features
num_classes = y_train.shape[1]  # Number of one-hot encoded categories

# Initialize the model
model = TransactionClassifier(input_size, num_classes).to(device)

# Define loss function and optimizer.
# BCELoss expects probabilities in [0, 1], which the model's final Sigmoid provides.
# NOTE(review): the labels look one-hot (exactly one category per row); if that
# always holds, raw logits + nn.CrossEntropyLoss is the usual pairing -- confirm
# before changing, since it would also require removing the model's Sigmoid.
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Print model summary
print(model)

TransactionClassifier(
  (fc1): Linear(in_features=111, out_features=256, bias=True)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=128, out_features=33, bias=True)
  (sigmoid): Sigmoid()
)


#### Define Train Test Function

- Train the model for an epoch.
- Evaluate the model on the test set after each epoch.
- Print Train Accuracy, Train Loss, Test Accuracy, and Test Loss.

In [46]:
from tqdm import tqdm

# Helper shared by the training and evaluation phases
def _run_epoch(model, loader, loss_fn, device, optimizer=None, train=False, desc=""):
    """Run one full pass over `loader` and return `(mean_loss, accuracy)`.

    Args:
        model: Network to run; switched to train or eval mode according to `train`.
        loader: Iterable yielding `(batch_X, batch_y)` tensor pairs.
        loss_fn: Callable `loss_fn(outputs, batch_y)` returning a scalar tensor.
            Assumed to return the batch *mean* (the default reduction), since
            each batch loss is weighted by its batch size below.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer to step after each batch; only used when `train` is True.
        train: When True, gradients are computed and the optimizer is stepped.
        desc: Label shown on the progress bar.

    Returns:
        Tuple `(mean_loss, accuracy)` averaged per sample. Both are NaN when the
        loader yields no samples.
    """
    model.train(train)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(train):
        for batch_X, batch_y in progress:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            # Forward pass
            outputs = model(batch_X)
            loss = loss_fn(outputs, batch_y)

            # Backward pass (training only)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight the batch loss by its size so a smaller final batch does
            # not count as much as a full one in the epoch average.
            n_batch = batch_y.size(0)
            batch_loss = loss.item()
            loss_sum += batch_loss * n_batch

            # Instance-level accuracy: highest-scoring output vs. the one-hot target index
            predicted_classes = torch.argmax(outputs, dim=1)
            true_classes = torch.argmax(batch_y, dim=1)
            n_correct += (predicted_classes == true_classes).sum().item()
            n_seen += n_batch

            progress.set_postfix(loss=batch_loss)

    if n_seen == 0:
        # Nothing to average; report NaN instead of dividing by zero.
        return float("nan"), float("nan")
    return loss_sum / n_seen, n_correct / n_seen


def train_and_evaluate(model, train_loader, test_loader, loss_fn, optimizer, num_epochs=20, device="cpu"):
    """Train `model` for `num_epochs`, evaluating on `test_loader` after every epoch.

    One summary line (train/test loss and accuracy) is printed per epoch, followed
    by a completion message.

    Args:
        model: Network to train; moved to `device` before training starts.
        train_loader: Iterable of `(batch_X, batch_y)` training batches.
        test_loader: Iterable of `(batch_X, batch_y)` evaluation batches.
        loss_fn: Loss callable applied to `(outputs, batch_y)`.
        optimizer: Optimizer updating `model`'s parameters.
        num_epochs: Number of passes over `train_loader`.
        device: Device on which batches and the model are placed.

    Returns:
        dict with keys "train_loss", "train_acc", "test_loss", "test_acc", each a
        list holding one value per epoch. In a notebook, assign the result (or end
        the call with `;`) to keep it out of the cell output.
    """
    model.to(device)
    history = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}

    for epoch in range(num_epochs):
        tag = f"Epoch {epoch+1}/{num_epochs}"

        avg_train_loss, train_accuracy = _run_epoch(
            model, train_loader, loss_fn, device,
            optimizer=optimizer, train=True, desc=f"{tag} [Training]",
        )
        avg_test_loss, test_accuracy = _run_epoch(
            model, test_loader, loss_fn, device,
            train=False, desc=f"{tag} [Evaluating]",
        )

        history["train_loss"].append(avg_train_loss)
        history["train_acc"].append(train_accuracy)
        history["test_loss"].append(avg_test_loss)
        history["test_acc"].append(test_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f}, "
              f"Test Loss: {avg_test_loss:.4f}, Test Acc: {test_accuracy:.4f}")

    print("Training and evaluation complete!")
    return history


#### Train and Evaluate Model

In [47]:
# Run the 20-epoch train/evaluate loop on the selected device.
# The trailing semicolon keeps any value returned by the call out of the cell output.
train_and_evaluate(
    model,
    train_loader,
    test_loader,
    loss_fn,
    optimizer,
    num_epochs=20,
    device=device,
);


                                                                                          

Epoch [1/20] - Train Loss: 0.0528, Train Acc: 0.6727, Test Loss: 0.0377, Test Acc: 0.7400


                                                                                          

Epoch [2/20] - Train Loss: 0.0408, Train Acc: 0.7322, Test Loss: 0.0338, Test Acc: 0.7749


                                                                                          

Epoch [3/20] - Train Loss: 0.0387, Train Acc: 0.7467, Test Loss: 0.0323, Test Acc: 0.7864


                                                                                          

Epoch [4/20] - Train Loss: 0.0374, Train Acc: 0.7562, Test Loss: 0.0314, Test Acc: 0.7950


                                                                                           

Epoch [5/20] - Train Loss: 0.0364, Train Acc: 0.7626, Test Loss: 0.0304, Test Acc: 0.7958


                                                                                           

Epoch [6/20] - Train Loss: 0.0359, Train Acc: 0.7664, Test Loss: 0.0303, Test Acc: 0.7957


                                                                                           

Epoch [7/20] - Train Loss: 0.0353, Train Acc: 0.7708, Test Loss: 0.0307, Test Acc: 0.7888


                                                                                           

Epoch [8/20] - Train Loss: 0.0348, Train Acc: 0.7738, Test Loss: 0.0294, Test Acc: 0.8074


                                                                                           

Epoch [9/20] - Train Loss: 0.0345, Train Acc: 0.7747, Test Loss: 0.0292, Test Acc: 0.8003


                                                                                            

Epoch [10/20] - Train Loss: 0.0344, Train Acc: 0.7765, Test Loss: 0.0296, Test Acc: 0.7936


                                                                                            

Epoch [11/20] - Train Loss: 0.0340, Train Acc: 0.7786, Test Loss: 0.0283, Test Acc: 0.8097


                                                                                            

Epoch [12/20] - Train Loss: 0.0337, Train Acc: 0.7805, Test Loss: 0.0289, Test Acc: 0.7999


                                                                                            

Epoch [13/20] - Train Loss: 0.0336, Train Acc: 0.7816, Test Loss: 0.0278, Test Acc: 0.8119


                                                                                            

Epoch [14/20] - Train Loss: 0.0334, Train Acc: 0.7827, Test Loss: 0.0293, Test Acc: 0.7974


                                                                                            

Epoch [15/20] - Train Loss: 0.0331, Train Acc: 0.7850, Test Loss: 0.0283, Test Acc: 0.8080


                                                                                            

Epoch [16/20] - Train Loss: 0.0331, Train Acc: 0.7849, Test Loss: 0.0283, Test Acc: 0.8029


                                                                                            

Epoch [17/20] - Train Loss: 0.0329, Train Acc: 0.7854, Test Loss: 0.0277, Test Acc: 0.8138


                                                                                            

Epoch [18/20] - Train Loss: 0.0327, Train Acc: 0.7865, Test Loss: 0.0284, Test Acc: 0.8006


                                                                                            

Epoch [19/20] - Train Loss: 0.0327, Train Acc: 0.7875, Test Loss: 0.0277, Test Acc: 0.8088


                                                                                            

Epoch [20/20] - Train Loss: 0.0327, Train Acc: 0.7876, Test Loss: 0.0276, Test Acc: 0.8153
Training and evaluation complete!




In [48]:
# Save the model's learned parameters (state_dict only).
# Create the target folder first: torch.save fails if the directory is missing.
model_path = Path("../models/ANN_20e_1e-3lr_4l_classifier.pth")
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_path)

#### Predict Categories for Random Test Instances

In [50]:
# Function to predict categories for test instances
def predict_random_test_samples(model, X_test_tensor, y_test_tensor, label_encoder, num_samples=5, device="cpu"):
    """Print predicted vs. true category for randomly chosen test rows.

    Args:
        model: Trained network; put into eval mode here. It is not moved, so it
            must already live on `device`.
        X_test_tensor: Feature tensor, one row per test instance.
        y_test_tensor: One-hot label tensor aligned row-for-row with `X_test_tensor`.
        label_encoder: Object whose `inverse_transform(indices)` maps class indices
            to category names; index i must correspond to output unit i.
        num_samples: Number of rows to draw without replacement. Values larger
            than the test set are clamped to its size.
        device: Device on which the sampled rows are scored.

    Returns:
        None. Results are printed.
    """
    model.eval()  # Set model to evaluation mode

    # Never ask for more rows than exist (sampling without replacement would raise)
    num_samples = min(num_samples, len(X_test_tensor))

    # Select random samples from the test set
    indices = np.random.choice(len(X_test_tensor), num_samples, replace=False)

    # Index first, then move only the sampled rows to the device instead of
    # copying the whole test set for a handful of predictions.
    X_samples = X_test_tensor[indices].to(device)
    y_samples = y_test_tensor[indices]

    with torch.no_grad():
        # Get model predictions
        outputs = model(X_samples)

    predicted_classes = torch.argmax(outputs, dim=1).cpu().numpy()  # Convert to category index
    true_classes = torch.argmax(y_samples, dim=1).cpu().numpy()  # Convert ground truth to category index

    # Convert indices to category labels
    predicted_labels = label_encoder.inverse_transform(predicted_classes)
    true_labels = label_encoder.inverse_transform(true_classes)

    # Print results
    print("\n===== Model Predictions vs. Ground Truth =====")
    for i in range(num_samples):
        print(f"Test Instance {i+1}:")
        print(f"  ➤ Predicted Category: {predicted_labels[i]}")
        print(f"  ➤ Ground Truth:      {true_labels[i]}")
        print("---------------------------------------------------")


In [None]:
# Extract category column names dynamically (in DataFrame column order)
category_columns = [col for col in df.columns if col.startswith("category_")]

# Build a label encoder whose class index i is the i-th category column.
# LabelEncoder.fit() stores its classes *sorted*, which matches the column order
# only if the columns happen to be sorted already; assigning classes_ directly
# keeps the index -> name mapping aligned with the model's output units.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.array(category_columns)

# Call the function to predict & compare results
predict_random_test_samples(model, X_test_tensor, y_test_tensor, label_encoder, num_samples=5, device=device)


===== Model Predictions vs. Ground Truth =====
Test Instance 1:
  ➤ Predicted Category: category_Digital Entertainment
  ➤ Ground Truth:      category_Digital Entertainment
---------------------------------------------------
Test Instance 2:
  ➤ Predicted Category: category_Transfer Credit
  ➤ Ground Truth:      category_Transfer Credit
---------------------------------------------------
Test Instance 3:
  ➤ Predicted Category: category_Convenience Stores
  ➤ Ground Truth:      category_Convenience Stores
---------------------------------------------------
Test Instance 4:
  ➤ Predicted Category: category_Arts and Entertainment
  ➤ Ground Truth:      category_Arts and Entertainment
---------------------------------------------------
Test Instance 5:
  ➤ Predicted Category: category_Supermarkets and Groceries
  ➤ Ground Truth:      category_Uncategorized
---------------------------------------------------
