In [1]:
# Colab-only upload step: files.upload() opens a file picker in the notebook.
# Select the CCD.xls file here so that the following code cell can open it.
# NOTE(review): the next cell reads '/content/CCD.xls', so the upload presumably lands in the
# runtime's working directory rather than in Google Drive - confirm.
from google.colab import files
uploaded = files.upload()
# Background reading on accessing files from Colab:
#https://saturncloud.io/blog/how-to-read-a-file-from-drive-in-google-colab/
'''
L1 Normalization: normalizes so that the sum of absolute values along the specified axis is 1
Recommended for:
- Feature selection in sparse machine learning models
- When outliers are present and should not be removed

L2 Normalization: normalizes so that the Euclidean norm (L2 norm) along the specified axis is 1
Recommended for:
- When the direction of the data matters more than the actual values
- Regularization techniques in machine learning, such as weight decay
Sometimes recommended to apply L2 after applying Z standardization which centers the data around 0 with a standard deviation of 1. Here are the reasons for experimenting with it:
- Normalization for Specific Model Requirements: Some models or algorithms might benefit from having input data normalized in a specific way. Applying L2 normalization after Z-Standardization can be a part of such model-specific data preparation.
- Feature Scaling: L2 normalization can scale the feature vectors to have a Euclidean norm of 1, which might be beneficial for certain models that rely on the magnitude of the feature vector.
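- Regularization: Rescaling every feature vector to unit Euclidean length bounds the magnitude of the inputs, which can have a mild regularizing effect. Note that this is distinct from L2 regularization (weight decay), which penalizes large weights in the model to improve generalization and reduce overfitting.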
- Enhanced Numerical Stability: In some cases, combining Z-Standardization with L2 normalization can help improve the numerical stability of the optimization process during training.
- Experimental Purposes: If you are experimenting with different normalization techniques to see how they affect your model's performance, applying L2 normalization after Z-Standardization could be part of your experimentation process.

Min-Max Normalization: Scales the data to a fixed range (e.g., 0 to 1).
Recommended for temporal data:
Min-Max normalization is often suitable for temporal data when you want to preserve the temporal relationships in the data while scaling it to a specific range.
Useful when the absolute values of the data are not as important as the relative relationships between data points over time.


Z-Score Normalization (Standardization):
Method: Centers the data around 0 with a standard deviation of 1.
Recommended for temporal data:
Z-score normalization is beneficial when the temporal data follows a Gaussian distribution.
It can help in dealing with temporal data that exhibits seasonality or trends by removing the mean and scaling by the standard deviation.

LSTM Normalization (Layer Normalization):
Method: Normalizes the activations of each time step in a sequence.
Recommended for temporal data:
Specifically designed for recurrent neural networks like LSTMs (Long Short-Term Memory networks) to normalize the hidden states at each time step.
Helps in stabilizing training and improving convergence in sequential models.

Feature Scaling based on Time Windows:
Method: Normalize features within specific time windows or segments.
Recommended for temporal data:
Divide the temporal data into windows and scale the features within each window independently.
Useful when different parts of the time series have varying characteristics or scales.

Differencing:
Method: Compute differences between consecutive time steps.
Recommended for temporal data:
Transform the data by taking differences between adjacent time points to remove trends or seasonality.
Often used in time series analysis to make the data stationary.
'''

Saving CCD.xls to CCD.xls


"\nL1 Normalization: normalizes so that the sum of absolute values along the specified axis is 1\nRecommended for:\n- Feature selection in sparse machine learning models\n- When outliers are present and should not be removed\n\nL2 Normalization: normalizes so that the Euclidean norm (L2 norm) along the specified axis is 1\nRecommended for:\n- When the direction of the data matters more than the actual values\n- Regularization techniques in machine learning, such as weight decay\nSometimes recommended to apply L2 after applying Z standardization which centers the data around 0 with a standard deviation of 1. Here are the reasons for experimenting with it:\n-Normalization for Specific Model Requirements: Some models or algorithms might benefit from having input data normalized in a specific way. Applying L2 normalization after Z-Standardization can be a part of such model-specific data preparation.\n- Feature Scaling: L2 normalization can scale the feature vectors to have a Euclidean nor

In [2]:
from typing import List
import pandas as pd
import numpy as np
import math
import torch
import torch.nn.functional as F

# Show tensors in plain decimal notation rather than scientific notation
torch.set_printoptions(sci_mode=False)

df = pd.read_excel('/content/CCD.xls')

# Turn the sheet into a NumPy array, then drop its first row
# (per the original author's note, that row holds the headers rather than data)
sheet_values = df.to_numpy(dtype = None, copy = False)
input = np.delete(sheet_values, 0, 0)

# Cast the remaining rows to float64 and wrap them in a torch tensor
data_tensor = torch.from_numpy(input.astype('float64'))

'''
We need to normalize the data differently depending on which features will be used
    for their temporal dependencies (like payments each month) and which features won't (such as sex and age)
    hence we need to split the data accordingly

Each set of temporal data will be normalized separately from the other sets and from the non-temporal data, but all of them will use the same normalization technique - min-max
Non-temporal data will be normalized together using Z-standardization ==> experimentation with applying L2 normalization after Z-standardization is recommended
'''

class WindowSizeGreaterThanSequenceException(Exception):
    """Signals that the requested window is not smaller than the sequence being windowed."""

    def __init__(self, message="The size of the window defined for each subsequence is larger than the input sequence itself - it must be smaller than it"):
        # Let Exception record the text first, then expose it as an attribute as well
        super().__init__(message)
        self.message = message

class OverlapSizeException(Exception):
    """Signals an overlap fraction that does not lie between 0 and 1."""

    def __init__(self, message="The size of the overlap is not between 0 and 1"):
        # Let Exception record the text first, then expose it as an attribute as well
        super().__init__(message)
        self.message = message

class OverlapTypeException(Exception):
    """Signals that the overlap argument is not a float."""

    def __init__(self, message="overlap type must be a float"):
        # Let Exception record the text first, then expose it as an attribute as well
        super().__init__(message)
        self.message = message

class WindowTypeException(Exception):
    """Signals that the window_size argument is not an int."""

    def __init__(self, message="window_size type must be int"):
        # Let Exception record the text first, then expose it as an attribute as well
        super().__init__(message)
        self.message = message

class InputListTypeException(Exception):
    """Signals that the sequence to be windowed is not a Python list."""

    def __init__(self, message="input_list must be of type list"):
        # Let Exception record the text first, then expose it as an attribute as well
        super().__init__(message)
        self.message = message


def z_standardize(input_tensor, dim=None):
  '''
  Z-standardize a tensor: subtract the mean and divide by the standard deviation.

  Args:
    input_tensor: floating-point torch tensor to standardize.
    dim: optional dimension (or tuple of dimensions) over which the statistics are computed.
         None (the default) uses a single mean / standard deviation for the whole tensor,
         which is the original behaviour; e.g. dim=0 standardizes each column of a 2-D
         tensor on its own.

  Returns:
    A tensor with the same shape as input_tensor.
  '''
  if dim is None:
    mean = input_tensor.mean()
    std = input_tensor.std()
  else:
    mean = input_tensor.mean(dim=dim, keepdim=True)
    std = input_tensor.std(dim=dim, keepdim=True)

  # A constant input has zero spread; dividing by it would fill the result with NaN.
  # Use a divisor of 1 instead - the centred values are all 0 in that case anyway.
  safe_std = torch.where(std == 0, torch.ones_like(std), std)
  return (input_tensor - mean) / safe_std

def minmax_normalization(input_tensor, dim=None):
  '''
  Min-max normalize a tensor so that its values are rescaled to the range [0, 1].

  Args:
    input_tensor: torch tensor to rescale.
    dim: optional dimension (or tuple of dimensions) over which the minimum and maximum
         are taken. None (the default) uses one global minimum / maximum for the whole
         tensor, which is the original behaviour.

  Returns:
    A tensor with the same shape as input_tensor.
  '''
  if dim is None:
    min_val = input_tensor.min()
    max_val = input_tensor.max()
  else:
    min_val = input_tensor.amin(dim=dim, keepdim=True)
    max_val = input_tensor.amax(dim=dim, keepdim=True)

  # A constant input has max == min; dividing by that zero range would produce NaN.
  # Use a divisor of 1 instead so constant inputs map to 0.
  value_range = max_val - min_val
  safe_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)
  return (input_tensor - min_val) / safe_range

def window_overlap(input_list: list, window_size: int = 3, overlap: float = 0.7):
  '''
  Split a list into consecutive windows, each sharing its leading elements with the
  tail of the previous window.

  The number of shared elements is floor(window_size * overlap); every new window
  therefore starts (window_size - shared) positions after the previous one. If the
  list does not divide evenly, the final window is shorter than window_size.

  Examples:
    [1, 2, 3, 4, 5, 6], window_size=3, overlap=0.5 -> [[1, 2, 3], [3, 4, 5], [5, 6]]
    [1, 2, 3, 4, 5, 6], window_size=3, overlap=0.7 -> [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]
      (0.7 of 3 is rounded down to 2 shared elements, i.e. an effective 2/3 overlap)

  Args:
    input_list: the sequence to split; must be a list.
    window_size: number of elements per window; must be an int smaller than len(input_list).
    overlap: fraction of a window shared with the previous one; must be a float with 0 < overlap < 1.

  Returns:
    A list of windows (lists). When one of the argument checks above fails, the matching
    error message is printed and None is returned instead.

  Raises:
    ValueError: if window_size is smaller than 1.
  '''
  # Validate in a fixed order; a failed check is reported with print() and answered with
  # None rather than propagated (this is the contract the function has always had).
  try:
    if not isinstance(window_size, int):
      raise WindowTypeException
    if not isinstance(input_list, list):
      raise InputListTypeException
    if not window_size < len(input_list):
      raise WindowSizeGreaterThanSequenceException
    if not isinstance(overlap, float):
      raise OverlapTypeException
    if not 0 < overlap < 1:
      raise OverlapSizeException
  except (WindowTypeException, InputListTypeException, WindowSizeGreaterThanSequenceException,
          OverlapTypeException, OverlapSizeException) as e:
    print(e)
    return None

  if window_size < 1:
    raise ValueError("window_size must be at least 1")

  elements_shared_per_window = math.floor(window_size * overlap)
  # Always >= 1 because overlap < 1 guarantees fewer shared elements than window_size
  step = window_size - elements_shared_per_window

  # Slide over the list by `step` until a window reaches the end of the list. Slicing directly
  # (instead of pre-computing a window count) means no trailing elements are dropped and an
  # overlap that rounds down to zero shared elements simply yields back-to-back windows.
  windows = []
  start = 0
  while True:
    windows.append(input_list[start:start + window_size])
    if start + window_size >= len(input_list):
      break
    start += step
  return windows

def window_tensor(input_tensor: torch.Tensor, window_size: int, overlap: float):
  '''
  Apply window_overlap to every row of a 2-D tensor and stack the results.

  Args:
    input_tensor: tensor whose rows are the sequences to split into windows.
    window_size: window length forwarded to window_overlap.
    overlap: overlap fraction forwarded to window_overlap.

  Returns:
    A new tensor built from the nested list [row][window][element]. torch.tensor() can only
    build it when all windows have the same length, so window_size / overlap must divide
    the rows without leaving a shorter final window.
  '''
  # window_size and overlap are passed through explicitly; previously they were ignored and
  # window_overlap silently ran with its own defaults.
  return torch.tensor([window_overlap(row, window_size, overlap) for row in input_tensor.tolist()])


In [3]:
# Split the table into three 6-column temporal groups and one 5-column non-temporal group.
# NOTE(review): these ranges assume column 0 is already the first real feature. If the sheet
# carries a leading ID column, every slice below is shifted by one - verify against df.columns.
pay = data_tensor[:, 5:11]
bill_amt = data_tensor[:, 11:17]
pay_amt = data_tensor[:, 17:23]
non_temp = data_tensor[:, 0:5]

labels = data_tensor[:, -1].unsqueeze(1)  # Extract the last column and add a new dimension

z_non_temp = z_standardize(non_temp)

# Apply L2 normalization using F.normalize
z_non_temp_norm = F.normalize(z_non_temp, p=2, dim=1)  # L2 normalization can be experimented with in combination with Z-standardization
pay_norm = minmax_normalization(pay)
bill_amt_norm = minmax_normalization(bill_amt)
pay_amt_norm = minmax_normalization(pay_amt)


# Reshape every normalized temporal tensor into overlapping windows before concatenating them
pay_norm_window = window_tensor(pay_norm, 3, 0.7)
bill_amt_norm_window = window_tensor(bill_amt_norm, 3, 0.7)
pay_amt_norm_window = window_tensor(pay_amt_norm, 3, 0.7)


temporal_data_concatenated = torch.cat((pay_norm_window, bill_amt_norm_window, pay_amt_norm_window), dim=2)

# Number of windows per sample, read from the windowed data instead of being hard-coded,
# so the broadcasts below stay correct if the window size or overlap is changed
num_windows = temporal_data_concatenated.size(1)

z_non_temp_norm_reshaped = z_non_temp_norm.unsqueeze(1).expand(-1, num_windows, -1)  # Repeat the non-temporal features for every window (dimension 1)

features = torch.cat((z_non_temp_norm_reshaped, temporal_data_concatenated), dim=2)

labels_expanded = labels.unsqueeze(1).expand(-1, num_windows, -1)  # Repeat the label for every window to match the features tensor shape

# Final layout per window: all feature columns followed by the label as the last column
input = torch.cat((features, labels_expanded), dim=2).to(torch.float32)

print(input.shape)

torch.Size([30000, 4, 15])


In [4]:
# Define the custom dataset and dataloaders
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split

class CustomDataset(Dataset):
    """Dataset over a tensor whose samples hold feature columns followed by a label column.

    Each sample ``data[idx]`` is expected to be 2-D (rows x columns): the first
    ``num_features`` columns are the features and the column right after them is the label.

    Args:
        data: indexable tensor of samples.
        num_features: number of leading feature columns; defaults to 14, the layout
            this notebook builds.
    """

    def __init__(self, data, num_features=14):
        self.data = data
        self.num_features = num_features

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx`` as independent tensors."""
        sample = self.data[idx]
        features = sample[:, :self.num_features]  # leading feature columns
        label = sample[:, self.num_features]  # label column right after the features
        # clone().detach() hands out copies, so callers cannot modify the stored data through them
        return features.clone().detach(), label.clone().detach()

dataset = CustomDataset(input)  # Wrap the assembled tensor in the custom dataset

# Fixed seed so that the split and the training shuffle order are the same on every run
SPLIT_SEED = 42

# Define the sizes of your train, test, and validation sets
train_size = int(0.7 * len(dataset))  # 70% for training
test_size = int(0.15 * len(dataset))  # 15% for testing
val_size = len(dataset) - train_size - test_size  # Remaining for validation

# Split the dataset into train, test, and validation sets (seeded generator => reproducible membership)
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset, val_dataset = random_split(
    dataset, [train_size, test_size, val_size], generator=split_generator
)

# Create DataLoader instances for each set; only the training data is shuffled
shuffle_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, generator=shuffle_generator)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [5]:
'''
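TODO: apply SMOTE successfully to the TRAINING split only (resampling validation/test data would put synthetic samples into the evaluation). The disabled draft below resamples the whole `input` tensor before the split and never uses the resampled arrays, so it still needs rework.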

from imblearn.over_sampling import SMOTE
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(input[:, :14], input[:, 14])

# Define the custom dataset and dataloaders
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split

class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)
        #return len(self.features)

    def __getitem__(self, idx):
        sample = self.data[idx]
        features = sample[:, :14]  # Extract features (first 14 columns)
        label = sample[:, 14]  # Extract labels (last column)
        # return torch.tensor(features), torch.tensor(label)
        return features.clone().detach(), label.clone().detach()  # Use clone().detach() to construct tensors from existing data

dataset = CustomDataset(input)  # Create a TensorDataset from the data tensor

# Define the sizes of your train, test, and validation sets
train_size = int(0.7 * len(dataset))  # 70% for training
test_size = int(0.15 * len(dataset))  # 15% for testing
val_size = len(dataset) - train_size - test_size  # Remaining for validation

# Split the dataset into train, test, and validation sets
train_dataset, test_dataset, val_dataset = random_split(dataset, [train_size, test_size, val_size])

# Create DataLoader instances for each set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)'''

'\nTRY SUCCESSFULLY APPLYING SMOTE TO TRAINING ONLY\n\nfrom imblearn.over_sampling import SMOTE\nsmote = SMOTE()\nX_resampled, y_resampled = smote.fit_resample(input[:, :14], input[:, 14])\n\n# Define the custom dataset and dataloaders\nfrom torch.utils.data import Dataset, DataLoader, TensorDataset, random_split\n\nclass CustomDataset(Dataset):\n    def __init__(self, data):\n        self.data = data\n\n    def __len__(self):\n        return len(self.data)\n        #return len(self.features)\n\n    def __getitem__(self, idx):\n        sample = self.data[idx]\n        features = sample[:, :14]  # Extract features (first 14 columns)\n        label = sample[:, 14]  # Extract labels (last column)\n        # return torch.tensor(features), torch.tensor(label)\n        return features.clone().detach(), label.clone().detach()  # Use clone().detach() to construct tensors from existing data\n\ndataset = CustomDataset(input)  # Create a TensorDataset from the data tensor\n\n# Define the sizes of

In [86]:
# Define the RNN model

import torch.nn as nn
import os

# Set the environment variables for deterministic behavior - important in RNNs, as parallelism can
# give varied and unexpected results despite identical inputs and network architecture.
# NOTE(review): the PyTorch reproducibility notes also describe seeding and
# torch.use_deterministic_algorithms(); these variables alone may not be sufficient - confirm.
def _parse_cuda_version(version):
    """Return (major, minor) as ints for a string such as '10.2'; None if absent or unparseable."""
    if not version:
        return None
    parts = version.split('.')
    try:
        major = int(parts[0])
        minor = int(parts[1]) if len(parts) > 1 else 0
    except ValueError:
        return None
    return major, minor


# torch.version.cuda is None on CPU-only builds, and version strings must be compared
# numerically (as strings, '9.0' >= '10.2' would be True)
_cuda_version = _parse_cuda_version(torch.version.cuda)
if _cuda_version is not None:
    if _cuda_version == (10, 1):
        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    elif _cuda_version >= (10, 2):
        os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':16:8'  # Or ':4096:2' based on your CUDA version

device = "cuda" if torch.cuda.is_available() else "cpu"

class CustomRNN(nn.RNN):
    """nn.RNN variant that passes its input through CELU and then Mish before the recurrent layers.

    Args:
        input_size, hidden_size, num_layers, nonlinearity, bias, batch_first, dropout,
        bidirectional: forwarded unchanged to ``nn.RNN``.
        intermediate_size: width of the ``linear1`` / ``linear2`` pair created below.

    Note:
        ``linear1``, ``linear2`` and ``gelu`` are created but not used by ``forward``; they are
        kept so that the set of registered parameters (and any saved state_dict) stays the same.
    """

    def __init__(self, input_size, hidden_size, num_layers, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False, intermediate_size=30):
        # Options are forwarded by keyword so they cannot be mis-assigned if the parent's
        # positional parameter order differs between PyTorch versions
        super().__init__(
            input_size,
            hidden_size,
            num_layers,
            nonlinearity=nonlinearity,
            bias=bias,
            batch_first=batch_first,
            dropout=dropout,
            bidirectional=bidirectional,
        )

        self.linear1 = torch.nn.Linear(input_size, intermediate_size)
        self.linear2 = torch.nn.Linear(intermediate_size, input_size)
        self.celu = torch.nn.CELU()
        self.gelu = torch.nn.GELU()
        self.mish = torch.nn.Mish()
        # https://pytorch.org/docs/stable/nn.html ===> use to find different activation functions to use

    def forward(self, input, hx=None):
        """Run the RNN on the activated input.

        Args:
            input: input sequence tensor, laid out according to ``batch_first``.
            hx: optional initial hidden state; if a tuple is given, its first element is used.
                When omitted, ``nn.RNN`` starts from an all-zero hidden state.

        Returns:
            ``(output, hidden_state)`` exactly as produced by ``nn.RNN.forward``.
        """
        if isinstance(hx, tuple):
            hx = hx[0]  # Only the first element of a hidden-state tuple is used
        # hx=None is handed to the parent, which builds the zero state itself with the right
        # batch dimension for either batch_first setting (the previous hand-built zeros always
        # took the batch size from dimension 0, which is wrong when batch_first=False)

        input = self.mish(self.celu(input))

        output, hidden_state = super().forward(input, hx)
        return output, hidden_state  # index 0: per-step outputs, index 1: final hidden state of each layer/direction

# Network dimensions
input_size = 14   # number of feature columns fed to the RNN at each step
hidden_size = 16
num_layers = 3

# Build the model; options are spelled out by keyword so each value is self-explanatory
model = CustomRNN(
    input_size,
    hidden_size,
    num_layers,
    nonlinearity='tanh',
    bias=False,
    batch_first=True,
    dropout=0.3,
    bidirectional=True,
)

In [87]:
import torch.optim as optim
import torch
import numpy as np


# Training configuration: epoch count, binary cross-entropy loss, and Adam over the model's parameters
num_epochs = 10
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Targets arrive as [batch, windows] where every row repeats one value (e.g. [1,1,1,1] or [0,0,0,0]);
# averaging each row collapses that redundancy into a single [batch, 1] column
def target_reshaper(target: torch.tensor):
  return target.mean(dim=1, keepdim=True)

# Reduce the raw network outputs to one value per sample so they line up with the reshaped targets
def output_reshaper_mean(output: torch.tensor, input: torch.tensor, target: torch.tensor):
    # One row per sample: the row count comes from the input batch, everything else is flattened
    per_sample = output.reshape(input.size(0), -1)
    squashed = torch.sigmoid(per_sample)
    # Average across each row (dim=1), keeping the column dimension => [batch, 1]
    return squashed.mean(dim=1, keepdim=True)

def output_reshaper_sum(output: torch.tensor, input: torch.tensor, target: torch.tensor):
    # One row per sample: the row count comes from the input batch, everything else is flattened
    per_sample = output.reshape(input.size(0), -1)
    scaled = torch.sigmoid(per_sample) / 100
    # Add up each row (dim=1), keeping the column dimension => [batch, 1]
    return scaled.sum(dim=1, keepdim=True)

def output_reshaper_std(output: torch.tensor, input: torch.tensor, target: torch.tensor):
    # One row per sample: the row count comes from the input batch, everything else is flattened
    per_sample = output.reshape(input.size(0), -1)
    scaled = torch.sigmoid(per_sample) / 100
    # Standard deviation of each row (dim=1), keeping the column dimension => [batch, 1]
    return scaled.std(dim=1, keepdim=True)

def output_reshaper_max(output: torch.tensor, input: torch.tensor, target: torch.tensor):
    """Reduce the network outputs to the largest sigmoid activation per sample => [batch, 1].

    NOTE(review): this function previously returned prod(sigmoid(x) / 100) per row, which does
    not match its name and shrinks towards 0 as the row gets longer. It now returns the
    row-wise maximum, as the name says - confirm this is the intended reduction.
    """
    outputs_flattened = output.reshape(input.size(0), -1)  # One row per sample
    # The maximum of sigmoid values already lies in (0, 1), so no extra scaling is applied
    return torch.amax(torch.sigmoid(outputs_flattened), dim=1, keepdim=True)



for epoch in range(num_epochs):
    # ---- Training phase: one optimizer step per batch ----
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs, hidden_state = model(inputs)  # Separate outputs and hidden state

        outputs = output_reshaper_mean(outputs, inputs, targets)  # reshape to fit the targets --> [batch x 1]
        targets = target_reshaper(targets)                  # reshape to remove redundant data --> [batch x 1]

        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

    # ---- Validation phase ----
    # Reset the accumulators once per epoch, right before they are used. They used to be reset
    # inside the training batch loop, which repeated the work for every batch and left them
    # undefined if the training loader yielded no batches.
    total_loss = 0
    correct_predictions = 0
    total_samples = 0

    model.eval()
    with torch.no_grad():  # no gradients are needed while evaluating
        for inputs, targets in val_loader:
            outputs, hidden_state = model(inputs)  # Separate outputs and hidden state
            outputs = output_reshaper_mean(outputs, inputs, targets) # reshape to fit the targets
            targets = target_reshaper(targets)                  # reshape to remove redundant data

            loss = criterion(outputs, targets)
            total_loss += loss.item()

            # Calculate accuracy: threshold the predicted probability at 0.5 (binary classification)
            predictions = (outputs > 0.5).int()
            correct_predictions += (predictions == targets).sum().item()
            total_samples += len(targets)

    # Calculate metrics for the epoch (loss is the average of the per-batch losses)
    epoch_loss = total_loss / len(val_loader)
    epoch_accuracy = correct_predictions / total_samples

    # Print the metrics for the epoch
    print(f"Epoch {epoch + 1}: Validation Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

Epoch 1: Validation Loss: 0.5107, Accuracy: 0.7982
Epoch 2: Validation Loss: 0.4981, Accuracy: 0.8091
Epoch 3: Validation Loss: 0.4926, Accuracy: 0.8144
Epoch 4: Validation Loss: 0.4882, Accuracy: 0.8180
Epoch 5: Validation Loss: 0.4860, Accuracy: 0.8204
Epoch 6: Validation Loss: 0.4855, Accuracy: 0.8227
Epoch 7: Validation Loss: 0.4844, Accuracy: 0.8249
Epoch 8: Validation Loss: 0.4825, Accuracy: 0.8291
Epoch 9: Validation Loss: 0.4840, Accuracy: 0.8240
Epoch 10: Validation Loss: 0.4817, Accuracy: 0.8284


In [85]:
# Evaluate the trained model on the held-out test set (run this only after the training cell)
model.eval()

# Running totals: summed batch losses, number of correct predictions, number of samples seen
total_loss_test, correct_predictions_test, total_samples_test = 0, 0, 0

with torch.no_grad():  # no gradients are needed while evaluating
    for inputs, targets in test_loader:
        outputs, hidden_state = model(inputs)  # the model returns (outputs, hidden state)

        # Bring predictions and targets to the same one-value-per-sample shape
        outputs = output_reshaper_mean(outputs, inputs, targets)
        targets = target_reshaper(targets)

        loss_test = criterion(outputs, targets)
        total_loss_test += loss_test.item()

        # Threshold the predicted probability at 0.5 (binary classification) and count the hits
        predictions_test = (outputs > 0.5).int()
        matches = predictions_test == targets
        correct_predictions_test += matches.sum().item()
        total_samples_test += len(targets)

# Average the per-batch losses and turn the hit count into an accuracy
test_loss = total_loss_test / len(test_loader)
test_accuracy = correct_predictions_test / total_samples_test

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.4934, Test Accuracy: 0.8136
