In [None]:
# PyTorch can be installed with the interactive selector:
# https://pytorch.org/get-started/locally/#windows-anaconda

# Note the GPU version requires cuda 12.1
# NOT cuda 12.3 (the latest version as of 2/1/24)
# Can be found here: https://developer.nvidia.com/cuda-12-1-0-download-archive

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import time

# Here are some tools for gpu usage if you want to play with it:
# You can check if you have gpu setup and available here:
#print(torch.version.cuda)
#print(torch.cuda.is_available())

# However, in this case the gpu will likely be slower
# If you want to try it out uncomment this line, and then a few lines in the training section
#device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load the dataset
# Load data, but skip the header row
dataset = np.loadtxt('card_transdata.csv', delimiter=',', skiprows=1, dtype=np.float32)

# Get the number of data points (rows) and the number of columns
num_datums, num_features = dataset.shape

# The number of features is actually 1 less, because one of the columns is dedicated to the label
num_features -= 1

# Shuffle the rows (in place) in case somebody has them in an ordered list.
# A fixed seed makes the shuffle identical on every "Restart & Run All", so any split
# taken from this array afterwards is reproducible and held-out rows do not drift
# into the training portion between hyperparameter-tuning runs.
shuffle_seed = 42
np.random.default_rng(shuffle_seed).shuffle(dataset)

In [None]:
# Now split the data for training, validating, and testing
training_ratio = 0.7
validation_ratio = 0.2
test_ratio = 0.1  # informational: the test split receives whatever rows remain (see below)

num_train = round(num_datums * training_ratio)
num_valid = round(num_datums * validation_ratio)
# The test slice below takes every row after the training and validation rows, so its
# size is the remainder. Rounding it independently can disagree with the real slice
# length (e.g. 12 rows -> 8 + 2 + round(1.2) = 11, yet the slice holds 2 rows), which
# would make any accuracy divided by num_test wrong.
num_test = num_datums - num_train - num_valid

# Create all of the split datasets as numpy arrays
# Features are the first num_features columns; the label is the last column
X_training_np = dataset[0:num_train, 0:num_features]
y_training_np = dataset[0:num_train, -1]

X_validation_np = dataset[num_train:num_train+num_valid, 0:num_features]
y_validation_np = dataset[num_train:num_train+num_valid, -1]

X_test_np = dataset[num_train+num_valid:, 0:num_features]
y_test_np = dataset[num_train+num_valid:, -1]

In [None]:
# Wrap the numpy training arrays as torch tensors (from_numpy shares memory, no copy)
X_training = torch.from_numpy(X_training_np)
# The 1-D label vector becomes a single-column 2-D tensor of shape (N, 1)
y_training = torch.from_numpy(y_training_np).unsqueeze(1)

In [None]:
# Create our network
# Fully-connected stack: num_features -> 12 -> 8 -> 1, with ReLU after each hidden
# layer and a sigmoid squashing the single output into the range (0, 1)
network_layers = [
    torch.nn.Linear(num_features, 12),
    torch.nn.ReLU(),
    torch.nn.Linear(12, 8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, 1),
    torch.nn.Sigmoid(),
]
neural_network = torch.nn.Sequential(*network_layers)

# Another approach that is easy to modify number of hidden_layers:
#num_hidden_layers = 4
#num_hidden_nodes = 12
#
#initial_layer = torch.nn.Linear(num_features, num_hidden_nodes)
#layer_list = [initial_layer]
#
#for i in range(num_hidden_layers-1):
#    layer_list.append( torch.nn.ReLU() )
#    layer_list.append( torch.nn.Linear(num_hidden_nodes,num_hidden_nodes))
#
#layer_list.append( torch.nn.ReLU() )
#layer_list.append( torch.nn.Linear(num_hidden_nodes, 1))
#layer_list.append( torch.nn.Sigmoid() )
#
#neural_network = torch.nn.Sequential(*layer_list)

In [None]:
# Define the remaining training components
# Adam (with its default settings) updates every parameter of the network
optimizer = torch.optim.Adam(params=neural_network.parameters())
# Binary cross entropy: expects probabilities in [0, 1] and targets of the same shape
loss_function = torch.nn.BCELoss()

In [None]:
# If you want to try using the gpu then uncomment here:
# Send our tensors to the gpu if it is available
#X_training = X_training.to(device)
#y_training = y_training.to(device)

# Send the model to the gpu
#neural_network.to(device)

In [None]:
# Train your model

# NOTE: Something cool about pytorch is that the model is updated in place as it trains, so
# if you interrupt the cell the model keeps its current state and you can move on to
# validating

num_epochs = 20
batch_size = 1000
# Ceiling division so that a trailing partial batch is still trained on.
# round() would silently drop up to half a batch of rows whenever num_train is not a
# multiple of batch_size, and would give 0 batches for small training sets (leaving
# `loss` undefined at the print below).
num_batches = (num_train + batch_size - 1) // batch_size

for epoch in range(num_epochs):
    t = time.perf_counter() # Measure the time of the epoch (monotonic clock, meant for intervals)
    for batch in range(num_batches):
        # Get a batch of the training data
        # (slicing past the end is safe: the last batch is simply shorter)
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        X_batch = X_training[batch_start:batch_end]
        y_batch = y_training[batch_start:batch_end]

        # Deploy the model
        y_prediction = neural_network(X_batch)

        # Compute loss
        loss = loss_function(y_prediction, y_batch)

        # Backpropagation: clear old gradients, compute new ones, take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Compute the time of the epoch
    elapsed_time = time.perf_counter() - t

    # Print results of epoch (the loss shown is that of the final batch of the epoch)
    print(f'Epoch {epoch+1}/{num_epochs} completed in {elapsed_time:.2f}s, loss {loss.item():.4f}')

In [None]:
# Evaluate model
# Convert the data to tensors
X_validation = torch.from_numpy(X_validation_np)
y_validation = torch.from_numpy(y_validation_np).reshape(-1,1)
y_valdation = y_validation  # original (misspelled) name kept as an alias for compatibility

# Run the model
# no_grad: gradients are not needed for evaluation, so skip building the autograd
# graph over the whole validation set
with torch.no_grad():
    y_prediction = neural_network(X_validation)
# Round the results (probabilities -> hard 0/1 predictions)
y_prediction = y_prediction.round()

# Count how many predictions match the labels
# Tensor.sum() is vectorized; the builtin sum() would loop over every row in Python
num_matches = (y_prediction == y_validation).sum().item()

# Compute accuracy
accuracy = num_matches / num_valid

print(f"Accuracy {accuracy:.5f}")

In [None]:
# DON'T TOUCH THIS UNTIL YOU ARE DONE TUNING YOUR HYPERPARAMETERS
# Convert the data to tensors
#X_test = torch.from_numpy(X_test_np)
#y_test = torch.from_numpy(y_test_np).reshape(-1,1)

# Run the model
#y_prediction = neural_network(X_test)
# Round the results
#y_prediction = y_prediction.round()

# See how many are different
#num_matches = sum(y_prediction==y_test)

# Compute accuracy
#accuracy = float(num_matches/num_test)

#print(f"Final Accuracy {accuracy:.5f}")