In [471]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import itertools
from torch.utils.data import DataLoader, TensorDataset, random_split
from fedlab.contrib.algorithm.basic_client import SGDSerialClientTrainer
from fedlab.contrib.algorithm.basic_server import SyncServerHandler
from fedlab.core.standalone import StandalonePipeline
from fedlab.utils.functional import evaluate
from sklearn.model_selection import KFold

Possible features to implement:

- Transaction History
    - Amt, Timestamps -> Frequency
- Current Balance
- User interaction
- Geolocation Data
- Time patterns
- How often users redeem rewards
- Wallet features used
- Financial Goals

-Some attributes are categorical like user interaction. We might have to do some sort of engagement leveling for that.
- Wallet features can indicate what kind of resources a user might desire:
    - If they like to check their balance more than making transactions, it might be a sign that a user is considering making a purchase but is nervous about consequences regarding it. This could be "scenario 1" and can be encoded as a one hot vector like [1, 0, 0, ..., 0]

### NOTE

- Most features are tentative and may not be implemented. It is unclear as to what kind of data we will have access to at the current moment and whether or not hte collection of this data is feasible.

# Feature Programming

Data that I think should be collected:
- X[:, 0] = Amt (transaction amount/price)
- X[:, 1] = Timestamps (block timestamp)
- X[:, 2] = User's current balance.
- X[:, 3] = Age
- X[:, 4] = Number of total transactions made
- X[:, 5] = Knowledge index based off of tests and such

Output:
- If 0, then they're doing ok
- If 1, then they may be a little reckless
- If 2, then they may be very reckless
- If 3, 100% reckless

In [472]:
# Prototype read data function.
df = pd.read_csv("user-data.csv", delimiter=",")
df_out = pd.read_csv("not-normalized.csv", delimiter=",")

# Data matrix
D = df.to_numpy()
Y = df_out.to_numpy()[:, -1]

# Presumably, (# of points, 5)
print(D.shape)
print(Y.shape)

training_data, testing_data = np.column_stack((D[:80, :], Y[:80])), np.column_stack((D[80:, :], Y[80:]))

training_data = training_data.astype(np.float64)
testing_data = testing_data.astype(np.float64)  # or np.int32 depending on your requirement


(100, 5)
(100,)


In [473]:
# # This is code heavily based on Zaki's implementation of a Simple Neural Network.

# def relu(z):
#     """Apply the ReLU (Rectified Linear Unit) function."""
#     return np.maximum(0, z)

# def relu_derivative(z):
#     """Compute the derivative of the ReLU function."""
#     return np.where(z > 0, 1, 0)

# def feed_forward(x, network):
#     """Perform a feedforward pass through the neural network."""
#     activations = [x]
#     input_to_layer = x

#     for layer in network:
#         z = layer['b'] + np.dot(layer['W'].T, input_to_layer)
#         input_to_layer = relu(z)
#         activations.append(input_to_layer)

#     activations[-1] = softmax(activations[-1])
#     return activations

# def initialize_network(input_size, hidden_layer_sizes, output_size, scale):
#     """Initialize a deep multilayer perceptron with random weights and biases."""
#     layer_sizes = [input_size] + hidden_layer_sizes + [output_size]
#     network = []

#     for i in range(len(layer_sizes) - 1):
#         layer = {
#             'b': np.random.rand(layer_sizes[i + 1]) * scale,
#             'W': np.random.rand(layer_sizes[i], layer_sizes[i + 1]) * scale
#         }
#         network.append(layer)

#     return network

# def deep_mlp_training(data, output_size, max_iter, learning_rate, hidden_layer_sizes, scale):
#     """Train a deep multilayer perceptron on the given dataset."""
#     num_samples, num_features = data.shape
#     input_size = num_features - 1  # Last column is assumed to be the label
#     network = initialize_network(input_size, hidden_layer_sizes, output_size, scale)

#     for j in range(max_iter):
#         indices = np.arange(num_samples)
#         np.random.shuffle(indices)

#         for i in indices:
#             x_i = data[i, :-1]
#             y_i = np.zeros(output_size)
#             y_i[int(data[i, -1])] = 1

#             # Forward pass
#             activations = feed_forward(x_i, network)

#             # Backpropagation
#             deltas = [activations[-1] - y_i]
#             for l in range(len(network) - 1, 0, -1):
#                 delta = relu_derivative(np.dot(network[l]['W'], deltas[0]))
#                 deltas.insert(0, delta)

#             # Gradient descent parameter update
#             for l, layer in enumerate(network):
#                 layer['W'] -= learning_rate * np.outer(activations[l], deltas[l])
#                 layer['b'] -= learning_rate * deltas[l]

#     return network


In [474]:
class SGDSerialClientTrainerTensor(SGDSerialClientTrainer):
    def local_process(self, model_parameters, client_data):
        if isinstance(client_data, torch.utils.data.dataset.Subset):
            data_loader = DataLoader(client_data, batch_size=self.batch_size, shuffle=False)
            pack = self.train(model_parameters, data_loader)
            self.cache.append(pack)
        else:
            print(f"Invalid data format for client data, type {type(client_data)}")


In [475]:
class DeepMLP(nn.Module):
    def __init__(self, input_size=0, hidden_layer_sizes=0, output_size=0):
        super(DeepMLP, self).__init__()
        layers = []

        # Input layer
        layers.append(nn.Linear(input_size, hidden_layer_sizes[0]))
        layers.append(nn.ReLU())

        # Hidden layers
        for i in range(len(hidden_layer_sizes)-1):
            layers.append(nn.Linear(hidden_layer_sizes[i], hidden_layer_sizes[i+1]))
            layers.append(nn.ReLU())

        # Output layer
        layers.append(nn.Linear(hidden_layer_sizes[-1], output_size))

        # Combine all layers
        self.layers = nn.Sequential(*layers)

    # Feed Forward.
    def forward(self, x):
        return self.layers(x)

In [476]:
class EvalPipeline(StandalonePipeline):
    def __init__(self, handler, trainer, test_loader, client_data, show_data=True):
        super().__init__(handler, trainer)
        self.show_data = show_data
        self.test_loader = test_loader
        self.client_data = client_data
        self.loss, self.acc = [], []
        self.ax = None
        self.ax2 = None

    def getLoss(self):
        return self.loss
    
    def getAcc(self):
        return self.acc

    def main(self):
        t = 0
        while not self.handler.if_stop:
            model_parameters = self.handler.downlink_package[0]

            for client_id in self.handler.sample_clients():
                client_data = self.client_data[client_id]
                self.trainer.local_process(model_parameters, client_data)
            
            for pack in self.trainer.uplink_package:
                self.handler.load(pack)
            
            loss, acc = evaluate(self.handler.model, 
                                nn.CrossEntropyLoss(),
                                self.test_loader)
            if (self.show_data):
                print(f"Round {t}, Loss {round(loss,4)}, Test Acc {round(acc,4)}")

            self.loss.append(loss)
            self.acc.append(acc)
            
            t += 1

    def show(self):
        plt.figure(figsize=(8,4.5))
        self.ax = plt.subplot(1,2,1)
        self.ax.plot(np.arange(len(self.loss)), self.loss)
        self.ax.set_xlabel("Communication Round")
        self.ax.set_ylabel("Loss")
        
        self.ax2 = plt.subplot(1,2,2)
        self.ax2.plot(np.arange(len(self.acc)), self.acc)
        self.ax2.set_xlabel("Communication Round")
        self.ax2.set_ylabel("Accuracy")

In [477]:
# Just like in the lab, we need to implement client-side training and server side aggregation.
# I simply copy and pasted from my code:

def run(
    training_data: np.ndarray,
    test_data: np.ndarray,
    Y_train: np.ndarray,
    Y_test: np.ndarray,
    input_size: int, 
    hidden_layer_sizes: list,
    output_size: int,
    show_data=True,
    epochs=1,
    batch_size=16,
    eta=0.04,
    cuda=False,
    num_rounds=100,
    num_clients=8
):
    # Convert numpy arrays to PyTorch tensors
    training_data_tensor = torch.from_numpy(training_data).float()
    test_data_tensor = torch.from_numpy(test_data).float()
    output_train_tensor = torch.from_numpy(Y_train).long()
    output_test_tensor = torch.from_numpy(Y_test).long()

    model = DeepMLP(input_size, hidden_layer_sizes, output_size)

    # Create an instance of the trainer for serial training on clients
    trainer = SGDSerialClientTrainerTensor(model=model,
                                    num_clients=num_clients,
                                    cuda=cuda
                                    )


    # Once we actually HAVE the data, you can set it up as follows:
    trainer.setup_dataset(training_data_tensor)

    # Setup optimizer with the defined epochs, batch size, and learning rate
    trainer.setup_optim(epochs=epochs,
                        batch_size=batch_size,
                        lr=eta)

    handler = SyncServerHandler(model=model, 
                                global_round=num_rounds,
                                sample_ratio=0.1)

    train_dataset = TensorDataset(training_data_tensor, output_train_tensor)
    test_dataset = TensorDataset(test_data_tensor, output_test_tensor)

    client_data_size = len(training_data_tensor) // num_clients
    client_data = random_split(train_dataset, [client_data_size] * num_clients)

    test_loader = DataLoader( test_dataset, batch_size=batch_size)
    standalone_eval = EvalPipeline(handler=handler, trainer=trainer, test_loader=test_loader, client_data=client_data, show_data=show_data)
    standalone_eval.main()

    return standalone_eval.acc[-1], model


In [478]:
def cross_validation(
    training_data: np.ndarray, 
    input_size: int, 
    output_size: int,
    neuron_ranges=[16, 32, 64], 
    learning_rate_ranges=[0.01, 0.001]
):
    hyperparameter_combinations = itertools.product(neuron_ranges, learning_rate_ranges)
    best_performance = float('inf')
    best_params = None

    for neurons, lr in hyperparameter_combinations:
        kf = KFold(n_splits=5)
        fold_performances = []

        for train_index, val_index in kf.split(training_data):
            X_train_fold, X_valid_fold = training_data[train_index], training_data[val_index]

            # Assuming 'run' function is adapted for training and returns a trained model and its performance
            acc, _ = \
                run (
                    training_data=X_train_fold[:,:-1], 
                    test_data=X_valid_fold[:,:-1],
                    Y_train=X_train_fold[:,-1],
                    Y_test=X_valid_fold[:,-1],
                    input_size=input_size,
                    hidden_layer_sizes=neuron_ranges,
                    output_size=output_size,
                    eta=lr,
                    show_data=False
                )

            # Evaluate the trained model on the validation set
            fold_performances.append(acc)

        average_performance = np.mean(fold_performances)

        if average_performance < best_performance:
            best_performance = average_performance
            best_params = (neurons, lr)

    return best_params


In [479]:
best_params = cross_validation(
                training_data=training_data, 
                input_size=5,
                output_size=4
            )

print(best_params)

(32, 0.01)


In [480]:
optimal_hidden_layer_sizes = [best_params[0], 32]
optimal_lr = best_params[1] 

# Train the model
best_accuracy, model = run(
    training_data=training_data[:,:-1],
    test_data=testing_data[:,:-1],
    Y_train=training_data[:,-1],
    Y_test=testing_data[:,-1],
    input_size=5,
    hidden_layer_sizes=optimal_hidden_layer_sizes,
    output_size=4,
    epochs=1,  # Set your optimal number of epochs
    batch_size=16,
    eta=optimal_lr,
    cuda=False,
    num_rounds=200,
    num_clients=8,
    show_data=False
)

print(best_accuracy)

0.5


In [481]:
# Assuming the model is named 'model' and is part of your 'run' function or returned by it
torch.save(model.state_dict(), 'model.pth')

In [482]:
def init_model(optimal_hidden_layer_sizes, input_size=5, output_size=4):
    # Initialize the model
    model = DeepMLP(input_size, optimal_hidden_layer_sizes, output_size)

    # Load the saved model parameters
    model.load_state_dict(torch.load('model.pth'))

    # Set the model to evaluation mode
    model.eval()

In [None]:
# # Example: Predicting for some new data
# new_data = torch.tensor([your_new_data_here], dtype=torch.float32)
# with torch.no_grad():  # Disable gradient calculation for inference
#     prediction = model(new_data)
