In [2]:
from model import *

Possible features to implement:

- Transaction History
    - Amt, Timestamps -> Frequency
- Current Balance
- User interaction
- Geolocation Data
- Time patterns
- How often users redeem rewards
- Wallet features used
- Financial Goals

- Some attributes, like user interaction, are categorical. We might have to define some sort of engagement levels to encode them.
- Wallet features can indicate what kind of resources a user might desire:
    - If they check their balance more often than they make transactions, it might be a sign that the user is considering a purchase but is nervous about its consequences. This could be "scenario 1" and can be encoded as a one-hot vector like [1, 0, 0, ..., 0]

### NOTE

- Most features are tentative and may not be implemented. It is currently unclear what kind of data we will have access to and whether collecting this data is feasible.

# Feature Programming

Data that I think should be collected:
- X[:, 0] = Amt (transaction amount/price)
- X[:, 1] = Timestamps (block timestamp)
- X[:, 2] = User's current balance.
- X[:, 3] = Age
- X[:, 4] = Number of total transactions made
- X[:, 5] = Knowledge index based off of tests and such

Output:
- If 0, then they're doing ok
- If 1, then they may be a little reckless
- If 2, then they may be very reckless
- If 3, 100% reckless

In [3]:
# __dir__ = os.getcwd()
# NOTE: this is a relative path, resolved against the kernel's working
# directory. If the notebook is started from another folder, the reads below
# fail with FileNotFoundError.
DATAPATH = './ai/data'

# Number of leading rows used for training; every remaining row is held out
# for testing.
TRAIN_SIZE = 80

# Prototype read data function.
df = pd.read_csv(DATAPATH + "/user-data.csv", delimiter=",")
df_out = pd.read_csv(DATAPATH + "/not-normalized.csv", delimiter=",")

# Feature matrix (all columns of user-data.csv) and label vector (last column
# of not-normalized.csv).
D = df.to_numpy()
Y = df_out.to_numpy()[:, -1]

# Presumably D is (# of points, 5); Y is 1-D with one label per row.
print(D.shape)
print(Y.shape)

# Fail early with a readable message instead of an opaque column_stack error
# (row mismatch) or a silently empty test split (too few rows).
if D.shape[0] != Y.shape[0]:
    raise ValueError(
        f"Feature/label row mismatch: {D.shape[0]} feature rows vs {Y.shape[0]} labels"
    )
if not 0 < TRAIN_SIZE < D.shape[0]:
    raise ValueError(
        f"TRAIN_SIZE={TRAIN_SIZE} leaves no rows for training or testing "
        f"(dataset has {D.shape[0]} rows)"
    )

# Each split is [features | label], with the label appended as the last
# column, cast to float64.
training_data = np.column_stack((D[:TRAIN_SIZE, :], Y[:TRAIN_SIZE])).astype(np.float64)
testing_data = np.column_stack((D[TRAIN_SIZE:, :], Y[TRAIN_SIZE:])).astype(np.float64)

FileNotFoundError: [Errno 2] No such file or directory: './ai/data/user-data.csv'

In [473]:
# # This is code heavily based on Zaki's implementation of a Simple Neural Network.

# def relu(z):
#     """Apply the ReLU (Rectified Linear Unit) function."""
#     return np.maximum(0, z)

# def relu_derivative(z):
#     """Compute the derivative of the ReLU function."""
#     return np.where(z > 0, 1, 0)

# def feed_forward(x, network):
#     """Perform a feedforward pass through the neural network."""
#     activations = [x]
#     input_to_layer = x

#     for layer in network:
#         z = layer['b'] + np.dot(layer['W'].T, input_to_layer)
#         input_to_layer = relu(z)
#         activations.append(input_to_layer)

#     activations[-1] = softmax(activations[-1])
#     return activations

# def initialize_network(input_size, hidden_layer_sizes, output_size, scale):
#     """Initialize a deep multilayer perceptron with random weights and biases."""
#     layer_sizes = [input_size] + hidden_layer_sizes + [output_size]
#     network = []

#     for i in range(len(layer_sizes) - 1):
#         layer = {
#             'b': np.random.rand(layer_sizes[i + 1]) * scale,
#             'W': np.random.rand(layer_sizes[i], layer_sizes[i + 1]) * scale
#         }
#         network.append(layer)

#     return network

# def deep_mlp_training(data, output_size, max_iter, learning_rate, hidden_layer_sizes, scale):
#     """Train a deep multilayer perceptron on the given dataset."""
#     num_samples, num_features = data.shape
#     input_size = num_features - 1  # Last column is assumed to be the label
#     network = initialize_network(input_size, hidden_layer_sizes, output_size, scale)

#     for j in range(max_iter):
#         indices = np.arange(num_samples)
#         np.random.shuffle(indices)

#         for i in indices:
#             x_i = data[i, :-1]
#             y_i = np.zeros(output_size)
#             y_i[int(data[i, -1])] = 1

#             # Forward pass
#             activations = feed_forward(x_i, network)

#             # Backpropagation
#             deltas = [activations[-1] - y_i]
#             for l in range(len(network) - 1, 0, -1):
#                 delta = relu_derivative(np.dot(network[l]['W'], deltas[0]))
#                 deltas.insert(0, delta)

#             # Gradient descent parameter update
#             for l, layer in enumerate(network):
#                 layer['W'] -= learning_rate * np.outer(activations[l], deltas[l])
#                 layer['b'] -= learning_rate * deltas[l]

#     return network


In [1]:
# Hyper-parameter search on the training split.
# Later cells read best_params[0] as a hidden-layer size and best_params[1]
# as the learning rate. output_size=4 matches the four output levels (0-3)
# described in the notes above.
best_params = cross_validation(training_data=training_data, input_size=5, output_size=4)

print(best_params)

NameError: name 'cross_validation' is not defined

In [480]:
# Hidden-layer sizes: the value found by cross-validation, followed by a fixed 32.
optimal_hidden_layer_sizes = [best_params[0], 32]
# Learning rate found by cross-validation.
optimal_lr = best_params[1]

# Train the model
best_accuracy, model = run(
    # Data: every column but the last holds features; the last column is the label.
    training_data=training_data[:, :-1],
    Y_train=training_data[:, -1],
    test_data=testing_data[:, :-1],
    Y_test=testing_data[:, -1],
    # Network shape.
    input_size=5,
    hidden_layer_sizes=optimal_hidden_layer_sizes,
    output_size=4,
    # Optimisation settings.
    eta=optimal_lr,
    epochs=1,  # Set your optimal number of epochs
    batch_size=16,
    # Round/client counts and runtime flags, forwarded to run() as-is.
    num_rounds=200,
    num_clients=8,
    cuda=False,
    show_data=False,
)

print(best_accuracy)

0.5


In [482]:
def init_model(optimal_hidden_layer_sizes, input_size=5, output_size=4, model_path='model.pth'):
    """Rebuild a ``DeepMLP`` and restore its saved parameters for inference.

    Args:
        optimal_hidden_layer_sizes: Hidden-layer sizes passed to ``DeepMLP``.
            Presumably these must match the architecture the checkpoint was
            saved from, since ``load_state_dict`` rejects mismatched keys or
            shapes by default.
        input_size: Number of input features passed to ``DeepMLP``.
        output_size: Number of outputs passed to ``DeepMLP``.
        model_path: Path of the saved state dict. Defaults to ``'model.pth'``,
            the location this function always used before.

    Returns:
        The ``DeepMLP`` instance with the loaded parameters, switched to
        evaluation mode.
    """
    # Initialize the model
    model = DeepMLP(input_size, optimal_hidden_layer_sizes, output_size)

    # Load the saved model parameters.
    # map_location='cpu' lets a checkpoint written during a GPU run load on a
    # CPU-only machine; load_state_dict then copies the values into the
    # model's own parameters.
    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Set the model to evaluation mode
    model.eval()

    return model