In [10]:
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import json

from ezkl import export

## Read the Data

In [11]:
# Read the gzip-compressed sample dataset into a DataFrame.
data = pd.read_csv(
    '../sample-data/data.csv.gz',
    compression='gzip',
)

# Report (rows, columns), then show the first two rows as the cell output.
print(data.shape)
data.head(2)

(12696, 71)


Unnamed: 0,target,apy-Var1Day,tvlUsd-Var1Day,apy-chain-mean-Var1Day,tvlUsd-chain-mean-Var1Day,apy-protocol-mean-Var1Day,tvlUsd-protocol-mean-Var1Day,apy-token-mean-Var1Day,tvlUsd-token-mean-Var1Day,apy-chain-max-Var1Day,...,apy-protocol-mean-Var30Day,tvlUsd-protocol-mean-Var30Day,apy-token-mean-Var30Day,tvlUsd-token-mean-Var30Day,apy-chain-max-Var30Day,apy-protocol-max-Var30Day,apy-token-max-Var30Day,tvlUsd-chain-sum-Var30Day,tvlUsd-protocol-sum-Var30Day,tvlUsd-token-sum-Var30Day
0,0,0.007886,-0.002881,0.384474,0.012158,0.0087,0.002458,0.074924,0.012354,0.619913,...,0.528504,0.212078,1.080723,0.087964,2.729201,0.353595,1.641604,0.040766,0.212078,0.087964
1,0,0.0,-0.002264,0.202158,0.034465,-0.057477,0.015966,0.084087,0.015578,0.021917,...,0.511465,0.018395,4.537764,-0.006988,0.321799,0.791096,11.122229,0.097451,0.018395,-0.006988


## Split, scale and create Tensors

In [12]:
# Pick the label and the features by column name instead of by position.
# NOTE(review): the preview of `data` lists an "Unnamed: 0" column ahead of
# "target". If that is a real column (a saved row index) rather than just the
# rendered DataFrame index, positional slicing would use it as the label and
# leak "target" into the features. Selecting by name is correct either way;
# confirm against the CSV header.
index_like_cols = [col for col in data.columns if str(col).startswith('Unnamed')]
y = data['target']
X = data.drop(columns=['target'] + index_like_cols)

# Split BEFORE scaling so the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows.
# Test values may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, 'X_scaler.joblib')

# Convert features and labels to float tensors (BCELoss needs float targets).
X_train_tensor = torch.tensor(X_train, dtype=torch.float)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float)
X_test_tensor = torch.tensor(X_test, dtype=torch.float)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float)

## Define Model

In [13]:
class BinaryClassifier(nn.Module):
    """Fully connected binary classifier ending in a sigmoid.

    Layer widths are input_size -> hidden_size -> 2 * hidden_size ->
    hidden_size -> 1, with a ReLU after each of the first three linear layers.

    Args:
        hidden_size: Width of the first and third hidden layers; the second
            hidden layer is twice as wide.
        input_size: Number of features in each input row.
    """

    def __init__(self, hidden_size, input_size):
        super().__init__()
        wide_size = hidden_size * 2

        # Submodules are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, wide_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(wide_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.output = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply every layer in sequence and return the sigmoid output."""
        stages = (
            self.fc1, self.relu1,
            self.fc2, self.relu2,
            self.fc3, self.relu3,
            self.output, self.sigmoid,
        )
        for stage in stages:
            x = stage(x)
        return x

## Export model before training

This works when we run the proof, but the model has random (untrained) weights, so it is not useful.

Keep in mind that it exports a random set of values in the shape of the input.

In [19]:
# Hyperparameters; the training cell further down reads hidden_size and
# learning_rate from here.
hidden_size = 512
learning_rate = 0.001
input_size = X_train_tensor.shape[1]

# Export a freshly initialised (untrained) network. It is bound to `circuit`
# only: this cell must not create or rebind `model`, otherwise re-running it
# after training would replace the trained network with random weights.
circuit = BinaryClassifier(hidden_size, input_size)
export(
    circuit,
    input_shape=[input_size],
    onnx_filename="../python-output/network-before-train.onnx",
    input_filename="../python-output/dummy-input-before-train.json",
)

verbose: False, log level: Level.ERROR



______________________

## Train the same model for 1000 epochs to update the weights

In [15]:
epochs = 1000

input_size = X_train_tensor.shape[1]

# Seed torch's RNG so the weight initialisation (and hence the training run)
# is reproducible across kernel restarts.
torch.manual_seed(42)

# Initialize the model
model = BinaryClassifier(hidden_size, input_size)

# Define the loss function and the optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training mode does not change between iterations, so set it once.
model.train()

for epoch in range(epochs):
    # Full-batch forward pass. squeeze(-1) removes only the trailing size-1
    # dimension so the outputs line up with the 1-D targets, even if the
    # batch itself has a single row.
    outputs = model(X_train_tensor).squeeze(-1)
    loss = criterion(outputs, y_train_tensor)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Leave the network in inference mode for the cells that use it afterwards.
model.eval()

## Export the trained model

This doesn't work when we run the proof, even with random values generated in the JSON file.

This also exports a random set of values in the shape of the input.

In [20]:
# Write the ONNX file and the accompanying input JSON for `model`.
export(
    model,
    input_shape=[input_size],
    onnx_filename="../python-output/network-after-train.onnx",
    input_filename="../python-output/dummy-input-after-train.json",
)

verbose: False, log level: Level.ERROR



____________

## Modify the export function so it can work for an input_array

In [17]:
def exportar(torch_model, input_array, input_filename):
    """Run one sample through ``torch_model`` and save it as an input JSON file.

    Args:
        torch_model: Callable model. It is invoked on a float32 tensor holding
            ``input_array`` with a leading batch dimension of 1.
        input_array: A single sample (anything ``torch.as_tensor`` accepts,
            e.g. a list or a NumPy array).
        input_filename: Path of the JSON file to write.

    The JSON object has three keys:
        input_shapes: A list with one entry, the shape of the sample as a
            list of ints (no batch dimension).
        input_data: A list with one entry, the flattened sample values.
        output_data: One flattened list per element obtained by iterating
            over the model output.
    """
    sample = torch.as_tensor(input_array, dtype=torch.float32)
    batch = sample.unsqueeze(0)

    # Only the forward values are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        torch_out = torch_model(batch)

    data = dict(
        # A list of shapes (one per input), not a flat list of ints.
        # NOTE(review): assumed to match what ezkl's own `export` helper
        # writes for `input_shape=[n]`, i.e. [[n]] - confirm.
        input_shapes=[list(sample.shape)],
        input_data=[batch.reshape(-1).tolist()],
        output_data=[o.detach().reshape(-1).tolist() for o in torch_out],
    )

    # Serialize data into file; the context manager closes the handle.
    with open(input_filename, 'w') as json_file:
        json.dump(data, json_file)

## Export the input.json with real test values to prove

It doesn't work either, but it may be a problem with the export of the model and not with the input.json

In [21]:
# Save the first test row, and the model's output for it, as an input JSON file.
exportar(
    model,
    X_test[0],
    '../python-output/real-input-after-train.json',
)