In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV


In [33]:
df = pd.read_csv('../Data/block_data_ready_cleaned.csv')
df.dropna(inplace=True)

features = ['gas_used', 'base_fee_per_gas_in_eth', 'validator_is_registered_with_relay',
            'is_fb_builder', 'sandwiches_count', 'liquidations_count',
            'sandwiched_swaps_count', 'arbitrages_count', 'hour_of_day',
            'block_fullness', 'total_gas_fees', 'base_fee_change_percentage',
            'network_demand']
target = 'proposer_total_reward_in_eth'

# Prepare the data
X = df[features]
y = df[target]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
#convert back to df
X_scaled_df = pd.DataFrame(X_scaled, columns=features)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled_df, y, test_size=0.2, random_state=42)

# Reset indices to align training and test sets with their target values
X_train.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)

# Sample a smaller subset from the training and testing sets (e.g., 50%)
sample_frac = 0.05
X_train_small = X_train.sample(frac=sample_frac, random_state=42)
y_train_small = y_train[X_train_small.index]
X_test_small = X_test.sample(frac=sample_frac, random_state=42)
y_test_small = y_test[X_test_small.index]

In [5]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'Basic Neural Network RMSE: {rmse}')



Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Basic Neural Network RMSE: 0.7258070051367116


In [34]:
# Now try an MLPRegressor
param_grid = {
    'hidden_layer_sizes': [(5,), (7, 7)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam', 'lbfgs'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
}

mlp = MLPRegressor(random_state=42, max_iter=10000)

# Grid search
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, n_jobs=6, cv=3, scoring='neg_mean_squared_error', verbose=3)
grid_search.fit(X_train_small, y_train_small)

# Best parameters
print("Best parameters found: ", grid_search.best_params_)

# Prediction with the best found parameters
y_pred = grid_search.predict(X_test_small)

# Evaluate the model
rmse = sqrt(mean_squared_error(y_test_small, y_pred))
print(f'MLP RMSE on Test Set: {rmse}')

Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best parameters found:  {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (5,), 'learning_rate': 'constant', 'solver': 'adam'}
MLP RMSE on Test Set: 0.33876124038017336
