In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K

In [17]:
# Define RMSE as a custom metric
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

# Load the data
file_path = 'cleaned.csv'  # Replace with your actual path
data = pd.read_csv(file_path, low_memory=False)

# Preprocess the data
# Drop unnecessary columns if needed (e.g., 'epoch', timestamps if not useful)
X = data.drop(columns=['closePrice', 'epoch', 'startTimestamp', 'lockTimestamp', 'closeTimestamp', '2round_price_increased'])
y = data['closePrice']

# Convert scientific notation strings to floats
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(y, errors='coerce')

# Fill or drop missing values if present
X.dropna(inplace=True)  # Fill with 0 or use X.dropna(inplace=True)
y.dropna(inplace=True)

In [18]:
# Loop through polynomial degrees
for degree in range(2, 7):
    print(f"\nTraining model for polynomial degree: {degree}")

    # Generate polynomial features
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)

    # Scale the polynomial features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_poly)

    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, shuffle=False)

    # Build the neural network model
    model = Sequential([
        Dense(128, input_dim=X_train.shape[1], activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')  # Output layer for regression
    ])

    # Compile the model with RMSE as a metric
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error', rmse])

    # Train the model
    history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=256, verbose=0)

    # Evaluate the model
    loss, mae, rmse_value = model.evaluate(X_test, y_test, verbose=0)
    print(f"Polynomial Degree: {degree}")
    print(f"Test Mean Absolute Error (MAE): {mae/1e8}")
    print(f"Test RMSE: {rmse_value/1e8}")


Training model for polynomial degree: 2
Polynomial Degree: 2
Test Mean Absolute Error (MAE): 581.4616064
Test RMSE: 581.56068864

Training model for polynomial degree: 3
Polynomial Degree: 3
Test Mean Absolute Error (MAE): 559.45428992
Test RMSE: 559.58904832

Training model for polynomial degree: 4
Polynomial Degree: 4
Test Mean Absolute Error (MAE): 548.45513728
Test RMSE: 548.62180352

Training model for polynomial degree: 5
Polynomial Degree: 5
Test Mean Absolute Error (MAE): 517.28896
Test RMSE: 517.54397696

Training model for polynomial degree: 6
Polynomial Degree: 6
Test Mean Absolute Error (MAE): 516.27491328
Test RMSE: 516.55405568
