In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_log_error
from sparseml.pytorch.optim import ScheduledModifierManager, ScheduledUpdateModifier
from sparseml.pytorch.optim.optimizer import ScheduledOptimizer

# --- Data preparation -------------------------------------------------------
# Read the training CSV and discard the two alternative price columns right
# away; only 'price_log' is used as the regression target below.
df = pd.read_csv('./data/lot42_train.csv').drop(columns=['price', 'price_boxcox'])

# Columns fed to the model as features.
# NOTE(review): StandardScaler needs numeric input; this assumes columns such
# as 'brand', 'name' and 'category' are already numerically encoded in the
# CSV -- confirm against the data.
feature_columns = [
    'brand',
    'return_policy',
    'shipping',
    'trending',
    'name',
    'category',
    'seller_item_sold',
]
X = df[feature_columns]
y = df['price_log']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so no test-set information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Model ------------------------------------------------------------------
# Single-hidden-layer MLP (50 units) trained with plain SGD for at most 100
# iterations. verbose=10 makes scikit-learn print the training loss as it goes,
# and random_state pins the weight initialisation for reproducibility.
mlp = MLPRegressor(hidden_layer_sizes=(50,), max_iter=100, alpha=1e-4,
                   solver='sgd', verbose=10, random_state=42,
                   learning_rate_init=.1)

# NOTE: the SparseML ScheduledModifierManager / ScheduledOptimizer wiring that
# used to sit here has been removed. It called `mlp.optimizer` and
# `mlp.parameters()`, neither of which exists on scikit-learn's MLPRegressor,
# so the cell raised AttributeError before training ever started. SparseML's
# `sparseml.pytorch` optimizers wrap a PyTorch optimizer/module and cannot be
# attached to a scikit-learn estimator; sparsity scheduling would require
# re-implementing the model in PyTorch.

# Train the model on the standardized training split.
mlp.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
# Predict on the held-out split and floor the predictions at 0:
# mean_squared_log_error refuses negative inputs, so any negative prediction
# is clamped to zero (values become non-negative, not strictly positive).
raw_predictions = mlp.predict(X_test)
y_pred = np.maximum(raw_predictions, 0)

# RMSLE is the square root of the mean squared logarithmic error.
# NOTE(review): the target is the 'price_log' column; if it is already on a
# log scale, mean_squared_log_error applies a log transform a second time --
# confirm this is the metric that is actually wanted.
msle = mean_squared_log_error(y_test, y_pred)
rmsle = np.sqrt(msle)

print(f'RMSLE: {rmsle}')


: 

: 