In [3]:
import pandas as pd
import joblib
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# --- Configuration ---
CLEANED_DATA_PATH = 'cleaned_paddy_data.csv'
TARGET_COLUMN = 'paddy_yield(in_kg)_scaled'

In [5]:
df_clean = pd.read_csv(CLEANED_DATA_PATH)
df_clean.head()


Unnamed: 0,paddy_yield(in_kg)_scaled,max_temp_d1_d30_scaled,min_temp_d31_d60_scaled,max_temp_d31_d60_scaled,max_temp_d61_d90_scaled,min_temp_d91_d120_scaled,max_temp_d91_d120_scaled,inst_wind_speed_d1_d30(in_knots)_scaled,inst_wind_speed_d31_d60(in_knots)_scaled,inst_wind_speed_d61_d90(in_knots)_scaled,...,agriblock_Sankarapuram,variety_delux ponni,wind_direction_d1_d30_SSE,wind_direction_d1_d30_W,wind_direction_d31_d60_W,wind_direction_d31_d60_WNW,wind_direction_d61_d90_SE,wind_direction_d61_d90_SW,wind_direction_d91_d120_NW,wind_direction_d91_d120_S
0,0.709545,0.5,-0.75,0.0,-1.0,0.0,0.0,-0.666667,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.735764,0.5,0.5,1.25,-0.25,0.0,-1.666667,0.333333,-1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.796395,1.0,0.25,0.0,0.25,2.0,0.0,-0.666667,0.333333,-2.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.708726,0.0,-0.25,0.5,-0.75,-0.5,-0.333333,0.0,-0.666667,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0
4,0.64236,-0.5,0.0,-0.5,0.5,0.5,1.333333,0.333333,0.333333,1.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0


In [6]:
X = df_clean.drop(columns=[TARGET_COLUMN])
y = df_clean[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,shuffle=True, random_state=42)


In [7]:

# This is a good starting architecture:
# - 3 hidden layers: 100, 50, and 25 neurons
# - 'adam' optimizer is a strong, modern default
# - 'relu' is the standard activation function
# - 'max_iter=1000' gives it plenty of time to converge
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50, 25),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    early_stopping=True, # Helps prevent overfitting
    verbose=True
)

mlp.fit(X_train, y_train)


Iteration 1, loss = 0.21294421
Validation score: -0.008927
Iteration 2, loss = 0.19959458
Validation score: -0.007128
Iteration 3, loss = 0.19829889
Validation score: 0.007069
Iteration 4, loss = 0.19820874
Validation score: 0.011731
Iteration 5, loss = 0.19797565
Validation score: 0.006261
Iteration 6, loss = 0.20071794
Validation score: -0.000512
Iteration 7, loss = 0.19888163
Validation score: 0.019087
Iteration 8, loss = 0.19970507
Validation score: 0.013131
Iteration 9, loss = 0.20064885
Validation score: -0.002737
Iteration 10, loss = 0.19841292
Validation score: 0.016223
Iteration 11, loss = 0.19834773
Validation score: 0.021038
Iteration 12, loss = 0.19879544
Validation score: 0.009082
Iteration 13, loss = 0.19805613
Validation score: 0.005489
Iteration 14, loss = 0.19773539
Validation score: 0.007510
Iteration 15, loss = 0.19815286
Validation score: -0.000145
Iteration 16, loss = 0.19937217
Validation score: -0.003866
Iteration 17, loss = 0.19844610
Validation score: 0.005173


0,1,2
,loss,'squared_error'
,hidden_layer_sizes,"(100, ...)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,1000


In [8]:
print("Evaluating model...")
y_pred_mlp = mlp.predict(X_test)

# Calculate RMSE
mlp_rmse = np.sqrt(mean_squared_error(y_test, y_pred_mlp))

print("\n--- Model Comparison ---")
print(f"Your Best XGBoost RMSE: 0.62144")
print(f"New MLP Regressor RMSE: {mlp_rmse:.5f}")

if mlp_rmse < 0.62144:
    print("Result: The MLP Regressor is an improvement!")
else:
    print("Result: The XGBoost model remains the champion.")

Evaluating model...

--- Model Comparison ---
Your Best XGBoost RMSE: 0.62144
New MLP Regressor RMSE: 0.62302
Result: The XGBoost model remains the champion.
