# Crop Recommendation ML Models

This notebook trains and evaluates 5 regression models on the cleaned crop dataset:
- Linear Regression
- MLP Regressor
- Gradient Boosting
- Random Forest
- Decision Tree

Each model will report training time, MSE, and RMSE.

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import time
import numpy as np

In [19]:
# Load data
df = pd.read_csv('./datasets/h5-i2_2016-2021_daily_harvest_cleaned.csv')

cat_cols = ['Crop', 'Treatment', 'Site', 'FIeld']
df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

X = df.drop('Grain_yield_kg_ha', axis=1)
y = df['Grain_yield_kg_ha']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [20]:
models = {
    'LinearRegression': LinearRegression(),
    'MLPRegressor': MLPRegressor(max_iter=500, random_state=42),
    'GradientBoosting': GradientBoostingRegressor(random_state=42),
    'RandomForest': RandomForestRegressor(random_state=42),
    'DecisionTree': DecisionTreeRegressor(random_state=42)
}

results = []
for name, model in models.items():
    start = time.time()
    y_train = np.log1p(y_train)
    y_test  = np.log1p(y_test)
    model.fit(X_train, y_train)
    train_time = time.time() - start
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    rmse = mse ** 0.5
    results.append((name, train_time, mse, rmse))

results_df = pd.DataFrame(
    results,
    columns=['Model', 'TrainTime', 'MSE', 'RMSE']
)
results_df

Unnamed: 0,Model,TrainTime,MSE,RMSE
0,LinearRegression,0.007155,0.07802,0.27932
1,MLPRegressor,0.170077,2117.863255,46.020248
2,GradientBoosting,0.077992,7.2e-05,0.008471
3,RandomForest,0.117144,1.8e-05,0.004193
4,DecisionTree,0.0,6e-06,0.002346
