In [None]:
# 📦 Install external libraries (only needed in Colab)
!pip install lightgbm catboost --quiet


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.ensemble import ExtraTreesRegressor
import lightgbm as lgb
from catboost import CatBoostRegressor


In [None]:
# Load the dataset: upload the workbook from the local machine (Colab-only
# helper), then read it into a DataFrame.
from google.colab import files

# Name used when nothing is uploaded (e.g. the file is already present in the
# working directory from an earlier run).
DEFAULT_DATA_FILE = "cleaned_rice_production_for_colab.xlsx"

uploaded = files.upload()

# `files.upload()` returns a dict keyed by the uploaded file names, so read the
# file that was actually uploaded instead of requiring a manual edit of a
# hard-coded name. If several files are uploaded, the first one is used.
data_file = next(iter(uploaded), DEFAULT_DATA_FILE)
df = pd.read_excel(data_file)


In [None]:
# Build the modelling table: drop incomplete rows, label-encode the categorical
# columns, and create an 80/20 train/test split.

# Raw columns that must be present for a row to be usable.
required_columns = ['Year', 'Season', 'District', 'Sown(hect)', 'Yield(kg per hect)']

# Drop incomplete rows BEFORE encoding. LabelEncoder outputs integer codes, so
# the encoded columns never contain NaN and a dropna on them cannot remove rows
# whose Season/District is missing.
df_clean = df.dropna(subset=required_columns).copy()

# Keep the fitted encoders so the integer codes can be mapped back to the
# original labels later (encoder.inverse_transform / encoder.classes_).
season_encoder = LabelEncoder()
district_encoder = LabelEncoder()
df_clean['Season_encoded'] = season_encoder.fit_transform(df_clean['Season'])
df_clean['District_encoded'] = district_encoder.fit_transform(df_clean['District'])

# Mirror the encoded columns onto `df` for any later cell that reads them
# there. Assignment aligns on the index, so dropped rows get NaN.
df['Season_encoded'] = df_clean['Season_encoded']
df['District_encoded'] = df_clean['District_encoded']

features = ['Year', 'Season_encoded', 'District_encoded', 'Sown(hect)']
X = df_clean[features]
y = df_clean['Yield(kg per hect)']

# Fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
def evaluate_regressor(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on the training split and score it on the evaluation split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_fit, y_fit : features and target used for fitting.
    X_eval, y_eval : features and target used for scoring.

    Returns
    -------
    tuple
        ``(predictions, metrics)`` where ``predictions`` is the output of
        ``model.predict(X_eval)`` and ``metrics`` is a dict with the keys
        ``'R2'``, ``'MAE'`` and ``'MSE'``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    metrics = {
        'R2': r2_score(y_eval, predictions),
        'MAE': mean_absolute_error(y_eval, predictions),
        'MSE': mean_squared_error(y_eval, predictions)
    }
    return predictions, metrics


# Candidate models; each uses the same seed and 100 trees/iterations.
model_et = ExtraTreesRegressor(n_estimators=100, random_state=42)
model_lgb = lgb.LGBMRegressor(n_estimators=100, random_state=42)
model_cb = CatBoostRegressor(iterations=100, verbose=0, random_state=42)

# Metrics per model, keyed by display name (insertion order = training order).
results = {}

# 1. ExtraTrees
y_pred_et, results['ExtraTrees'] = evaluate_regressor(model_et, X_train, y_train, X_test, y_test)

# 2. LightGBM
y_pred_lgb, results['LightGBM'] = evaluate_regressor(model_lgb, X_train, y_train, X_test, y_test)

# 3. CatBoost
y_pred_cb, results['CatBoost'] = evaluate_regressor(model_cb, X_train, y_train, X_test, y_test)

# Show the comparison as a table (one row per model); `results` stays a dict.
pd.DataFrame(results).T
