In [23]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [57]:
# STEP 1: Read the merged crop table into a DataFrame.
# The path is relative, so the CSV is looked up in the kernel's current
# working directory.
df = pd.read_csv(filepath_or_buffer="final_crop_data.csv")

In [61]:
# STEP 2: Reshape the wide table into two long tables.
# Value columns are picked by substring match on the header: every column
# whose name contains "Production" (or "Yield") becomes one melted row per
# input row, with the original header stored in 'Year'.
id_cols = ['Crop', 'Cost_Per_Quintal']
prod_cols = [name for name in df.columns if "Production" in name]
yield_cols = [name for name in df.columns if "Yield" in name]

# Production rows carry the crop and its cost; yield rows carry only the
# crop, which (with 'Year') is the key used when the two are merged in STEP 4.
df_prod = pd.melt(
    df,
    id_vars=id_cols,
    value_vars=prod_cols,
    var_name='Year',
    value_name='Production',
)
df_yield = pd.melt(
    df,
    id_vars=['Crop'],
    value_vars=yield_cols,
    var_name='Year',
    value_name='Yield',
)

In [63]:
# STEP 3: Reduce each melted column header to the year it contains.
# The pattern captures the first run of four digits; a header without one
# yields NaN. expand=False makes extract() return a Series, which is
# assigned straight back to the column.
df_prod['Year'] = df_prod['Year'].str.extract(pat=r'(\d{4})', expand=False)
df_yield['Year'] = df_yield['Year'].str.extract(pat=r'(\d{4})', expand=False)

In [65]:
# STEP 4: Attach the yield value to each production row.
# A left join keeps every production row; rows with no matching
# (Crop, Year) entry in df_yield get NaN in the 'Yield' column.
df_model = df_prod.merge(df_yield, how='left', on=['Crop', 'Year'])

In [67]:
# STEP 5: Remove rows where any of the essential fields is missing.
# Only the three listed columns are checked; NaN in other columns is kept
# and handled in the next step.
df_model = df_model.dropna(axis=0, how='any', subset=['Crop', 'Year', 'Production'])

In [69]:
# STEP 6: Impute the optional numeric columns with their column-wide mean.
# NOTE(review): both means are taken over every remaining row, i.e. before
# the train/test split in STEP 9, so held-out rows influence the imputed
# values — confirm this is acceptable for the evaluation.
df_model = df_model.assign(
    Cost_Per_Quintal=df_model['Cost_Per_Quintal'].fillna(df_model['Cost_Per_Quintal'].mean()),
    Yield=df_model['Yield'].fillna(df_model['Yield'].mean()),
)

In [71]:
# STEP 7: Year was extracted as text in STEP 3; convert it to int.
# Rows with a missing Year were already dropped in STEP 5, so the cast
# does not encounter NaN.
df_model = df_model.astype({'Year': int})

In [73]:
# STEP 8: One-hot encode the crop name.
# Only 'Crop' is listed, so it is replaced by indicator columns while the
# remaining columns are carried over as they are.
df_encoded = pd.get_dummies(data=df_model, columns=['Crop'])

In [75]:
# STEP 9: Separate the target from the features, then hold out a test set.
# 'Production' is the target; every other column of df_encoded is a feature.
y = df_encoded['Production']
X = df_encoded.drop('Production', axis=1)

# No test_size is given, so scikit-learn's default split proportion applies;
# the fixed seed makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [79]:
# STEP 10: Fit a random forest with 100 trees on the training split.
# The seed is fixed so repeated runs build the same forest.
model = RandomForestRegressor(random_state=42, n_estimators=100)
# Left as the cell's last expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)

In [81]:
# STEP 11: Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

# RMSE is taken as the square root of the MSE instead of calling
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises a TypeError.
# For this single-target problem the two forms give the same value.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# The first message has no placeholders, so it is a plain string.
print("✅ Model trained successfully!")
print(f"📊 RMSE: {rmse:.2f}")
print(f"📈 R² Score: {r2:.2f}")

✅ Model trained successfully!
📊 RMSE: 25.01
📈 R² Score: 0.95


