In [2]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.multioutput import RegressorChain
import xgboost as xgb
import joblib # Library for saving your model

# Read the stage-wise agriculture CSV into `df`. The path is relative, so the
# file must sit in the notebook's current working directory.
df = pd.read_csv(
    filepath_or_buffer='mp_agriculture_stagewise_10000rows_district_season.csv',
)
print("✅ Dataset loaded successfully!")

✅ Dataset loaded successfully!


In [3]:
# Model inputs: the five categorical descriptor columns.
features = ['crop', 'seed_type', 'soil', 'district', 'season']

# Target columns are discovered by name suffix rather than listed by hand.
# Environmental readings end in one of six measurement suffixes ...
environmental_targets = [
    name
    for name in df.columns
    if name.endswith(('_tmin', '_tmax', '_rh', '_rain', '_wind', '_solar_rad'))
]
# ... and stage lengths end in '_stage_dur'.
stage_duration_targets = [
    name for name in df.columns if name.endswith('_stage_dur')
]

# Final target order: overall duration first, then environmental columns,
# then stage durations. This is also the column order of Y.
targets = [
    'total_duration_estimate',
    *environmental_targets,
    *stage_duration_targets,
]

# Slice the frame into the feature matrix (X) and the target matrix (Y).
X = df.loc[:, features]
Y = df.loc[:, targets]

print(f"⚙️ Number of features: {X.shape[1]}")
print(f"🎯 Total number of targets to predict: {Y.shape[1]}")

⚙️ Number of features: 5
🎯 Total number of targets to predict: 78


In [4]:
# Preprocessor for our categorical input features: every column in `features`
# is one-hot encoded. With handle_unknown='ignore', a category that was not
# seen during fit is encoded as all zeros at predict time instead of raising.
#
# sparse_threshold=0 makes the transformer always return a dense array.
# Why: OneHotEncoder emits a sparse matrix by default, and RegressorChain then
# appends the earlier targets to it as extra (sparse) feature columns, which
# drops exact zeros from storage. XGBoost's tree booster treats unstored
# sparse entries as *missing*, so a true target value of 0 would be "missing"
# during training while a near-zero prediction at inference time would be a
# real value. Dense input keeps 0 as an ordinary value in both phases.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), features)
    ],
    sparse_threshold=0,
)

# Define the base XGBoost model. RegressorChain clones this estimator once
# per target, so these hyper-parameters apply to every model in the chain.
base_model = xgb.XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,  # fixed seed for reproducible fits
    n_jobs=-1         # use all available CPU cores
)

# Wrap the base model in a Regressor Chain to handle all targets: each
# target's model is trained on the encoded features plus the targets that
# precede it in the chain. No `order` is given, so the chain follows the
# column order of Y.
final_model = RegressorChain(base_estimator=base_model)

# Single estimator that encodes raw feature columns and then runs the chain,
# so callers can pass an un-encoded DataFrame straight to fit/predict.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', final_model)
])

In [5]:
# Fit the encoder and the chained regressors in one call, using every row of
# X and Y (this cell makes no train/validation split).
print("\n🚀 Training the final RegressorChain model on all data...")
pipeline.fit(X=X, y=Y)
print("✅ Final model training complete!")


🚀 Training the final RegressorChain model on all data...
✅ Final model training complete!


In [6]:
# --- 5) Persist the fitted pipeline to disk ---
# The whole pipeline (encoder + regressor chain) is written as one artifact to
# a path relative to the current working directory.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
joblib.dump(value=pipeline, filename='final_crop_model.joblib')
print("\n💾 Model has been saved to 'final_crop_model.joblib' and is ready for future predictions.")


💾 Model has been saved to 'final_crop_model.joblib' and is ready for future predictions.
