In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

# Train a RandomForest + GradientBoosting voting ensemble to predict the
# 'Production' column of the crop-production CSV and report its R² on a
# held-out 20% split.

# Tunable inputs, gathered in one place so they are easy to find and change.
DATA_PATH = '/content/drive/MyDrive/crop_production.csv'
CATEGORICAL_COLUMNS = ['State_Name', 'District_Name', 'Crop', 'Season']

# Load the CSV, drop rows with any missing value, and discard 'Crop_Year'
# so it is not used as a feature.
data = pd.read_csv(DATA_PATH).dropna().drop(columns=['Crop_Year'])

# Encode each categorical column with its OWN LabelEncoder and keep every
# fitted encoder. Re-fitting a single shared encoder overwrites `classes_`
# on each call, which would leave only the last column's mapping available
# and make it impossible to encode new raw rows the same way at inference.
encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward compatibility: `label_encoder` previously ended up fitted on
# 'Season' (the last column encoded), so keep that binding.
label_encoder = encoders['Season']

# Features are every remaining column except the target.
X = data.drop(columns=['Production'])
y = data['Production']

# Fixed random_state keeps the train/test split reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

random_forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
# NOTE(review): xgb_model is configured here but is NOT passed to the
# VotingRegressor below, so it is never fitted in this cell. Kept in case
# another cell uses it - confirm whether it should join the ensemble.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', colsample_bytree=0.3, learning_rate=0.1,
                              max_depth=5, alpha=10, n_estimators=100)
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# The ensemble combines the random forest and gradient boosting regressors.
ensemble_model = VotingRegressor([('random_forest', random_forest_model),('gradient',gb_model)])

ensemble_model.fit(xtrain, ytrain)

# Score on the held-out split only.
y_pred_ensemble = ensemble_model.predict(xtest)

r2_ensemble = r2_score(ytest, y_pred_ensemble)
print("Ensemble R² Score:", r2_ensemble)

Ensemble R² Score: 0.8439915165439655


In [None]:
import pickle

In [None]:
# Serialize the fitted ensemble to 'model.pkl' in the current working
# directory. The file is opened in binary write mode, so an existing file
# with the same name is overwritten.
# NOTE: only load this pickle back from a trusted location; unpickling
# untrusted data can execute arbitrary code.
filename = 'model.pkl'
with open(filename, mode='wb') as file:
    pickle.Pickler(file).dump(ensemble_model)
