In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from xgboost import XGBClassifier

# Seed handed to the classifier's `random_state` so training runs are repeatable.
RANDOM_STATE = 54_895

In [7]:

# Read the crop / yield / fertilizer CSV (path is relative to this notebook).
csv_path = "../data/Crop_Yield_Fertilizer.csv"
df = pd.read_csv(csv_path)


In [8]:
# Show the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,yield,fertilizer
0,90.0,42.0,43.0,20.879744,82.002744,6.502985,202.935536,rice,71.199428,DAP
1,85.0,58.0,41.0,21.770462,80.319644,7.038096,226.655537,rice,81.620199,DAP
2,60.0,55.0,44.0,23.004459,82.320763,7.840207,263.964248,rice,80.47313,Gypsum
3,74.0,35.0,40.0,26.491096,80.158363,6.980401,242.864034,rice,75.178196,DAP
4,78.0,42.0,42.0,20.130175,81.604873,7.628473,262.71734,rice,75.485563,Gypsum


In [9]:
# Columns fed to the model as inputs, and the column it learns to predict.
FEATURES = [
    'N', 'P', 'K',
    'temperature', 'humidity', 'ph', 'rainfall',
]
TARGET = ['label']

# Work on copies so `df` itself is left untouched.
# Because TARGET is a list, `y` is a one-column DataFrame rather than a Series.
X, y = df[FEATURES].copy(), df[TARGET].copy()

In [10]:
# Map the string class names in `y` to the integer ids the classifier trains on.
# The fitted encoder is kept so predictions can be mapped back to names later.
label_encoder = LabelEncoder()

# `y` is a one-column DataFrame (it was selected with a list of column names),
# while LabelEncoder expects a 1-D array of shape (n_samples,). Flattening it
# first yields the same encoding without sklearn's DataConversionWarning.
y_encoded = label_encoder.fit_transform(y.to_numpy().ravel())


  y = column_or_1d(y, warn=True)


In [11]:
# Standardize every input column with StandardScaler. FEATURES is the only
# column group listed, so any other column passed in is dropped
# (ColumnTransformer's default `remainder='drop'`).
numeric_scaling = ('num', StandardScaler(), FEATURES)
preprocessor = ColumnTransformer(transformers=[numeric_scaling])

In [13]:
# Hyperparameters for the multiclass XGBoost classifier, gathered in one place.
# `num_class` is taken from the fitted label encoder, so the label-encoding
# cell must run before this one. `n_jobs` is intentionally not passed.
xgb_params = dict(
    objective='multi:softprob',
    eval_metric='mlogloss',
    num_class=len(label_encoder.classes_),
    n_estimators=600,
    max_depth=5,
    learning_rate=0.08,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=RANDOM_STATE,
)

model = XGBClassifier(**xgb_params)


In [14]:
# Chain scaling and the classifier so both are fitted, applied and saved
# together as a single estimator.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('model', model),
]
pipeline = Pipeline(steps=pipeline_steps)


In [15]:
# Fit the scaler and the classifier on the full dataset (no hold-out split is
# made in this notebook). The fitted pipeline is the cell's displayed output.
pipeline.fit(X=X, y=y_encoded)


0,1,2
,steps,"[('preprocessor', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,objective,'multi:softprob'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.9
,device,
,early_stopping_rounds,
,enable_categorical,False


In [17]:
# Persist the fitted pipeline together with the label encoder: the encoder is
# needed at inference time to turn predicted class ids back into class names.
models_dir = Path("../models")

# Create the output directory if it is missing, so a fresh checkout does not
# fail with FileNotFoundError after the whole training run has finished.
models_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(pipeline, models_dir / "crop_model.pkl")
joblib.dump(label_encoder, models_dir / "crop_label_encoder.pkl")

print("Crop model trained and saved")


Crop model trained and saved
