In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier
import joblib


In [6]:
df = pd.read_csv("data_core.csv")
print(df.shape)
df.head()


(8000, 9)


Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26.0,52.0,38.0,Sandy,Maize,37,0,0,Urea
1,29.0,52.0,45.0,Loamy,Sugarcane,12,0,36,DAP
2,34.0,65.0,62.0,Black,Cotton,7,9,30,14-35-14
3,32.0,62.0,34.0,Red,Tobacco,22,0,20,28-28
4,28.0,54.0,46.0,Clayey,Paddy,35,0,0,Urea


In [9]:
# Average NPK required per crop
crop_npk_table = (
    df.groupby("Crop Type")[["Nitrogen", "Phosphorous", "Potassium"]]
    .mean()
    .round(0)
)

# Convert encoded crop index → crop name
crop_npk_table.index = crop_encoder.inverse_transform(crop_npk_table.index)

crop_npk_table


Unnamed: 0,Nitrogen,Phosphorous,Potassium
Barley,18.0,19.0,4.0
Cotton,18.0,19.0,4.0
Ground Nuts,18.0,18.0,4.0
Maize,19.0,18.0,4.0
Millets,19.0,18.0,3.0
Oil seeds,18.0,18.0,4.0
Paddy,19.0,19.0,4.0
Pulses,18.0,19.0,4.0
Sugarcane,19.0,18.0,4.0
Tobacco,19.0,19.0,4.0


In [8]:
soil_encoder = LabelEncoder()
crop_encoder = LabelEncoder()

df["Soil Type"] = soil_encoder.fit_transform(df["Soil Type"])
df["Crop Type"] = crop_encoder.fit_transform(df["Crop Type"])


In [10]:
X = df[
    [
        "Soil Type",
        "Nitrogen",
        "Phosphorous",
        "Potassium",
        "Temparature",
        "Humidity",
        "Moisture"
    ]
]

y = df["Crop Type"]


In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [12]:
model = XGBClassifier(
    objective="multi:softprob",   # IMPORTANT
    num_class=len(np.unique(y)),
    n_estimators=600,
    max_depth=7,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
    eval_metric="mlogloss"
)

model.fit(X_train, y_train)


0,1,2
,objective,'multi:softprob'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.9
,device,
,early_stopping_rounds,
,enable_categorical,False


In [14]:
y_proba = model.predict_proba(X_test)

TOP_K = 10
correct = 0

for i in range(len(y_test)):
    top10_idx = np.argsort(y_proba[i])[-TOP_K:][::-1]
    true_label = y_test.iloc[i]

    if true_label in top10_idx:
        correct += 1

top10_accuracy = correct / len(y_test)

print("Top-10 Crop Recommendation Accuracy:", round(top10_accuracy, 4))



Top-10 Crop Recommendation Accuracy: 0.9087


In [9]:
joblib.dump(model, "crop_xgboost_model.pkl")
joblib.dump(soil_encoder, "soil_encoder.pkl")
joblib.dump(crop_encoder, "crop_encoder.pkl")


['crop_encoder.pkl']

In [15]:
model = joblib.load("crop_xgboost_model.pkl")
soil_encoder = joblib.load("soil_encoder.pkl")
crop_encoder = joblib.load("crop_encoder.pkl")

input_data = pd.DataFrame([{
    "Soil Type": soil_encoder.transform(["Loamy"])[0],
    "Nitrogen": 90,
    "Phosphorous": 40,
    "Potassium": 40,
    "Temparature": 30,
    "Humidity": 60,
    "Moisture": 45
}])


In [16]:
proba = model.predict_proba(input_data)[0]

top10_idx = np.argsort(proba)[-10:][::-1]
top10_crops = crop_encoder.inverse_transform(top10_idx)

print("Top-10 Recommended Crops:\n")
for i, crop in enumerate(top10_crops, start=1):
    print(f"{i}. {crop}")


Top-10 Recommended Crops:

1. Pulses
2. Paddy
3. Oil seeds
4. Sugarcane
5. Barley
6. Tobacco
7. Ground Nuts
8. Cotton
9. Millets
10. Maize


In [17]:
joblib.dump(crop_npk_table, "crop_npk_requirements.pkl")


['crop_npk_requirements.pkl']

In [18]:
import joblib

crop_npk = joblib.load("crop_npk_requirements.pkl")

def recommend_fertilizer(crop, soil_N, soil_P, soil_K):
    if crop not in crop_npk.index:
        return "General NPK"

    req = crop_npk.loc[crop]
    fert = []

    if soil_N < req["Nitrogen"]:
        fert.append("Urea (Nitrogen)")
    if soil_P < req["Phosphorous"]:
        fert.append("DAP (Phosphorous)")
    if soil_K < req["Potassium"]:
        fert.append("MOP (Potassium)")

    if not fert:
        return "No fertilizer required"

    return " + ".join(fert)


In [35]:
from fr import recommend_fertilizer

soil_N = 9
soil_P = 10
soil_K = 30

for crop in top10_crops:
    fert = recommend_fertilizer(crop, soil_N, soil_P, soil_K)
    print(f"{crop} → {', '.join(fert)}")


Pulses → Urea, DAP
Paddy → Urea, DAP
Oil seeds → Urea, DAP
Sugarcane → Urea, DAP
Barley → Urea, DAP
Tobacco → Urea, DAP
Ground Nuts → Urea, DAP
Cotton → Urea, DAP
Millets → Urea, DAP
Maize → Urea, DAP
