In [11]:
pip install pandas numpy scikit-learn xgboost joblib


Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.8/72.0 MB 3.0 MB/s eta 0:00:24
    --------------------------------------- 1.3/72.0 MB 3.4 MB/s eta 0:00:22
   - -------------------------------------- 2.1/72.0 MB 3.5 MB/s eta 0:00:21
   - -------------------------------------- 3.4/72.0 MB 4.0 MB/s eta 0:00:18
   -- ------------------------------------- 4.5/72.0 MB 4.3 MB/s eta 0:00:16
   --- ------------------------------------ 5.5/72.0 MB 4.7 MB/s eta 0:00:15
   --- ------------------------------------ 7.1/72.0 MB 4.8 MB/s eta 0:00:14
   ---- ----------------------------------- 8.7/72.0 MB 5.1 MB/s eta 0:00:13
   ----- ------------------

In [18]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

from xgboost import XGBClassifier
import joblib


In [19]:
# 1. Load the data
df = pd.read_csv("C:/Users/91798/Desktop/crop_rec/Crop_recommendation.csv")

In [20]:
print(df.head())
print(df.info())

    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice
<class 'pandas.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   str    
dtypes: float64(4), int64(3), str(1)
memory usage: 137.6 KB
None


In [21]:
X = df.drop(columns=["label"])
y = df["label"]


In [22]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


In [23]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)


In [24]:
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.9,
    colsample_bytree=0.9,
    objective="multi:softprob",
    eval_metric="mlogloss",
    random_state=42
)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", xgb_model)
])


In [25]:
pipeline.fit(X_train, y_train)
print("✅ Model training completed")


✅ Model training completed


In [26]:
y_pred = pipeline.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.990909090909091
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20
           2       0.95      1.00      0.98        20
           3       1.00      1.00      1.00        20
           4       1.00      1.00      1.00        20
           5       1.00      1.00      1.00        20
           6       1.00      1.00      1.00        20
           7       1.00      1.00      1.00        20
           8       0.95      1.00      0.98        20
           9       1.00      1.00      1.00        20
          10       1.00      0.90      0.95        20
          11       1.00      1.00      1.00        20
          12       1.00      1.00      1.00        20
          13       0.95      1.00      0.98        20
          14       0.95      1.00      0.98        20
          15       1.00      1.00      1.00        20
          16       1.00      1.00      1.00        20

In [27]:
def recommend_crops(
    N, P, K, temperature, humidity, ph, rainfall, top_k=5
):
    input_data = pd.DataFrame([{
        "N": N,
        "P": P,
        "K": K,
        "temperature": temperature,
        "humidity": humidity,
        "ph": ph,
        "rainfall": rainfall
    }])

    probabilities = pipeline.predict_proba(input_data)[0]

    crop_indices = np.arange(len(probabilities))
    crop_names = label_encoder.inverse_transform(crop_indices)

    results = pd.DataFrame({
        "Crop": crop_names,
        "Climate_Suitability_Score": probabilities
    })

    results = results.sort_values(
        by="Climate_Suitability_Score",
        ascending=False
    ).head(top_k)

    return results.reset_index(drop=True)


In [28]:
recommend_crops(
    N=90,
    P=42,
    K=43,
    temperature=25.6,
    humidity=80.3,
    ph=6.8,
    rainfall=210
)


Unnamed: 0,Crop,Climate_Suitability_Score
0,rice,0.987386
1,jute,0.009573
2,coffee,0.000396
3,banana,0.000256
4,coconut,0.000205


In [29]:
def estimate_yield(score, max_yield=5.0):
    return round(score * max_yield, 2)

output = recommend_crops(
    N=90, P=42, K=43,
    temperature=25.6, humidity=80.3,
    ph=6.8, rainfall=210
)

output["Predicted_Yield (t/ha)"] = output[
    "Climate_Suitability_Score"
].apply(estimate_yield)

output


Unnamed: 0,Crop,Climate_Suitability_Score,Predicted_Yield (t/ha)
0,rice,0.987386,4.94
1,jute,0.009573,0.05
2,coffee,0.000396,0.0
3,banana,0.000256,0.0
4,coconut,0.000205,0.0


In [30]:
joblib.dump(pipeline, "crop_planning_brain.pkl")
joblib.dump(label_encoder, "crop_label_encoder.pkl")

print("✅ Model and encoder saved")


✅ Model and encoder saved
