In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load Dataset
# The CSV is read from the notebook's working directory.
df = pd.read_csv('cropbynpk.csv')

# Leave the preview as the last expression so the cell renders the first rows.
df.head(n=5)

In [None]:
# Define Features and Target
# `label_column` names the column to predict; `feature_columns` lists the
# columns used as model inputs, in the order the model will expect them.
label_column = 'Crop'
feature_columns = [
    'N',
    'P',
    'K',
    'Temperature',
    'Humidity',
    'Ph',
    'Rain',
]

In [None]:
# Split into Train and Test
# Pull the inputs and the target out of the frame, then hold out 20% of the rows.
X, y = df[feature_columns], df[label_column]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is repeatable
    stratify=y,       # keep the class proportions the same in both partitions
)

In [None]:
# Preprocessing: Scaling and Label Encoding
# Both transformers are fitted on the training partition only and are then
# applied, unchanged, to the test partition.

# --- features ---
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- labels ---
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [None]:
# Train XGBoost Classifier
# The model is fitted on the scaled training features and the integer-encoded labels.
model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
)
# Kept as the last expression so the cell displays the fitted estimator.
model.fit(X_train_scaled, y_train_encoded)


In [None]:
# Predict encoded class ids for the held-out rows ...
y_pred_encoded = model.predict(X_test_scaled)

# ... and map them back to the original crop names for reporting.
y_pred_labels = le.inverse_transform(y_pred_encoded)

In [None]:
# Evaluation Metrics
# All metrics compare the decoded predictions with the original string labels.
acc = accuracy_score(y_test, y_pred_labels)
print(f"Accuracy: {acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_labels))

# Pin the row/column order to the encoder's class list. Without `labels`, the
# matrix only covers classes that occur in y_test or in the predictions, so a
# class absent from both would shrink the matrix and misalign axis ticks that
# are taken from le.classes_.
cm = confusion_matrix(y_test, y_pred_labels, labels=le.classes_)

In [None]:
# Render the confusion matrix as an annotated heatmap; both axes are labelled
# with the encoder's class names.
fig, ax = plt.subplots(figsize=(12, 8))
class_names = le.classes_
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Save Full Pipeline (Model + Scaler + Label Encoder)
# Everything needed at inference time is bundled into one dict so it can be
# restored with a single pickle load.
model_pipeline = dict(
    model=model,
    scaler=scaler,
    label_encoder=le,
    feature_columns=feature_columns,
)

# Written to the notebook's working directory.
with open(r'crop_recommendation_model.pkl', mode='wb') as f:
    pickle.dump(model_pipeline, f)

print("\u2705 Full model pipeline saved!")

In [None]:
# Score one hand-written sample and list the five most probable crops.
new_sample = [90, 42, 43, 20.87, 82.00, 6.5, 200.0]  # Example input, in feature_columns order
new_sample_df = pd.DataFrame([new_sample], columns=feature_columns)
# Reuse the scaler fitted on the training data so the sample is scaled the same way.
new_sample_scaled = scaler.transform(new_sample_df)

# Predict probabilities
pred_proba = model.predict_proba(new_sample_scaled)

# Recommend Top 5 Crops
# argsort is ascending, so reverse it and keep the first five positions.
top_5_indices = np.argsort(pred_proba[0])[::-1][:5]
# `le` is the LabelEncoder fitted on the training labels; it is the only
# encoder variable defined in this notebook, so decode through it.
top_5_crops = le.inverse_transform(top_5_indices)

print("Top 5 Recommended Crops:")
for i, crop in enumerate(top_5_crops, 1):
    print(f"{i}. {crop}")