In [11]:
# Fertilizer recommendation model: random-forest classifier trained on fertilizer.csv, with SHAP explanations

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import shap
import matplotlib.pyplot as plt
import joblib

# -------------------------------
# 1) Load Dataset
# -------------------------------
# Read the raw fertilizer table; adjust the path if the CSV lives elsewhere.
df = pd.read_csv("fertilizer.csv")
print("Columns in dataset:", df.columns.tolist())

# -------------------------------
# 2) Define Target and Features
# -------------------------------
# Names are spelled exactly as they appear in the CSV header
# (including the dataset's own "Temparature" spelling).
TARGET_COL = "Fertilizer Name"
CATEGORICAL_COLS = ["Soil Type", "Crop Type"]
NUMERIC_COLS = ["Temparature", "Humidity", "Moisture", "Nitrogen", "Phosphorous", "Potassium"]

# Fail fast, reporting every expected column the CSV does not provide
# (categorical first, then numeric, then the target).
missing_cols = []
for required in [*CATEGORICAL_COLS, *NUMERIC_COLS, TARGET_COL]:
    if required not in df.columns:
        missing_cols.append(required)
if len(missing_cols) > 0:
    raise ValueError(f"Columns missing from CSV: {missing_cols}")

# -------------------------------
# 3) Encode categorical columns
# -------------------------------
# One fitted LabelEncoder per categorical feature, kept in a dict keyed by
# column name; each column in df is then overwritten with its integer codes.
label_encoders = {name: LabelEncoder().fit(df[name]) for name in CATEGORICAL_COLS}
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])

# Encode target: fertilizer names become integer class ids, and target_le
# retains the id -> name mapping in target_le.classes_.
target_le = LabelEncoder().fit(df[TARGET_COL])
df[TARGET_COL] = target_le.transform(df[TARGET_COL])

# -------------------------------
# 4) Split Data
# -------------------------------
# Feature matrix: categorical columns first, followed by the numeric ones.
X = df[[*CATEGORICAL_COLS, *NUMERIC_COLS]]
y = df[TARGET_COL]

# 80/20 hold-out split, stratified on the target, with a fixed seed.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -------------------------------
# 5) Scale Numerical Features
# -------------------------------
# The frames returned by train_test_split are derived from X (itself a
# selection of df). Take explicit copies before assigning into them so the
# writes below never target a possible slice of another frame and cannot
# raise SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to the test rows so no test-set statistics are used.
scaler = StandardScaler()
X_train[NUMERIC_COLS] = scaler.fit_transform(X_train[NUMERIC_COLS])
X_test[NUMERIC_COLS] = scaler.transform(X_test[NUMERIC_COLS])

# -------------------------------
# 6) Train Model
# -------------------------------
# 200-tree random forest with a fixed seed; fit() returns the fitted
# estimator, so construction and training are chained.
forest_params = {"n_estimators": 200, "random_state": 42}
model = RandomForestClassifier(**forest_params).fit(X_train, y_train)

# -------------------------------
# 7) Evaluate
# -------------------------------
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Share of the most frequent class in the test set: the accuracy of always
# predicting that class. An accuracy close to this value means the model is
# not doing better than a trivial guess.
print("Majority-class baseline accuracy:", y_test.value_counts(normalize=True).max())

# Pin the report's label order to the encoder's class ids so that
# target_names always lines up with the right rows, even when some class is
# absent from y_test / y_pred. zero_division=0 keeps such rows at 0 without
# emitting an undefined-metric warning.
report_labels = np.arange(len(target_le.classes_))
print(
    classification_report(
        y_test,
        y_pred,
        labels=report_labels,
        target_names=target_le.classes_,
        zero_division=0,
    )
)

# -------------------------------
# 8) Save model and encoders
# -------------------------------
# Persist the fitted model together with every fitted preprocessor, in the
# order: model, scaler, feature encoders, target encoder.
artifacts = (
    ("fertilizer_model.pkl", model),
    ("fertilizer_scaler.pkl", scaler),
    ("fertilizer_label_encoders.pkl", label_encoders),
    ("fertilizer_target_encoder.pkl", target_le),
)
for artifact_path, artifact in artifacts:
    joblib.dump(artifact, artifact_path)

print("✅ Model and preprocessors saved.")

# -------------------------------
# 9) SHAP Explainability
# -------------------------------
explainer = shap.TreeExplainer(model)
shap_values = explainer(X_test, check_additivity=False)

print("SHAP shape:", shap_values.values.shape)

feature_names = list(X_test.columns)

# For this multiclass model the attributions are 3-D:
# (samples, features, classes). summary_plot interprets a bare 3-D array as
# SHAP *interaction* values (samples x features x features), so the class
# axis must be split into one 2-D matrix per class before plotting.
shap_matrix = np.asarray(shap_values.values)
if shap_matrix.ndim == 3:
    per_class_values = [shap_matrix[:, :, k] for k in range(shap_matrix.shape[2])]
    # The class axis follows model.classes_ (encoded ids); map the ids back
    # to the original fertilizer names for readable legends and file names.
    class_names = [str(name) for name in target_le.inverse_transform(model.classes_)]
else:
    # Single-output layout: one (samples, features) matrix.
    per_class_values = [shap_matrix]
    class_names = ["all"]

# Feature importance summary: mean |SHAP| per feature, stacked by class.
shap.summary_plot(
    per_class_values,
    X_test,
    feature_names=feature_names,
    plot_type="bar",
    class_names=class_names,
    show=False,
)
plt.savefig("fertilizer_shap_summary.png", bbox_inches="tight")
plt.close()

# Beeswarm plots: a beeswarm is only defined for a single output, so draw one
# per class. The first class is additionally written to the original
# "fertilizer_shap_beeswarm.png" path so that file is still produced.
for class_idx, class_values in enumerate(per_class_values):
    class_label = class_names[class_idx]
    shap.summary_plot(class_values, X_test, feature_names=feature_names, show=False)
    plt.title(f"SHAP beeswarm: {class_label}")
    # Keep file names portable regardless of characters in the class label.
    safe_label = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in class_label)
    plt.savefig(f"fertilizer_shap_beeswarm_{safe_label}.png", bbox_inches="tight")
    if class_idx == 0:
        plt.savefig("fertilizer_shap_beeswarm.png", bbox_inches="tight")
    plt.close()


Columns in dataset: ['Temparature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name']
Accuracy: 0.145
              precision    recall  f1-score   support

    10-26-26       0.15      0.15      0.15       226
    14-35-14       0.14      0.16      0.15       238
    17-17-17       0.11      0.10      0.10       225
       20-20       0.19      0.17      0.18       220
       28-28       0.16      0.14      0.15       224
         DAP       0.18      0.18      0.18       233
        Urea       0.11      0.12      0.11       234

    accuracy                           0.14      1600
   macro avg       0.15      0.14      0.15      1600
weighted avg       0.15      0.14      0.15      1600

✅ Model and preprocessors saved.
SHAP shape: (1600, 8, 7)
