In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier

# -------------------------------
# Load dataset
# -------------------------------
# Crop recommendation pipeline: load the CSV, train a RandomForest on the
# standardized features, report test metrics, and save three figures
# (impurity-based feature importance, SHAP bar summary, SHAP beeswarm).
data = pd.read_csv("crop_recommendation.csv")

# Encode the crop names as integers. `le.classes_` keeps the original names,
# which are reused below for the classification report and plot titles.
le = LabelEncoder()
data['label'] = le.fit_transform(data['label'])

# Features and target
X = data.drop('label', axis=1)
y = data['label']

# The scaler below returns bare NumPy arrays, which drops the column names.
# Keep them here so the SHAP plots can label features by name instead of
# falling back to generic "Feature 0", "Feature 1", ... labels.
feature_names = X.columns.tolist()

# Train-test split (stratified so every crop keeps its share in both splits)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features. The scaler is fitted on the training split only so no
# test-set statistics leak into training. Tree ensembles split on thresholds
# and do not need scaling themselves; it is kept so X_train / X_test remain
# the same standardized arrays as before.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -------------------------------
# Train model
# -------------------------------
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))

# -------------------------------
# Feature importance plot
# -------------------------------
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]  # most important feature first

plt.figure(figsize=(8, 5))
sns.barplot(x=importances[indices], y=X.columns[indices], hue=X.columns[indices], palette="viridis", legend=False)
plt.title("Feature Importance (RandomForest)")
plt.tight_layout()
plt.savefig("crop_feature_importance.png")
plt.close()

# -------------------------------
# SHAP Explainability
# -------------------------------
explainer = shap.TreeExplainer(model)
# check_additivity=False skips SHAP's check that the attributions sum to the
# model output.
shap_values = explainer(X_test, check_additivity=False)

print("SHAP shape:", shap_values.values.shape)

# Class whose signed SHAP values are shown in the beeswarm plot.
SHAP_CLASS_INDEX = 0

if shap_values.values.ndim == 3:
    # Multi-class output: one slice of SHAP values per class on the last axis.
    # Global importance for the bar plot = mean |SHAP| across all classes,
    # so the summary is not silently restricted to a single crop.
    shap_for_bar = np.abs(shap_values.values).mean(axis=2)
    # A beeswarm needs signed values, so it can only show one class at a time.
    shap_to_plot = shap_values.values[:, :, SHAP_CLASS_INDEX]
    beeswarm_title = f"SHAP values for class '{le.classes_[SHAP_CLASS_INDEX]}'"
else:
    # Single-output model: the values are already (samples, features).
    shap_for_bar = shap_values.values
    shap_to_plot = shap_values.values
    beeswarm_title = None

# ---- SHAP summary plot (bar) ----
shap.summary_plot(
    shap_for_bar, X_test, feature_names=feature_names, plot_type="bar", show=False
)
plt.savefig("crop_shap_summary.png", bbox_inches="tight")
plt.close()

# ---- SHAP summary plot (beeswarm) ----
shap.summary_plot(shap_to_plot, X_test, feature_names=feature_names, show=False)
if beeswarm_title is not None:
    # Make it explicit that the plot explains one class only.
    plt.title(beeswarm_title)
plt.savefig("crop_shap_beeswarm.png", bbox_inches="tight")
plt.close()


Accuracy: 0.9954545454545455
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00      1.00      1.00        2

## Fertilizer Recommendation

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import shap
import matplotlib.pyplot as plt
import joblib
import os

# Fertilizer recommendation pipeline: load the CSV, label-encode the
# categorical columns and the target, train a RandomForest, report test
# metrics, persist the model and preprocessors, and save two SHAP figures.

# 1) Load Dataset
df = pd.read_csv("fertilizer.csv")
print("Columns:", df.columns.tolist())

# 2) Define target and features
# Column names must match the CSV header exactly (including the
# "Temparature" spelling used by the file).
TARGET_COL = "Fertilizer Name"
CATEGORICAL_COLS = ["Soil Type", "Crop Type"]
NUMERIC_COLS = ["Temparature", "Humidity", "Moisture", "Nitrogen", "Phosphorous", "Potassium"]

# 3) Encode categorical columns
# One encoder per column, kept in a dict so the same mapping can be applied
# again when the saved model is used for inference.
label_encoders = {}
for col in CATEGORICAL_COLS:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target column
target_le = LabelEncoder()
df[TARGET_COL] = target_le.fit_transform(df[TARGET_COL])

# 4) Split dataset into features and target
X = df[CATEGORICAL_COLS + NUMERIC_COLS]
y = df[TARGET_COL]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 5) Scale numerical features
# Work on explicit copies: the split frames are derived from `X`, and
# assigning columns into them directly can raise pandas'
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit on the training split only so no test-set statistics leak into training.
scaler = StandardScaler()
X_train[NUMERIC_COLS] = scaler.fit_transform(X_train[NUMERIC_COLS])
X_test[NUMERIC_COLS] = scaler.transform(X_test[NUMERIC_COLS])

# 6) Train Random Forest model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# 7) Predict and evaluate
# NOTE(review): the recorded run of this cell scored ~0.145 accuracy over
# 7 classes, which is about chance level (1/7 ≈ 0.143). Confirm the dataset
# carries real signal before relying on the saved model or its SHAP plots.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=target_le.classes_))

# 8) Create folders to save models and explainability plots if not exist
os.makedirs("models", exist_ok=True)
os.makedirs("explainability", exist_ok=True)

# 9) Save the model, scaler, encoders
joblib.dump(model, "models/fertilizer_model.pkl")
joblib.dump(scaler, "models/fertilizer_scaler.pkl")
joblib.dump(label_encoders, "models/fertilizer_label_encoders.pkl")
joblib.dump(target_le, "models/fertilizer_target_encoder.pkl")

print("✅ Model and preprocessors saved.")

# 10) SHAP explainability plots
explainer = shap.TreeExplainer(model)
# check_additivity=False skips SHAP's check that the attributions sum to the
# model output.
shap_values = explainer(X_test, check_additivity=False)

# Class whose signed SHAP values are shown in the beeswarm plot.
SHAP_CLASS_INDEX = 0

# summary_plot expects a 2-D (samples, features) matrix. A multi-class model
# yields a 3-D array with one slice per class on the last axis, so it has to
# be reduced to 2-D before plotting.
shap_matrix = shap_values.values
if shap_matrix.ndim == 3:
    # Global importance for the bar plot = mean |SHAP| across all classes.
    shap_for_bar = np.abs(shap_matrix).mean(axis=2)
    # A beeswarm needs signed values, so it can only show one class at a time.
    shap_for_beeswarm = shap_matrix[:, :, SHAP_CLASS_INDEX]
    beeswarm_title = (
        f"SHAP values for class '{target_le.classes_[SHAP_CLASS_INDEX]}'"
    )
else:
    # Single-output model: the values are already (samples, features).
    shap_for_bar = shap_matrix
    shap_for_beeswarm = shap_matrix
    beeswarm_title = None

shap_feature_names = X_test.columns.tolist()

# Bar summary: overall feature importance.
shap.summary_plot(
    shap_for_bar,
    X_test,
    feature_names=shap_feature_names,
    plot_type="bar",
    show=False,
)
plt.savefig("explainability/fertilizer_shap_summary.png", bbox_inches="tight")
plt.close()

# Beeswarm: per-sample signed contributions for the selected class.
shap.summary_plot(
    shap_for_beeswarm, X_test, feature_names=shap_feature_names, show=False
)
if beeswarm_title is not None:
    # Make it explicit that the plot explains one class only.
    plt.title(beeswarm_title)
plt.savefig("explainability/fertilizer_shap_beeswarm.png", bbox_inches="tight")
plt.close()


Columns: ['Temparature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name']
Accuracy: 0.145
              precision    recall  f1-score   support

    10-26-26       0.15      0.15      0.15       226
    14-35-14       0.14      0.16      0.15       238
    17-17-17       0.11      0.10      0.10       225
       20-20       0.19      0.17      0.18       220
       28-28       0.16      0.14      0.15       224
         DAP       0.18      0.18      0.18       233
        Urea       0.11      0.12      0.11       234

    accuracy                           0.14      1600
   macro avg       0.15      0.14      0.15      1600
weighted avg       0.15      0.14      0.15      1600

✅ Model and preprocessors saved.
