In [41]:
import pandas as pd
df = pd.read_csv('data/Fertilizer_Prediction_Remarks.csv')

In [42]:
print(df.columns.tolist())

['Temperature', 'Moisture', 'Rainfall', 'PH', 'Nitrogen', 'Phosphorous', 'Potassium', 'Carbon', 'Soil', 'Crop', 'Fertilizer', 'Remark']


In [43]:
print(df.isnull().sum())

Temperature    0
Moisture       0
Rainfall       0
PH             0
Nitrogen       0
Phosphorous    0
Potassium      0
Carbon         0
Soil           0
Crop           0
Fertilizer     0
Remark         0
dtype: int64


In [44]:
print(df['Remark'].value_counts())

Remark
Rich in phosphorus, essential for root development. Prefer this for phosphorus-deficient soils to improve plant establishment.                             1054
Improves water retention in dry soils. Prefer this for soils with low moisture to prevent water stress in plants.                                           675
Enhances organic matter and improves soil structure. Prefer this for low-carbon soils to boost soil health naturally.                                       375
High potassium content, improves fruit and flower quality. Prefer this for potassium-deficient soils to enhance crop productivity.                          326
Neutralizes acidic soil and improves pH balance. Prefer this to correct low soil pH, improving nutrient availability.                                       181
Provides a balanced mix of nitrogen, phosphorus, and potassium for loamy soils. Prefer this for general-purpose fertilization in well-structured soils.     157
Provides high nitrogen, ideal for

In [45]:
print(df['Fertilizer'].value_counts())

Fertilizer
DAP                           1054
Water Retaining Fertilizer     675
Compost                        375
Muriate of Potash              326
Lime                           181
Balanced NPK Fertilizer        157
Urea                           154
Organic Fertilizer              95
Gypsum                          52
General Purpose Fertilizer      31
Name: count, dtype: int64


In [48]:
# =====================
# 1. Imports
# =====================
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# =====================
# 2. Load Dataset
# =====================
# Assuming your dataframe is named df
# Drop 'Remark' since it's not used for training
X = df.drop(["Remark", "Fertilizer"], axis=1)
y = df["Fertilizer"]

# =====================
# 3. Encode Categorical Features
# =====================
label_encoders = {}
for col in ["Soil", "Crop"]:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Encode target labels (fertilizer names)
le_target = LabelEncoder()
y = le_target.fit_transform(y)

# =====================
# 4. Scale Numerical Features
# =====================
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# =====================
# 5. Train-Test Split
# =====================
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# =====================
# 6. Train XGBoost Model
# =====================
xgb = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    objective="multi:softmax",
    num_class=len(le_target.classes_),
    eval_metric="mlogloss"
)

xgb.fit(X_train, y_train)

# =====================
# 7. Evaluation
# =====================
y_pred = xgb.predict(X_val)

val_acc = accuracy_score(y_val, y_pred)
print(f"✅ Validation Accuracy (XGBoost): {val_acc:.4f}")

print("\nClassification Report (XGBoost):")
print(classification_report(y_val, y_pred, target_names=le_target.classes_))

# Training accuracy
train_pred = xgb.predict(X_train)
train_acc = accuracy_score(y_train, train_pred)
print(f"✅ Training Accuracy (XGBoost): {train_acc:.4f}")

✅ Validation Accuracy (XGBoost): 0.9839

Classification Report (XGBoost):
                            precision    recall  f1-score   support

   Balanced NPK Fertilizer       0.97      1.00      0.98        31
                   Compost       1.00      1.00      1.00        75
                       DAP       1.00      0.98      0.99       211
General Purpose Fertilizer       1.00      1.00      1.00         6
                    Gypsum       1.00      0.82      0.90        11
                      Lime       0.95      1.00      0.97        36
         Muriate of Potash       1.00      0.98      0.99        65
        Organic Fertilizer       0.95      1.00      0.97        19
                      Urea       0.91      0.97      0.94        31
Water Retaining Fertilizer       0.99      0.99      0.99       135

                  accuracy                           0.98       620
                 macro avg       0.98      0.97      0.97       620
              weighted avg       0.98   

In [49]:
import joblib

# Save the trained XGBoost model
joblib.dump(xgb, "fertilizer_xgb_model.pkl")
print("✅ Model saved as 'fertilizer_xgb_model.pkl'")

# Later, to load the model:
# loaded_model = joblib.load("fertilizer_xgb_model.pkl")
# y_pred_loaded = loaded_model.predict(X_val)

✅ Model saved as 'fertilizer_xgb_model.pkl'


In [51]:
import joblib
from sklearn.metrics import accuracy_score, classification_report

# Load the saved XGBoost model
xgb_loaded = joblib.load("fertilizer_xgb_model.pkl")

from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Get the unique labels actually in y_val
val_labels = np.unique(y_val)

print("✅ Validation Accuracy:", accuracy_score(y_val, y_pred_val))
print("\nClassification Report (XGBoost on Validation Set):")
print(classification_report(
    y_val,
    y_pred_val,
    labels=val_labels,
    target_names=[le.classes_[i] for i in val_labels]
))


✅ Validation Accuracy: 0.9838709677419355

Classification Report (XGBoost on Validation Set):
              precision    recall  f1-score   support

Adzuki Beans       0.97      1.00      0.98        31
  Black gram       1.00      1.00      1.00        75
    Chickpea       1.00      0.98      0.99       211
     Coconut       1.00      1.00      1.00         6
      Coffee       1.00      0.82      0.90        11
      Cotton       0.95      1.00      0.97        36
  Ground Nut       1.00      0.98      0.99        65
        Jute       0.95      1.00      0.97        19
Kidney Beans       0.91      0.97      0.94        31
      Lentil       0.99      0.99      0.99       135

    accuracy                           0.98       620
   macro avg       0.98      0.97      0.97       620
weighted avg       0.98      0.98      0.98       620



In [None]:
# # =====================
# # 1. Imports
# # =====================
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler, LabelEncoder
# from sklearn.metrics import accuracy_score, classification_report
# from tensorflow import keras
# from tensorflow.keras import layers
# from xgboost import XGBClassifier

# # =====================
# # 2. Load + Preprocess
# # =====================
# # Assuming your DataFrame is called df
# # Drop "Remark" since it's not needed
# X = df.drop(["Fertilizer", "Remark"], axis=1)
# y = df["Fertilizer"]

# # Encode categorical columns (Soil, Crop)
# for col in ["Soil", "Crop"]:
#     if X[col].dtype == "object":  # only encode if categorical
#         X[col] = LabelEncoder().fit_transform(X[col])

# # Encode target (Fertilizer)
# le = LabelEncoder()
# y = le.fit_transform(y)

# # Scale numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # Train/Validation Split
# X_train, X_val, y_train, y_val = train_test_split(
#     X_scaled, y, test_size=0.2, random_state=42, stratify=y
# )

# # =====================
# # 3. Keras Neural Network
# # =====================
# print("\n=== Keras Neural Network ===")
# model = keras.Sequential([
#     layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
#     layers.Dropout(0.3),
#     layers.Dense(64, activation="relu"),
#     layers.Dropout(0.3),
#     layers.Dense(len(le.classes_), activation="softmax")
# ])

# model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# history = model.fit(
#     X_train, y_train,
#     validation_data=(X_val, y_val),
#     epochs=20, batch_size=32, verbose=1
# )

# val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
# print(f"✅ Keras Validation Accuracy: {val_acc:.4f}")

# # Predictions
# y_pred_nn = np.argmax(model.predict(X_val), axis=1)
# print("\nClassification Report (Keras NN):")
# print(classification_report(y_val, y_pred_nn, target_names=le.classes_))

# # =====================
# # 4. XGBoost Classifier
# # =====================
# print("\n=== XGBoost Classifier ===")
# xgb = XGBClassifier(
#     n_estimators=500,
#     learning_rate=0.05,
#     max_depth=6,
#     subsample=0.8,
#     colsample_bytree=0.8,
#     random_state=42,
#     objective="multi:softmax",
#     num_class=len(le.classes_),
#     eval_metric="mlogloss"
# )

# # Ensure integer labels
# y_train = np.array(y_train, dtype=np.int32)
# y_val = np.array(y_val, dtype=np.int32)

# # Train XGBoost
# xgb.fit(X_train, y_train)

# # Predictions
# y_pred_xgb = xgb.predict(X_val)

# print("✅ XGBoost Accuracy:", accuracy_score(y_val, y_pred_xgb))
# print("\nClassification Report (XGBoost):")
# print(classification_report(y_val, y_pred_xgb, target_names=le.classes_))



=== Keras Neural Network ===
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4367 - loss: 1.6922 - val_accuracy: 0.5823 - val_loss: 1.2850
Epoch 2/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5669 - loss: 1.2831 - val_accuracy: 0.6790 - val_loss: 1.0113
Epoch 3/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6286 - loss: 1.0976 - val_accuracy: 0.7274 - val_loss: 0.8680
Epoch 4/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6552 - loss: 0.9890 - val_accuracy: 0.7710 - val_loss: 0.7757
Epoch 5/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6976 - loss: 0.8807 - val_accuracy: 0.7710 - val_loss: 0.7130
Epoch 6/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7065 - loss: 0.8461 - val_accuracy: 0.7774 - val_loss: 0.6739
Epoch 7/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler, LabelEncoder
# from sklearn.metrics import accuracy_score, classification_report, r2_score
# from tensorflow import keras
# from tensorflow.keras import layers
# from xgboost import XGBClassifier

# # =====================
# # 2. Preprocessing
# # =====================
# df = pd.read_csv("data/Fertilizer Prediction.csv")

# X = df.drop("Fertilizer Name", axis=1)
# y = df["Fertilizer Name"]

# # Encode categorical columns
# for col in ["Soil Type", "Crop Type"]:
#     X[col] = LabelEncoder().fit_transform(X[col])

# # Encode target labels
# le = LabelEncoder()
# y = le.fit_transform(y)

# # Scale features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # Train/val split
# X_train, X_val, y_train, y_val = train_test_split(
#     X_scaled, y, test_size=0.2, random_state=42, stratify=y
# )

# # =====================
# # 3. Keras Neural Network
# # =====================
# print("\n=== Keras Neural Network ===")
# model = keras.Sequential([
#     layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
#     layers.Dropout(0.3),
#     layers.Dense(64, activation="relu"),
#     layers.Dropout(0.3),
#     layers.Dense(len(le.classes_), activation="softmax")
# ])

# model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# history = model.fit(
#     X_train, y_train,
#     validation_data=(X_val, y_val),
#     epochs=20, batch_size=32, verbose=1
# )

# val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
# print(f"✅ Keras Validation Accuracy: {val_acc:.4f}")

# y_pred_nn = np.argmax(model.predict(X_val), axis=1)
# print("\nClassification Report (Keras NN):")
# print(classification_report(y_val, y_pred_nn, target_names=le.classes_))

# # =====================
# # 4. XGBoost Classifier
# # =====================
# print("\n=== XGBoost Classifier ===")
# xgb = XGBClassifier(
#     n_estimators=500,
#     learning_rate=0.05,
#     max_depth=6,
#     subsample=0.8,
#     colsample_bytree=0.8,
#     random_state=42,
#     objective="multi:softmax",
#     num_class=len(le.classes_),
#     eval_metric="mlogloss"
# )

# # Ensure y are integers
# y_train = np.array(y_train, dtype=np.int32)
# y_val = np.array(y_val, dtype=np.int32)

# xgb.fit(X_train, y_train)

# y_pred_xgb = xgb.predict(X_val)

# print("✅ XGBoost Accuracy:", accuracy_score(y_val, y_pred_xgb))
# print("\nClassification Report (XGBoost):")
# print(classification_report(y_val, y_pred_xgb, target_names=le.classes_))



=== Keras Neural Network ===
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.2025 - loss: 1.9114 - val_accuracy: 0.4500 - val_loss: 1.8405
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.1899 - loss: 1.8842 - val_accuracy: 0.5500 - val_loss: 1.7681
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.1899 - loss: 1.8313 - val_accuracy: 0.5500 - val_loss: 1.6968
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.3544 - loss: 1.7505 - val_accuracy: 0.6000 - val_loss: 1.6308
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.4557 - loss: 1.6595 - val_accuracy: 0.6000 - val_loss: 1.5652
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.4937 - loss: 1.6630 - val_accuracy: 0.6500 - val_loss: 1.5043
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


✅ XGBoost Accuracy: 1.0

Classification Report (XGBoost):
              precision    recall  f1-score   support

    10-26-26       1.00      1.00      1.00         1
    14-35-14       1.00      1.00      1.00         3
    17-17-17       1.00      1.00      1.00         1
       20-20       1.00      1.00      1.00         3
       28-28       1.00      1.00      1.00         3
         DAP       1.00      1.00      1.00         4
        Urea       1.00      1.00      1.00         5

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [None]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score, classification_report

# # Load your dataset
# data = pd.read_csv("data/Fertilizer Prediction.csv")

# # One-hot encode categorical features
# data = pd.get_dummies(data, columns=['Soil Type', 'Crop Type'])

# # Split features & target
# X = data.drop("Fertilizer Name", axis=1)
# y = data["Fertilizer Name"]

# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# # RandomForest
# rf = RandomForestClassifier(n_estimators=200, random_state=42)
# rf.fit(X_train, y_train)
# rf_preds = rf.predict(X_val)

# print("🌲 RandomForest Accuracy:", accuracy_score(y_val, rf_preds))
# print(classification_report(y_val, rf_preds))

# # Logistic Regression (multinomial)
# logreg = LogisticRegression(max_iter=500, multi_class="multinomial")
# logreg.fit(X_train, y_train)
# log_preds = logreg.predict(X_val)

# print("📊 Logistic Regression Accuracy:", accuracy_score(y_val, log_preds))
# print(classification_report(y_val, log_preds))


🌲 RandomForest Accuracy: 0.14325
              precision    recall  f1-score   support

    10-26-26       0.13      0.15      0.14      2876
    14-35-14       0.15      0.16      0.15      2898
    17-17-17       0.14      0.14      0.14      2834
       20-20       0.15      0.14      0.15      2836
       28-28       0.14      0.14      0.14      2847
         DAP       0.15      0.14      0.14      2844
        Urea       0.14      0.13      0.14      2865

    accuracy                           0.14     20000
   macro avg       0.14      0.14      0.14     20000
weighted avg       0.14      0.14      0.14     20000





📊 Logistic Regression Accuracy: 0.1439
              precision    recall  f1-score   support

    10-26-26       0.14      0.14      0.14      2876
    14-35-14       0.15      0.26      0.19      2898
    17-17-17       0.15      0.07      0.10      2834
       20-20       0.14      0.07      0.09      2836
       28-28       0.14      0.14      0.14      2847
         DAP       0.15      0.14      0.15      2844
        Urea       0.14      0.18      0.16      2865

    accuracy                           0.14     20000
   macro avg       0.14      0.14      0.14     20000
weighted avg       0.14      0.14      0.14     20000



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
