In [1]:
import numpy as np
import pandas as pd
import warnings
# Silence all warnings for the rest of the session: no category, module or
# message filter is given, so every warning category is ignored.
# NOTE(review): consider narrowing this to the specific warnings that are
# meant to be hidden, so unrelated library warnings stay visible.
warnings.filterwarnings('ignore')

In [None]:
# Load the fertilizer dataset from the `dataset` directory one level above the
# notebook's working directory.
# Forward slashes are used on purpose: they resolve on Windows, Linux and
# macOS, whereas a backslash-separated path is only a valid path on Windows.
# NOTE(review): the classification report further down lists a class named
# '10/26/2026', which looks like a spreadsheet-mangled '10-26-26' label --
# verify the 'Fertilizer Name' values in the CSV.
df = pd.read_csv("../dataset/Fertilizer_Simulated_Dataset.csv")

In [None]:
# Preview the first rows to sanity-check the column names and value formats.
df.head()

Unnamed: 0,Temperature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26.0,52.0,38.0,Sandy,Maize,37,0,0,Urea
1,29.0,52.0,45.0,Loamy,Sugarcane,12,0,36,DAP
2,34.0,65.0,62.0,Black,Cotton,7,9,30,14-35-14
3,32.0,62.0,34.0,Red,Tobacco,22,0,20,28-28
4,28.0,54.0,46.0,Clayey,Paddy,35,0,0,Urea


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Temperature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous
count,249.0,249.0,249.0,249.0,249.0,249.0
mean,27.05759,57.22759,36.441084,35.39759,27.618474,31.493976
std,5.913828,16.496311,15.122563,27.679592,28.418868,21.006359
min,15.27,20.76,5.33,0.0,0.0,0.0
25%,22.68,47.23,25.78,12.0,0.0,15.0
50%,28.0,58.0,37.39,24.0,21.0,30.0
75%,32.0,67.0,48.0,62.0,41.0,47.0
max,38.0,89.8,65.0,100.0,100.0,80.0


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Integer-encode the categorical feature columns and keep each fitted encoder,
# keyed by column name, so the codes can be mapped back to category names.
# NOTE: the columns of `df` are overwritten in place, so re-running this cell
# alone re-fits the encoders on the already-encoded integers.
# NOTE(review): LabelEncoder assigns arbitrary integer codes; confirm that an
# implied ordering of crop/soil types is acceptable for the linear models.
categorical_columns = ['Crop Type', 'Soil Type']
label_encoders = {column: LabelEncoder() for column in categorical_columns}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# Target is the fertilizer name; every remaining column is used as a feature
y = df['Fertilizer Name']
X = df.drop(columns='Fertilizer Name')

In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps the
# fertilizer class proportions similar in both splits, and the fixed seed
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Standardise every feature column (including the integer-encoded categorical
# ones) using statistics learned from the training split only, then apply the
# same transformation to the test split.
# NOTE: X_train / X_test are rebound to the scaled arrays, so re-running this
# cell on its own would fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Candidate classifiers, keyed by the display name used in the printed reports.
# Insertion order determines the order in which they are trained.
models = {}
models['Logistic Regression'] = LogisticRegression(max_iter=1000)
models['Decision Tree'] = DecisionTreeClassifier(random_state=42)
models['Random Forest'] = RandomForestClassifier(n_estimators=100, random_state=42)
models['SVM'] = SVC(kernel='linear', random_state=42, probability=True)

# Test accuracy per model name, filled in by the train-and-evaluate loop
results = {}

In [None]:
# Fit every candidate on the training split, score it on the held-out split,
# print its report, and record its accuracy under the model's display name.
for name, model in models.items():
    # scikit-learn's fit() returns the estimator itself, so the calls chain
    y_pred = model.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print(f"{name} Accuracy: {acc:.2f}")
    print(f"Classification Report for {name}:\n")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

    results[name] = acc

Logistic Regression Accuracy: 0.86
Classification Report for Logistic Regression:

                   precision    recall  f1-score   support

       10/26/2026       0.33      1.00      0.50         1
         14-35-14       1.00      1.00      1.00         3
         17-17-17       1.00      1.00      1.00         1
            20-20       1.00      1.00      1.00         3
            28-28       1.00      1.00      1.00         3
Ammonium Sulphate       1.00      0.40      0.57         5
              DAP       0.75      0.75      0.75         8
              MOP       1.00      1.00      1.00         6
              NPK       1.00      1.00      1.00         4
              SSP       1.00      1.00      1.00         5
             Urea       0.75      0.82      0.78        11

         accuracy                           0.86        50
        macro avg       0.89      0.91      0.87        50
     weighted avg       0.89      0.86      0.86        50

-----------------------------

In [None]:
# Select the model name with the highest recorded test accuracy
# (on a tie, max() keeps the first such name in insertion order).
best_model = max(results.keys(), key=lambda model_name: results[model_name])
print(f"The best model is {best_model} with an accuracy of {results[best_model]:.2f}")

The best model is Random Forest with an accuracy of 0.92
