In [1]:
import pickle

import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Read the fertilizer dataset from disk
df = pd.read_csv('Chemical_Fertilizer_Dataset.csv')

# Column groups: the nutrient levels are numeric, the crop name is categorical
numeric_features = ['N', 'P', 'K']
categorical_features = ['Crop']

# Model inputs (N, P, K, Crop) and the fertilizer label to predict
X = df[numeric_features + categorical_features]
y = df['Fertilizer']

# Numeric columns: fill missing values with the column mean, then standardize
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' keeps transform() from raising
# on a category that was not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer.
# sparse_threshold=0 forces the combined output to be a dense array.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numeric_transformer, numeric_features),
    ],
    sparse_threshold=0,
)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Seed passed to every candidate that accepts one, so that re-running the
# notebook from a fresh kernel reproduces the same comparison.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in the report
classifiers = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'KNN': KNeighborsClassifier(),
    'SVM': SVC(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=RANDOM_STATE),
    'Naive Bayes': GaussianNB(),
    'XGBoost': XGBClassifier(random_state=RANDOM_STATE),
}

# One preprocessing + classifier pipeline per candidate.
# Pipeline stores the objects it is given without copying them, so each
# pipeline receives its own unfitted clone of the preprocessor and of the
# classifier. Fitting one pipeline then cannot overwrite state that another
# pipeline (or a later cell) relies on.
pipelines = {
    name: Pipeline(steps=[
        ('preprocessor', clone(preprocessor)),
        ('classifier', clone(clf)),
    ])
    for name, clf in classifiers.items()
}

# Encode the fertilizer names as integer class codes. The encoder is fitted on
# the training labels only.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# Encode the test labels once instead of once per model. transform() raises
# ValueError if y_test holds a label that never occurs in y_train, so doing it
# here surfaces that problem before any model is trained.
y_test_encoded = label_encoder.transform(y_test)

# Fit every candidate on the training split and record its test accuracy
accuracies = {}
for name, pipeline in pipelines.items():
    pipeline.fit(X_train, y_train_encoded)
    accuracies[name] = pipeline.score(X_test, y_test_encoded)

# Compare accuracies
print("Model Comparisons:")
for name, accuracy in accuracies.items():
    print(f'{name} Accuracy: {accuracy}')

# Select the best model (the first one listed wins a tie).
# NOTE: the winner is picked on the same test split its accuracy is reported
# on, so the reported figure is an optimistic estimate of real performance.
best_model_name = max(accuracies, key=accuracies.get)
best_model_accuracy = accuracies[best_model_name]
print(f'\nThe best model is {best_model_name} with an accuracy of {best_model_accuracy}.')

Model Comparisons:
Random Forest Accuracy: 0.521594684385382
Decision Tree Accuracy: 0.717607973421927
KNN Accuracy: 0.49612403100775193
SVM Accuracy: 0.4152823920265781
Gradient Boosting Accuracy: 0.4640088593576966
Naive Bayes Accuracy: 0.39756367663344405
XGBoost Accuracy: 0.7242524916943521

The best model is XGBoost with an accuracy of 0.7242524916943521.


In [6]:
# Fertilizer names known to the encoder; the position of a name in this array
# is the integer code the encoder assigns to it.
fertilizer_classes = label_encoder.classes_
print(fertilizer_classes)


['10:10:10 NPK' '10:26:26 NPK' '12:32:16 NPK' '13:32:26 NPK'
 '18:46:00 NPK' '19:19:19 NPK' '20:20:20 NPK' '50:26:26 NPK'
 'Ammonium Sulphate' 'Chilated Micronutrient' 'DAP' 'Ferrous Sulphate'
 'Hydrated Lime' 'MOP' 'Magnesium Sulphate' 'SSP' 'Sulphur' 'Urea'
 'White Potash']


In [4]:
# Entry of the model comparison that is used for prediction
MODEL_NAME = 'XGBoost'

# Reuse the pipeline that was already fitted on X_train during the model
# comparison instead of training an identical one a second time.
pipeline = pipelines[MODEL_NAME]

# Evaluate the trained model on the test data
accuracy = pipeline.score(X_test, label_encoder.transform(y_test))
print(f'Accuracy: {accuracy}')

# Define input values as a dictionary
input_values = {
    'N': 70,  # Example value for Nitrogen
    'P': 60,  # Example value for Phosphorus
    'K': 60,  # Example value for Potassium
    'Crop': 'Sugarcane'  # Example crop
}

# Single-row DataFrame whose column names match the training features
input_df = pd.DataFrame([input_values])

# The one-hot encoder is configured with handle_unknown='ignore', so a crop
# that is absent from the training data does not raise: it is encoded as all
# zeros. Say so explicitly instead of returning a prediction without comment.
known_crops = set(X_train['Crop'].dropna())
if input_values['Crop'] not in known_crops:
    print(
        f"Warning: crop {input_values['Crop']!r} was not seen during training; "
        "it is encoded as all zeros, so the prediction relies on N, P and K only."
    )

# Predict the class code, then map it back to the fertilizer name
predicted_fertilizer_encoded = pipeline.predict(input_df)
predicted_fertilizer = label_encoder.inverse_transform(predicted_fertilizer_encoded)

print(f"The predicted fertilizer for the input values is: {predicted_fertilizer[0]}")

Accuracy: 0.7242524916943521
The predicted fertilizer for the input values is: Urea


In [7]:
# Persist the fitted artefacts for later inference.
#
# The encoder written here is the one that produced y_train_encoded for
# training. It is deliberately NOT re-created and refitted in this cell: a
# second fit would save an object the model was never trained with, and its
# class codes could diverge from the model's if y_train changed in between.

# Guard against saving an encoder and a model that disagree on the classes
assert len(label_encoder.classes_) == len(pipeline.classes_), (
    "label_encoder and the trained pipeline disagree on the number of classes"
)

# Save the fitted LabelEncoder object to a file
with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoder, encoder_file)

# Save the trained XGBoost pipeline (preprocessing + classifier) to a file
with open('xgboost_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

# NOTE: pickle files can execute arbitrary code when loaded; only load these
# artefacts from a trusted location.
