In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load Dataset
# Read the fertilizer recommendation CSV (path is relative to the notebook's
# working directory) into the DataFrame used by every later cell.
DATA_PATH = 'fertilizer_recommendation.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded data (pandas shows 5 rows by default).
df.head()

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

In [None]:
# Column labels available in the dataset.
df.columns

In [None]:
# Count of missing values per column; the cleaning step below drops affected rows.
df.isnull().sum()

In [None]:
# Data Cleaning
# Drop every row that contains at least one missing value.
# Rebinding `df` to the returned frame (instead of `inplace=True`) avoids
# mutating the loaded object in place; `inplace` gives no performance benefit
# and prevents method chaining. The resulting `df` has the same rows and index
# as the in-place version.
df = df.dropna()

In [None]:
# Graphs (EDA Visualization)
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of how many rows carry each recommended fertilizer label.
fig = plt.figure(figsize=(6,4))
sns.countplot(x='Recommended_Fertilizer', data=df)
plt.xticks(rotation=45)  # tilt the category labels so they do not overlap
plt.title('Fertilizer Distribution')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image from a freshly created empty figure.
fig.savefig('fertilizer_distribution.png')
plt.show()
plt.close(fig)


In [None]:
# Histogram of the Temperature column with a kernel density estimate overlay.
fig = plt.figure(figsize=(6,4))
sns.histplot(df['Temperature'], kde=True)
plt.title('Temperature Distribution')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image from a freshly created empty figure.
fig.savefig('temperature_distribution.png')
plt.show()
plt.close(fig)


In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns
# (numeric_only=True skips non-numeric columns).
fig = plt.figure(figsize=(8,8))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image. The redundant second plt.show() of the original cell is dropped.
fig.savefig('correlation_heatmap.png')
plt.show()
plt.close(fig)

In [None]:
# Feature Engineering
# NPK Ratio: nitrogen relative to the combined phosphorus + potassium level.
# The small 1e-6 term keeps the denominator non-zero when both are 0.
pk_total = df['Phosphorus_Level'] + df['Potassium_Level'] + 1e-6
df['NPK_Ratio'] = df['Nitrogen_Level'] / pk_total

# Show the inputs next to the derived ratio for the first rows as a sanity check.
npk_preview = df[['Nitrogen_Level','Phosphorus_Level','Potassium_Level','NPK_Ratio']].head()
print("\nNPK_Ratio sample:\n", npk_preview)

In [None]:
# Moisture Level Category
# Bucket Soil_Moisture into three labelled ranges. pd.cut uses right-closed
# intervals by default, i.e. (-1, 30] -> Dry, (30, 60] -> Medium,
# (60, 100] -> Wet; values outside (-1, 100] become NaN.
moisture_bins = [-1,30,60,100]
moisture_labels = ['Dry','Medium','Wet']
df['Moisture_Level'] = pd.cut(df['Soil_Moisture'], bins=moisture_bins, labels=moisture_labels)

# Report how many rows landed in each bucket.
moisture_counts = df['Moisture_Level'].value_counts()
print("\nMoisture_Level counts:\n", moisture_counts)

In [None]:
# Plot Moisture Level
# Bar chart of the number of rows in each Moisture_Level bucket.
fig = plt.figure(figsize=(6,4))
sns.countplot(x='Moisture_Level', data=df)
plt.title('Moisture Level Distribution')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image. The redundant second plt.show() of the original cell is dropped.
fig.savefig('moisture_level_distribution.png')
plt.show()
plt.close(fig)

In [None]:
# Encoding Categorical Data
from sklearn.preprocessing import LabelEncoder

# Replace every object/category column with integer codes.
# A separate fitted encoder is stored per column in `label_encoders` so the
# integer codes can later be mapped back with
# `label_encoders[col].inverse_transform(...)`. Reusing a single encoder for
# all columns would overwrite its `classes_` on every fit and keep only the
# mapping of the last column.
# NOTE(review): this presumably also encodes the 'Recommended_Fertilizer'
# target if it is stored as strings - confirm against the dataset.
label_encoders = {}
le = LabelEncoder()  # stays bound even when there is no categorical column
for col in df.select_dtypes(include=['object', 'category']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Define Features & Target
# Target: the fertilizer label to predict.
y = df['Recommended_Fertilizer']
# Features: every remaining column of the frame.
X = df.drop(columns='Recommended_Fertilizer')

In [None]:
# Train Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
# Stratify on the target so train and test keep the same class proportions,
# which matters for a multi-class label that may be imbalanced. Stratification
# needs at least two rows per class, so fall back to a plain random split
# (the previous behaviour) when some class is rarer than that.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

In [None]:
# Model Training (Classification)
from sklearn.ensemble import RandomForestClassifier

# Random forest with 200 trees; the fixed random_state makes training repeatable.
rf_params = {'n_estimators': 200, 'random_state': 42}
model = RandomForestClassifier(**rf_params)
# Fit on the training split (the fitted estimator is the cell's displayed output).
model.fit(X_train, y_train)

In [None]:
# Prediction & Evaluation
# Predict labels for the held-out test features; `y_pred` is compared against
# `y_test` by the metric cells that follow.
y_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fraction of test rows whose predicted label equals the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", acc)

In [None]:
# Per-class precision, recall, F1 and support on the test split.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

In [None]:
# Confusion matrix on the test split: rows are actual classes, columns are
# predicted classes, each cell is a count of test rows.
cm = confusion_matrix(y_test, y_pred)
fig = plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')  # fmt='d' prints integer counts
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image. The redundant second plt.show() of the original cell is dropped.
fig.savefig('confusion_matrix.png')
plt.show()
plt.close(fig)

In [None]:
# Class imbalance check
# Number of rows per target value across the whole (cleaned) dataset.
class_counts = df['Recommended_Fertilizer'].value_counts()
print("\nClass distribution:\n", class_counts)

In [None]:
# Cross validation
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation over the full feature matrix and target; one score
# is produced per fold and only their mean is reported.
scores = cross_val_score(model, X, y, cv=5)
mean_cv_score = scores.mean()
print(mean_cv_score)

In [None]:
# Feature Importance Plot
# Horizontal bars of the fitted forest's importance score for each feature
# column, in the column order of X.
importances = model.feature_importances_
fig = plt.figure(figsize=(7,4))
sns.barplot(x=importances, y=X.columns)
plt.title('Feature Importance')
plt.tight_layout()
# Save BEFORE showing: plt.show() may release the current figure (the inline
# notebook backend closes it), after which plt.savefig() would write a blank
# image. The redundant second plt.show() of the original cell is dropped.
fig.savefig('feature_importance.png')
plt.show()
plt.close(fig)

In [None]:
# Save Model (PKL File)
import pickle

# Serialize the fitted classifier to disk in binary mode. Only the model object
# is written; anything else needed at inference time is not part of this file.
MODEL_PATH = 'fertilizer_model.pkl'
with open(MODEL_PATH, mode='wb') as model_file:
    pickle.dump(model, model_file)

print("Model saved as fertilizer_model.pkl")