In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [1]:
from google.colab import files

# This will open a file picker in the Colab interface
uploaded = files.upload()

Saving fertilizer_recommendation_dataset.csv to fertilizer_recommendation_dataset.csv


In [4]:
# Load your dataset
df = pd.read_csv('fertilizer_recommendation_dataset.csv')

In [5]:
# DROP REMARK COLUMN BEFORE TRAINING
df = df.drop('Remark', axis=1)

In [8]:
# Explore the data
print(df.head())
print(df.info())
print(df.describe())

   Temperature  Moisture    Rainfall        PH   Nitrogen  Phosphorous  \
0    50.179845  0.725893  205.600816  6.227358  66.701872    76.963560   
1    21.633318  0.721958  306.081601  7.173131  71.583316   163.057636   
2    23.060964  0.685751  259.336414  7.380793  75.709830    62.091508   
3    26.241975  0.755095  212.703513  6.883367  78.033687   151.012521   
4    21.490157  0.730672  268.786767  7.578760  71.765123    66.257371   

    Potassium    Carbon        Soil  Crop                  Fertilizer  
0   96.429065  0.496300  Loamy Soil  rice                     Compost  
1  148.128347  1.234242  Loamy Soil  rice     Balanced NPK Fertilizer  
2   80.308971  1.795650  Peaty Soil  rice  Water Retaining Fertilizer  
3  153.005712  1.517556  Loamy Soil  rice     Balanced NPK Fertilizer  
4   97.000886  1.782985  Peaty Soil  rice          Organic Fertilizer  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3100 entries, 0 to 3099
Data columns (total 11 columns):
 #   Column     

In [6]:
# Check for missing values
print(df.isnull().sum())

Temperature    0
Moisture       0
Rainfall       0
PH             0
Nitrogen       0
Phosphorous    0
Potassium      0
Carbon         0
Soil           0
Crop           0
Fertilizer     0
dtype: int64


In [7]:
# Preprocess categorical variables
label_encoders = {}
categorical_cols = ['Soil', 'Crop', 'Fertilizer']
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [8]:
# Separate features and target
X = df.drop(['Fertilizer'], axis=1)
y = df['Fertilizer']

In [12]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Random Forest often works well for this type of data
model = RandomForestClassifier(n_estimators=200,
                              max_depth=10,
                              min_samples_split=5,
                              random_state=42,
                              class_weight='balanced')

model.fit(X_train_scaled, y_train)

In [14]:
# Evaluate
y_pred = model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       1.00      0.99      0.99        84
           2       0.99      1.00      1.00       206
           3       1.00      1.00      1.00         7
           4       1.00      0.83      0.91         6
           5       0.97      0.97      0.97        39
           6       1.00      1.00      1.00        66
           7       1.00      0.94      0.97        18
           8       1.00      1.00      1.00        34
           9       0.98      0.99      0.99       128

    accuracy                           0.99       620
   macro avg       0.99      0.97      0.98       620
weighted avg       0.99      0.99      0.99       620



In [15]:
# Save model and preprocessing objects
joblib.dump(model, 'fertilizer_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')

['label_encoders.pkl']