In [20]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# import xgboost as xgb
# import catboost as cat_
# import lightgbm as lgb
# import optuna.integration.lightgbm as lgb2
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, log_loss, roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [21]:
# Load the fertilizer recommendation dataset from a CSV in the working directory.
data = pd.read_csv(filepath_or_buffer="fertilizer_recommendation_dataset.csv")


def data_information(data):
    """Print and display a quick exploratory overview of a DataFrame.

    Shows, in order: the ``info()`` summary, the first rows, descriptive
    statistics, the shape, the column index and the per-column null counts.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to summarise. It is only read by this function.

    Returns
    -------
    None
        Everything is emitted through ``print`` / ``display``.
    """
    print('Data Information Table')
    # DataFrame.info() prints its own report and returns None, so it is called
    # directly; wrapping it in display() would render a stray "None".
    data.info(verbose=True, show_counts=True)
    print('\n Data Head Table')
    display(data.head())
    print('\n Data Describe Information Table')
    display(data.describe())
    print(f'\n Data Shape: {data.shape}')
    print(f'\n Data Columns {data.columns}')
    print('\n Data Null item Information Table')
    # Per-column count of missing values, so the header above has content.
    display(data.isnull().sum())

In [22]:
# Summarise the freshly loaded frame (info, head, describe, shape, columns).
data_information(data=data)


Data Information Table
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3100 entries, 0 to 3099
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  3100 non-null   float64
 1   Moisture     3100 non-null   float64
 2   Rainfall     3100 non-null   float64
 3   PH           3100 non-null   float64
 4   Nitrogen     3100 non-null   float64
 5   Phosphorous  3100 non-null   float64
 6   Potassium    3100 non-null   float64
 7   Carbon       3100 non-null   float64
 8   Soil         3100 non-null   object 
 9   Crop         3100 non-null   object 
 10  Fertilizer   3100 non-null   object 
 11  Remark       3100 non-null   object 
dtypes: float64(8), object(4)
memory usage: 290.8+ KB


None


 Data Head Table


Unnamed: 0,Temperature,Moisture,Rainfall,PH,Nitrogen,Phosphorous,Potassium,Carbon,Soil,Crop,Fertilizer,Remark
0,50.179845,0.725893,205.600816,6.227358,66.701872,76.96356,96.429065,0.4963,Loamy Soil,rice,Compost,Enhances organic matter and improves soil stru...
1,21.633318,0.721958,306.081601,7.173131,71.583316,163.057636,148.128347,1.234242,Loamy Soil,rice,Balanced NPK Fertilizer,"Provides a balanced mix of nitrogen, phosphoru..."
2,23.060964,0.685751,259.336414,7.380793,75.70983,62.091508,80.308971,1.79565,Peaty Soil,rice,Water Retaining Fertilizer,Improves water retention in dry soils. Prefer ...
3,26.241975,0.755095,212.703513,6.883367,78.033687,151.012521,153.005712,1.517556,Loamy Soil,rice,Balanced NPK Fertilizer,"Provides a balanced mix of nitrogen, phosphoru..."
4,21.490157,0.730672,268.786767,7.57876,71.765123,66.257371,97.000886,1.782985,Peaty Soil,rice,Organic Fertilizer,"Enhances fertility naturally, ideal for peaty ..."



 Data Describe Information Table


Unnamed: 0,Temperature,Moisture,Rainfall,PH,Nitrogen,Phosphorous,Potassium,Carbon
count,3100.0,3100.0,3100.0,3100.0,3100.0,3100.0,3100.0,3100.0
mean,27.271096,0.600763,111.024949,6.379775,61.27848,69.58285,69.721167,1.474325
std,9.11445,0.144947,77.15188,0.957103,10.18641,42.226712,35.304241,0.6983
min,4.011486,0.227436,-85.709593,3.448974,35.527718,-37.649739,-20.509108,-0.28056
25%,22.408505,0.493595,61.283985,5.860489,54.835715,43.03926,49.992405,0.956401
50%,26.132938,0.59952,96.485935,6.337308,59.429649,58.205269,58.710808,1.463357
75%,29.987887,0.708575,147.619401,6.899739,65.79279,86.108983,76.318626,2.003534
max,56.653592,0.972361,411.955947,9.562341,98.581039,177.314481,161.067982,3.241984



 Data Shape: (3100, 12)

 Data Columns Index(['Temperature', 'Moisture', 'Rainfall', 'PH', 'Nitrogen', 'Phosphorous',
       'Potassium', 'Carbon', 'Soil', 'Crop', 'Fertilizer', 'Remark'],
      dtype='object')

 Data Null item Information Table


In [23]:
# Select model inputs and the two prediction targets from `data`, the frame
# loaded from the CSV. (`df` was never defined in this notebook, so the
# original lines failed with NameError on a fresh kernel.)
X = data[['Temperature', 'Moisture', 'Soil', 'Crop']]
y = data[['Fertilizer', 'Remark']]

In [25]:
# Input columns grouped by the kind of preprocessing they receive.
numeric_features = ['Temperature', 'Moisture']
categorical_features = ['Soil', 'Crop']

# Standardise the numeric columns and one-hot encode the categorical ones;
# handle_unknown='ignore' lets predict() accept categories not seen in fit().
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

In [26]:
# End-to-end model: preprocessing followed by one random forest per target
# column (MultiOutputClassifier fits a separate estimator for each output).
clf = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'classifier',
            MultiOutputClassifier(
                RandomForestClassifier(n_estimators=100, random_state=42)
            ),
        ),
    ]
)

In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the preprocessing + classifier pipeline on the training portion only.
clf.fit(X_train, y_train)


In [29]:
y_pred = clf.predict(X_test)

# Show performance: one classification report per target column.
# y_pred is a 2-D array with one column per target, in the same order as the
# columns of the y frame used for fitting, so position i pairs with
# y_test.columns[i].
for position, target in enumerate(y_test.columns):
    # zero_division=0 keeps the reported scores at 0.00 for classes that are
    # never predicted, without emitting UndefinedMetricWarning.
    report = classification_report(y_test[target], y_pred[:, position], zero_division=0)
    print(f"{target} Report:\n", report)

Fertilizer Report:
                             precision    recall  f1-score   support

   Balanced NPK Fertilizer       0.63      0.81      0.71        32
                   Compost       0.30      0.15      0.20        84
                       DAP       0.61      0.73      0.67       206
General Purpose Fertilizer       0.50      0.29      0.36         7
                    Gypsum       0.00      0.00      0.00         6
                      Lime       0.21      0.13      0.16        39
         Muriate of Potash       0.35      0.33      0.34        66
        Organic Fertilizer       0.29      0.28      0.29        18
                      Urea       0.12      0.06      0.08        34
Water Retaining Fertilizer       0.51      0.63      0.56       128

                  accuracy                           0.49       620
                 macro avg       0.35      0.34      0.34       620
              weighted avg       0.45      0.49      0.46       620

Remark Report:
          

In [30]:
# Persist the fitted pipeline (preprocessing + classifier) to disk.
# joblib is imported in the notebook's import cell.
joblib.dump(clf, "fertilizer_remark_model.pkl")


['fertilizer_remark_model.pkl']

In [38]:
# Reload the persisted pipeline.
# NOTE: joblib.load unpickles arbitrary Python objects; only load files that
# were produced by a trusted source.
model = joblib.load("fertilizer_remark_model.pkl")

# Make predictions
sample = pd.DataFrame([{
    'Temperature': 28,
    'Moisture': 0.5,
    'Soil': 'Acidic Soil',
    'Crop': 'watermelon'
}])
# Predict once and reuse the result instead of running inference twice.
res = model.predict(sample)
print(res)

[['Muriate of Potash'
  'High potassium content, improves fruit and flower quality. Prefer this for potassium-deficient soils to enhance crop productivity.']]


In [42]:
# Row 0 is the single sample; column 1 is the second target ('Remark').
res[0, 1]

'High potassium content, improves fruit and flower quality. Prefer this for potassium-deficient soils to enhance crop productivity.'

In [36]:
# Distinct soil types in the loaded frame (`data`; `df` is not defined in this notebook).
print(data["Soil"].unique())

['Loamy Soil' 'Peaty Soil' 'Acidic Soil' 'Neutral Soil' 'Alkaline Soil']


In [37]:
# Distinct crop names in the loaded frame (`data`; `df` is not defined in this notebook).
print(data["Crop"].unique())

['rice' 'wheat' 'Mung Bean' 'Tea' 'millet' 'maize' 'Lentil' 'Jute'
 'Coffee' 'Cotton' 'Ground Nut' 'Peas' 'Rubber' 'Sugarcane' 'Tobacco'
 'Kidney Beans' 'Moth Beans' 'Coconut' 'Black gram' 'Adzuki Beans'
 'Pigeon Peas' 'Chickpea' 'banana' 'grapes' 'apple' 'mango' 'muskmelon'
 'orange' 'papaya' 'pomegranate' 'watermelon']
