## **Exploratory Data Analysis**

In [1]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Loading the downloaded dataset
path = r"/content/Fertilizer Prediction.csv"
df = pd.read_csv(path)

# rename target column
df = df.rename({'Fertilizer Name': 'Fertilizer','Crop Type': 'Crop_Type','Soil Type': 'Soil_Type'}, axis=1)

df.sample(15)

Unnamed: 0,Temparature,Humidity,Moisture,Soil_Type,Crop_Type,Nitrogen,Potassium,Phosphorous,Fertilizer
33,36,68,38,Sandy,Barley,7,9,30,14-35-14
58,25,50,56,Loamy,Sugarcane,11,13,15,17-17-17
96,38,72,51,Loamy,Wheat,39,0,0,Urea
79,34,65,63,Red,Cotton,14,0,38,DAP
78,26,52,36,Clayey,Pulses,7,16,20,10-26-26
54,31,62,63,Red,Cotton,11,12,15,17-17-17
21,34,65,53,Loamy,Sugarcane,12,14,12,17-17-17
85,35,67,28,Clayey,Pulses,8,7,31,14-35-14
57,29,58,37,Sandy,Millets,8,0,15,20-20
91,34,65,45,Clayey,Paddy,6,19,21,10-26-26


In [3]:
print("SHAPE : ", df.shape)
df.info()

SHAPE :  (99, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Temparature  99 non-null     int64 
 1   Humidity     99 non-null     int64 
 2   Moisture     99 non-null     int64 
 3   Soil_Type    99 non-null     object
 4   Crop_Type    99 non-null     object
 5   Nitrogen     99 non-null     int64 
 6   Potassium    99 non-null     int64 
 7   Phosphorous  99 non-null     int64 
 8   Fertilizer   99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [4]:
df.describe()

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous
count,99.0,99.0,99.0,99.0,99.0,99.0
mean,30.282828,59.151515,43.181818,18.909091,3.383838,18.606061
std,3.502304,5.840331,11.271568,11.599693,5.814667,13.476978
min,25.0,50.0,25.0,4.0,0.0,0.0
25%,28.0,54.0,34.0,10.0,0.0,9.0
50%,30.0,60.0,41.0,13.0,0.0,19.0
75%,33.0,64.0,50.5,24.0,7.5,30.0
max,38.0,72.0,65.0,42.0,19.0,42.0


In [5]:
# Printing number of samples per each class
df["Crop_Type"].value_counts()

Crop_Type
Sugarcane      13
Cotton         12
Millets        11
Paddy          10
Pulses         10
Wheat           9
Tobacco         7
Barley          7
Oil seeds       7
Ground Nuts     7
Maize           6
Name: count, dtype: int64

## **Analyze Independent Variables**

In [6]:
# list of all numerical variables in dataset
numerical_features = [feature for feature in df.columns if df[feature].dtypes != 'O']
print('Number of numerical variables: ', len(numerical_features), numerical_features)

# list of all discrete variables in dataset
discrete_features=[feature for feature in numerical_features if len(df[feature].unique())<25]
print('Number of Discrete variables: ', len(discrete_features), discrete_features)

# list of all continuous variables in dataset
continuous_features=[feature for feature in numerical_features if feature not in discrete_features]
print('Number of Continuous variables: ', len(continuous_features), continuous_features)

# list of all categorical variables in dataset
categorical_features=[feature for feature in df.columns if df[feature].dtypes=='O']
print('Number of categorical variables: ', len(categorical_features), categorical_features)


Number of numerical variables:  6 ['Temparature', 'Humidity ', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
Number of Discrete variables:  4 ['Temparature', 'Humidity ', 'Nitrogen', 'Potassium']
Number of Continuous variables:  2 ['Moisture', 'Phosphorous']
Number of categorical variables:  3 ['Soil_Type', 'Crop_Type', 'Fertilizer']


In [7]:
# Find the Cardinality i.e number of categories in each categorical feature
for feature in categorical_features:
    if feature=='Fertilizer':
      pass
    print('The feature is {} and no. of categories are {}'.format(feature,len(df[feature].unique())))

The feature is Soil_Type and no. of categories are 5
The feature is Crop_Type and no. of categories are 11
The feature is Fertilizer and no. of categories are 7


## **One-Hot Encoding the Categorical Variables**

In [8]:
# list of categorical features in dataset
categorical_features=[feature for feature in df.columns if df[feature].dtype=='O']

# Remove the Target variable.
categorical_features.remove('Fertilizer')

# encode categorical features
new_encoded_columns = pd.get_dummies(df[categorical_features])

# Concatinating with original dataframe
df = pd.concat([df,new_encoded_columns],axis="columns")

# dropping the categorical variables since they are redundant now.
df = df.drop(categorical_features,axis="columns")

## **Training the Model**

In [9]:
x = df.drop("Fertilizer",axis=1)
x.head(10)

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous,Soil_Type_Black,Soil_Type_Clayey,Soil_Type_Loamy,Soil_Type_Red,...,Crop_Type_Cotton,Crop_Type_Ground Nuts,Crop_Type_Maize,Crop_Type_Millets,Crop_Type_Oil seeds,Crop_Type_Paddy,Crop_Type_Pulses,Crop_Type_Sugarcane,Crop_Type_Tobacco,Crop_Type_Wheat
0,26,52,38,37,0,0,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
1,29,52,45,12,0,36,False,False,True,False,...,False,False,False,False,False,False,False,True,False,False
2,34,65,62,7,9,30,True,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,32,62,34,22,0,20,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
4,28,54,46,35,0,0,False,True,False,False,...,False,False,False,False,False,True,False,False,False,False
5,26,52,35,12,10,13,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,25,50,64,9,0,10,False,False,False,True,...,True,False,False,False,False,False,False,False,False,False
7,33,64,50,41,0,0,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True
8,30,60,42,21,0,18,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False
9,29,58,33,9,7,30,True,False,False,False,...,False,False,False,False,True,False,False,False,False,False


In [10]:
y = df["Fertilizer"]
y.head(10)

0        Urea
1         DAP
2    14-35-14
3       28-28
4        Urea
5    17-17-17
6       20-20
7        Urea
8       28-28
9    14-35-14
Name: Fertilizer, dtype: object

## **Data Splitting**

In [11]:
# DATA SPLITTING
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2,shuffle=True)

In [12]:
x_train.values[:10]

array([[37, 70, 37, 12, 0, 41, False, True, False, False, False, False,
        False, False, False, False, False, True, False, False, False,
        False],
       [29, 58, 61, 11, 0, 38, False, False, True, False, False, False,
        True, False, False, False, False, False, False, False, False,
        False],
       [25, 50, 56, 11, 13, 15, False, False, True, False, False, False,
        False, False, False, False, False, False, False, True, False,
        False],
       [31, 62, 44, 21, 0, 28, False, False, False, False, True, True,
        False, False, False, False, False, False, False, False, False,
        False],
       [25, 50, 65, 36, 0, 0, False, False, True, False, False, False,
        True, False, False, False, False, False, False, False, False,
        False],
       [26, 52, 36, 14, 0, 13, False, True, False, False, False, False,
        False, False, False, False, False, False, True, False, False,
        False],
       [30, 60, 61, 8, 10, 31, False, False, True, F

In [13]:
y_train.values[:10]

array(['DAP', 'DAP', '17-17-17', '28-28', 'Urea', '20-20', '14-35-14',
       '10-26-26', '14-35-14', '28-28'], dtype=object)

## **LightGBM Classifier Model**

In [14]:
# Creating a lightgbm model
import lightgbm as lgb

model = lgb.LGBMClassifier()

# Training the model using Training Data
model.fit(x_train,y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 91
[LightGBM] [Info] Number of data points in the train set: 79, number of used features: 6
[LightGBM] [Info] Start training from score -2.423538
[LightGBM] [Info] Start training from score -1.971553
[LightGBM] [Info] Start training from score -2.577688
[LightGBM] [Info] Start training from score -1.971553
[LightGBM] [Info] Start training from score -1.661398
[LightGBM] [Info] Start training from score -1.730391
[LightGBM] [Info] Start training from score -1.661398


## **Input Function**

In [15]:
import numpy as np

def get_input(x):

    # Index values of each variable in x
    x_structure = {
        "Temparature": 0, "Humidity": 1, "Moisture": 2, "Nitrogen": 3,
        "Potassium": 4, "Phosphorous": 5, "Black": 6,  "Clayey": 7, "Loamy": 8,
        "Red": 9, "Sandy": 10, "Barley": 11, "Cotton": 12, "Ground Nuts": 13, "Maize": 14,
        "Millets": 15, "Oil seeds": 16, "Paddy": 17, "Pulses": 18, "Sugarcane": 19, "Tobacco": 20,
        "Wheat": 21
    }

    output = np.zeros(len(x_structure))
    output[0] = x[0]
    output[1] = x[1]
    output[2] = x[2]
    output[3] = x[3]
    output[4] = x[4]
    output[5] = x[5]
    output[x_structure[x[6]]] = 1
    output[x_structure[x[7]]] = 1
    return output


In [16]:
# Make Prediction
x1 = get_input([25,	50	,64,	9	,0,	10, "Red", "Cotton"])

y1 = model.predict([x1])
print("Predicted Fertilizer : ",y1[0])

Predicted Fertilizer :  20-20


In [20]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import cv2
from google.colab.patches import cv2_imshow

# Function to predict crop label based on inputs
def predict_fert(N, P, K, temperature, humidity, moisture, soiltype, cropname):
    # Assuming `get_input` is defined elsewhere and returns a list of input values
    x1 = get_input([N, P, K, temperature, humidity, moisture, str(soiltype), str(cropname)])
    fertilizer_images = {
        "Urea": "/content/fertimages/Urea.jpg",
        "DAP": "/content/fertimages/DAP.jpg",
        "14-35-14": "/content/fertimages/14-35-14.jpg",
        "28-28": "/content/fertimages/28-28.jpg",
        "17-17-17": "/content/fertimages/17-17-17.jpg",
        "20-20": "/content/fertimages/20-20.jpg"
    }
    fertilizer_links = {
        "Urea": "https://greenmartfertilizerindustries.in/product/17034439/KISAN-UREA",
        "DAP": "https://greenmartfertilizerindustries.in/product/17034596/NOVRATANA-DAP",
        "14-35-14": "https://rythuagro.in/product/gromor-14-35-14/",
        "28-28": "https://rythuagro.in/product/gromor-28-28-0/",
        "17-17-17": "https://www.indiamart.com/proddetail/gromor-17-17-17-fertilizer-10329151391.html",
        "20-20": "https://krishibazaar.in/product/agriplus-haifa-npk-20-20-20-water-soluble-fertilizers"
    }
    # Assuming `model` is defined elsewhere and used for prediction
    y1 = model.predict([x1])

    return y1[0], fertilizer_images.get(y1[0], None), fertilizer_links.get(y1[0], None)


# Create input widgets
temperature_input = widgets.FloatText(description='Temperature:')
humidity_input = widgets.FloatText(description='Humidity:')
moisture_input = widgets.FloatText(description='Moisture:')
N_input = widgets.FloatText(description='N:')
P_input = widgets.FloatText(description='P:')
K_input = widgets.FloatText(description='K:')

soil_type_input = widgets.Text(description='Soil Type:')
crop_name_input = widgets.Text(description='Crop Name:')
# Create a button widget
button = widgets.Button(description="Predict")

# Output widget to display prediction
output = widgets.Output()

# Function to handle button click event
def on_button_clicked(b):
    with output:
        output.clear_output()  # Clear previous output
        prediction, img_url, buying_link = predict_fert(
            temperature_input.value, humidity_input.value,
            moisture_input.value, N_input.value, P_input.value, K_input.value, soil_type_input.value, crop_name_input.value
        )
        print("The Suggested Fertilizer for Given Crop is:", prediction)
        if img_url:
            img = cv2.imread(img_url)
            cv2_imshow(img)
            if buying_link:
                print("Buy this fertilizer:", buying_link)


button.on_click(on_button_clicked)

# Display the widgets
display(temperature_input, humidity_input, moisture_input, N_input, P_input, K_input, soil_type_input, crop_name_input, button, output)


FloatText(value=0.0, description='Temperature:')

FloatText(value=0.0, description='Humidity:')

FloatText(value=0.0, description='Moisture:')

FloatText(value=0.0, description='N:')

FloatText(value=0.0, description='P:')

FloatText(value=0.0, description='K:')

Text(value='', description='Soil Type:')

Text(value='', description='Crop Name:')

Button(description='Predict', style=ButtonStyle())

Output()

In [18]:
# Save the model
model.booster_.save_model("fertilizer_model.txt")

<lightgbm.basic.Booster at 0x7b2bb08bf400>