## **CROP AND FERTILIZER RECOMMENDATION SYSTEM USING ML**

In [8]:
# importing necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

In [9]:
# Load the crop-recommendation dataset from a CSV file (path is relative to the working directory)

crop = pd.read_csv("datasets/Crop_recommendation.csv")

In [10]:
# First 5 rows of the dataset.
# Leaving the DataFrame as the cell's last expression lets Jupyter render it
# as a rich table; wrapping it in print() would force the plain-text repr.

crop.head()

# Output: Displays the first 5 rows with column names and values.

    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice


In [11]:
# Last 5 rows of the dataset.
# Leaving the DataFrame as the cell's last expression lets Jupyter render it
# as a rich table; wrapping it in print() would force the plain-text repr.

crop.tail()

# Output: Displays the last 5 rows.

        N   P   K  temperature   humidity        ph    rainfall   label
2195  107  34  32    26.774637  66.413269  6.780064  177.774507  coffee
2196   99  15  27    27.417112  56.636362  6.086922  127.924610  coffee
2197  118  33  30    24.131797  67.225123  6.362608  173.322839  coffee
2198  117  32  34    26.272418  52.127394  6.758793  127.175293  coffee
2199  104  18  30    23.603016  60.396475  6.779833  140.937041  coffee


In [12]:
# Dimensions of the DataFrame as a (rows, columns) tuple

crop.shape

(2200, 8)

In [13]:
# Print a concise summary: index range, column names, non-null counts, dtypes and memory usage

crop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [14]:
# Element-wise missing-value mask: True where a cell is missing, False otherwise.
# The result has the same shape as the dataset, so it is mostly useful as input to an aggregation.

crop.isnull()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...
2195,False,False,False,False,False,False,False,False
2196,False,False,False,False,False,False,False,False
2197,False,False,False,False,False,False,False,False
2198,False,False,False,False,False,False,False,False


In [15]:
# Count the missing values in each column (sums the boolean mask column by column)

crop.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [16]:
# Flag duplicated rows: True for a row that exactly repeats an earlier row (the first occurrence is not flagged)

crop.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
2195    False
2196    False
2197    False
2198    False
2199    False
Length: 2200, dtype: bool

In [17]:
# Count the fully duplicated rows in the dataset

crop.duplicated().sum()

0

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

crop.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [19]:
# List the column labels of the dataset

crop.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [25]:
# Check the class balance of the target column

crop['label'].value_counts()

# value_counts() returns how many rows carry each unique label.

label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64

### Additional measurements

In [63]:
# Distinct values of a categorical column, in order of first appearance
crop["label"].unique() # works on any column; here it lists the crop names in the target

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [28]:
# Data type of each column

crop.dtypes

N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object

In [31]:
# Minimum value of each column.
# NOTE: there is no numeric_only filter here, so the string column `label` is included
# and its "minimum" is simply the alphabetically first crop name.

crop.min()

N                      0
P                      5
K                      5
temperature     8.825675
humidity        14.25804
ph              3.504752
rainfall       20.211267
label              apple
dtype: object

In [32]:
# Maximum value of each column.
# NOTE: there is no numeric_only filter here, so the string column `label` is included
# and its "maximum" is simply the alphabetically last crop name.

crop.max()

N                     140
P                     145
K                     205
temperature     43.675493
humidity        99.981876
ph               9.935091
rainfall       298.560117
label          watermelon
dtype: object

In [64]:
# Number of non-null entries in each column

crop.count()

N              2200
P              2200
K              2200
temperature    2200
humidity       2200
ph             2200
rainfall       2200
label          2200
dtype: int64

In [69]:
# Arithmetic mean of each numeric column

crop.mean(numeric_only=True) # numeric_only=True skips non-numeric columns such as `label`

N               50.551818
P               53.362727
K               48.149091
temperature     25.616244
humidity        71.481779
ph               6.469480
rainfall       103.463655
dtype: float64

In [70]:
# Median (50th percentile) of each numeric column

crop.median(numeric_only=True) # numeric_only=True skips non-numeric columns such as `label`

N              37.000000
P              51.000000
K              32.000000
temperature    25.598693
humidity       80.473146
ph              6.425045
rainfall       94.867624
dtype: float64

In [71]:
# Most frequent value(s) of each numeric column.
# mode() returns one row per tied mode and pads the shorter columns with NaN. A column whose
# values (almost) never repeat - presumably the float measurements here - comes back nearly in
# full, so the result is mainly informative for the integer columns N, P and K.

crop.mode(numeric_only=True) # numeric_only=True skips non-numeric columns such as `label`

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,22.0,60.0,17.0,8.825675,14.258040,3.504752,20.211267
1,40.0,,,9.467960,14.273280,3.510404,20.360011
2,,,,9.535586,14.280419,3.525366,20.390205
3,,,,9.724458,14.323138,3.532009,20.490356
4,,,,9.851243,14.338474,3.558823,20.661278
...,...,...,...,...,...,...,...
2195,,,,43.037143,99.658092,9.416003,291.298662
2196,,,,43.080227,99.724010,9.459493,295.609449
2197,,,,43.302049,99.846716,9.679241,295.924880
2198,,,,43.360515,99.969060,9.926212,298.401847


In [73]:
# Standard deviation of each numeric column (pandas uses the sample formula, ddof=1, by default)

crop.std(numeric_only=True) # numeric_only=True skips non-numeric columns such as `label`

N              36.917334
P              32.985883
K              50.647931
temperature     5.063749
humidity       22.263812
ph              0.773938
rainfall       54.958389
dtype: float64

In [68]:
# quantile() returns the value below which a given fraction of the data falls;
# passing a list evaluates all the requested fractions in a single call.
# np.percentile() gives the same figures when called with 25, 50, 75 and 100.
n_cut_points = crop['N'].quantile([0.25, 0.50, 0.75, 1.00])
# Unpack through the underlying array so Q1..Q4 remain NumPy float scalars.
Q1, Q2, Q3, Q4 = n_cut_points.to_numpy()
print(f"1st Quartile: {Q1}")
print(f"2nd Quartile (Median): {Q2}")
print(f"3rd Quartile: {Q3}")
print(f"4th Quartile (Max): {Q4}")

1st Quartile: 21.0
2nd Quartile (Median): 37.0
3rd Quartile: 84.25
4th Quartile (Max): 140.0
