### Load dataset

In [1]:
import pandas as pd

In [2]:
# Load the fertilizer dataset. pd.read_csv already returns a DataFrame, so it
# does not need to be wrapped in pd.DataFrame(...) again.
df = pd.read_csv('fertilizer.csv')
# `data` is kept as an alias so anything that still refers to the old name works.
data = df
# Preview the first five rows.
df.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [3]:
# Column labels exactly as read from the CSV. Note from the output below:
# 'Humidity ' carries a trailing space and 'Temparature' is the dataset's own
# spelling, so both must be typed that way when selecting those columns.
df.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [4]:
# Distinct soil types present in the dataset (order of first appearance).
df['Soil Type'].unique()

array(['Sandy', 'Loamy', 'Black', 'Red', 'Clayey'], dtype=object)

In [5]:
# Distinct crop types present in the dataset (order of first appearance).
df['Crop Type'].unique()

array(['Maize', 'Sugarcane', 'Cotton', 'Tobacco', 'Paddy', 'Barley',
       'Wheat', 'Millets', 'Oil seeds', 'Pulses', 'Ground Nuts'],
      dtype=object)

### Feature Engineering / preprocessing

In [6]:
# Columns removed from the feature frame: the target ('Fertilizer Name') and
# the measurements that are not used as model inputs.
drop_column = [
    'Moisture',
    'Nitrogen',
    'Potassium',
    'Phosphorous',
    'Fertilizer Name',
]

In [7]:
# Target vector: the fertilizer name recorded for each row.
y = df.loc[:, 'Fertilizer Name']

In [8]:
# Feature frame: everything in df except the columns listed in drop_column.
# (`columns=` already targets the column axis, so no `axis` argument is needed.)
x = df.drop(columns=drop_column)
x.head()

Unnamed: 0,Temparature,Humidity,Soil Type,Crop Type
0,26,52,Sandy,Maize
1,29,52,Loamy,Sugarcane
2,34,65,Black,Cotton
3,32,62,Red,Tobacco
4,28,54,Clayey,Paddy


In [9]:
# String-valued feature columns that need a numeric encoding before training.
# The encoding cells below name these columns explicitly.
categorical_columns = [
    'Soil Type',
    'Crop Type',
]

##### Using Label Encoding

In [10]:
from sklearn.preprocessing import LabelEncoder

# Learn the soil-type vocabulary, then replace each soil name in x with its
# integer code. `enc` is kept so the mapping can be inspected afterwards.
enc = LabelEncoder()
enc.fit(x['Soil Type'])
x['Soil Type'] = enc.transform(x['Soil Type'])

In [11]:
# Soil-type labels learned by the encoder; a label's position in this array is
# the integer code it was replaced with in x (e.g. 'Black' -> 0, 'Sandy' -> 4).
enc.classes_

array(['Black', 'Clayey', 'Loamy', 'Red', 'Sandy'], dtype=object)

In [12]:
# A separate encoder for crop type, so the soil mapping in `enc` stays intact.
crop_enc = LabelEncoder()
crop_enc.fit(x['Crop Type'])
x['Crop Type'] = crop_enc.transform(x['Crop Type'])

In [13]:
# Crop-type labels learned by the encoder; a label's position in this array is
# the integer code it was replaced with in x (e.g. 'Barley' -> 0, 'Wheat' -> 10).
crop_enc.classes_

array(['Barley', 'Cotton', 'Ground Nuts', 'Maize', 'Millets', 'Oil seeds',
       'Paddy', 'Pulses', 'Sugarcane', 'Tobacco', 'Wheat'], dtype=object)

In [14]:
# Show the feature frame after encoding: 'Soil Type' and 'Crop Type' now hold
# integer codes instead of strings.
x

Unnamed: 0,Temparature,Humidity,Soil Type,Crop Type
0,26,52,4,3
1,29,52,2,8
2,34,65,0,1
3,32,62,3,9
4,28,54,1,6
...,...,...,...,...
94,25,50,1,7
95,30,60,3,9
96,38,72,2,10
97,36,60,4,4


### Data Splitting

In [15]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the rows; the fixed random_state makes the split repeatable.
# NOTE(review): x_test / y_test are not used by any later cell visible here.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=42,
)

### ML Model - Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

In [17]:
# Decision tree with a fixed seed. scikit-learn permutes the candidate features
# at every split, so without random_state two runs on the same data can produce
# different trees (and a different saved model). 42 matches the split seed.
model = DecisionTreeClassifier(random_state=42)

In [18]:
# Train the tree on the training portion of the split.
model.fit(X=x_train, y=y_train)

### Save the model

In [19]:
import pickle

In [20]:
import os

# Persist the fitted classifier so it can be loaded outside this notebook.
# Only the model is pickled here; the LabelEncoders (`enc`, `crop_enc`) are not
# saved by this cell, so whoever loads the model must reproduce the same
# integer codes for 'Soil Type' and 'Crop Type'.
model_path = '../models/fertilizer.pkl'

# open(..., 'wb') raises FileNotFoundError when the target folder is missing,
# so make sure '../models' exists before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

with open(model_path, 'wb') as f:
    pickle.dump(model, f)
print("Model Saved Successfully!!!")

Model Saved Successfully!!!


#### For Prediction -
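For predictions, the model uses only these features, in this column order:
- Temperature (dataset column `Temparature`)
- Humidity (dataset column `Humidity `, which has a trailing space)
- Soil Type (label-encoded as in cell 10)
- Crop Type (label-encoded as in cell 12)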