In [1]:
import numpy as np
import pandas as pd
import joblib as jb
from sklearn.svm import SVC
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [2]:
df=pd.read_csv("new_data.csv")
df.head()

Unnamed: 0,N,P,K,Crop Type,fertilizer
0,90,42,43,rice,urea
1,85,58,41,rice,urea
2,60,55,44,rice,urea
3,74,35,40,rice,urea
4,78,42,42,rice,urea


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   N           2200 non-null   int64 
 1   P           2200 non-null   int64 
 2   K           2200 non-null   int64 
 3   Crop Type   2200 non-null   object
 4   fertilizer  2200 non-null   object
dtypes: int64(3), object(2)
memory usage: 86.1+ KB


In [4]:
df.dropna(inplace=True)

In [5]:
df['fertilizer'].unique()

array(['urea', 'superphosphate', 'nitrogen', '10-10-10', '14-14-14',
       '20:40:20', 'Rhizobium', '8-8-8', '5-10-10', '10-20-20',
       'Hydrated Lime', '5-5-5', 'FYM 20 t/ha', 'FYM', 'P2O5', 'ERP',
       '20-10-10', '40:20:20', 'lime'], dtype=object)

In [6]:
maps = {}
for variable in ['Crop Type','fertilizer']:
    mappings = df[variable].factorize()[1]
    df[variable] = df[variable].factorize()[0]
    maps[variable] = {k: i for i, k in enumerate(mappings)}
df.head()

Unnamed: 0,N,P,K,Crop Type,fertilizer
0,90,42,43,0,0
1,85,58,41,0,0
2,60,55,44,0,0
3,74,35,40,0,0
4,78,42,42,0,0


In [7]:
with open("map_ftlzr.txt", "w") as file:
    file.write(str(maps))

In [8]:
x=df.iloc[:,:-1]
y=df.iloc[:,-1:]

In [9]:
xtrain,xtest,ytrain,ytest=train_test_split(x, y,test_size=0.2)

In [10]:
clf_svc=SVC()
clf_dt=DecisionTreeClassifier()
clf_rf=RandomForestClassifier()

In [11]:
clf_svc.fit(xtrain,ytrain)
clf_dt.fit(xtrain,ytrain)
clf_rf.fit(xtrain,ytrain)

  return f(*args, **kwargs)
  This is separate from the ipykernel package so we can avoid doing imports until


RandomForestClassifier()

In [12]:
ypredict_svc=clf_svc.predict(xtest)
ypredict_dt=clf_dt.predict(xtest)
ypredict_rf=clf_rf.predict(xtest)

In [13]:
print(accuracy_score(ytest,ypredict_svc))
print(accuracy_score(ytest,ypredict_dt))
print(accuracy_score(ytest,ypredict_rf))

0.7477272727272727
1.0
1.0


In [14]:
print(classification_report(ytest,ypredict_svc),"\n\n")
print(classification_report(ytest,ypredict_dt),"\n\n")
print(classification_report(ytest,ypredict_rf),"\n\n")

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        19
           2       0.78      0.95      0.86        73
           3       0.00      0.00      0.00        29
           4       0.67      0.57      0.62        42
           5       0.47      0.53      0.50        15
           6       0.26      0.64      0.37        14
           7       0.94      0.85      0.89        20
           8       1.00      1.00      1.00        20
           9       0.62      0.81      0.70        16
          10       0.53      0.50      0.51        18
          11       0.47      1.00      0.64        16
          12       0.00      0.00      0.00        18
          13       0.50      0.53      0.51        17
          14       1.00      1.00      1.00        18
          15       1.00      0.79      0.88        24
          16       1.00      1.00      1.00        19
          17       0.95    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
print(confusion_matrix(ytest,ypredict_svc),"\n\n")
print(confusion_matrix(ytest,ypredict_dt),"\n\n")
print(confusion_matrix(ytest,ypredict_rf),"\n\n")

[[19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 69  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  8  0  5  5 11  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  5  0 24  3 10  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  2  8  5  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  4  0  0  1  9  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 17  0  3  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  2  0  0  0  0  1  0 13  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  9  0  0  9  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 16  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 18  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  8  0  0  9  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0 18  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  5  0  0  0  0  0 19  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0

In [16]:
jb.dump(clf_svc,'ftlzr_svc.pkl')
jb.dump(clf_dt,'ftlzr_dt.pkl')
jb.dump(clf_rf,'ftlzr_rf.pkl')

['ftlzr_rf.pkl']