In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df =pd.read_csv('Crop_recommendation.csv')

In [4]:
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [5]:
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [6]:
df['label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [7]:
crop_dict={
    'rice': 1,
    'maize': 2,
    'jute': 3,
    'cotton': 4,
    'coconut': 5,
    'papaya': 6,
    'orange': 7,
    'apple': 8,
    'muskmelon': 9,
    'watermelon': 10,
    'grapes': 11,
    'mango': 12,
    'banana': 13,
    'pomegranate': 14,
    'lentil': 15,
    'blackgram': 16,
    'mungbean': 17,
    'mothbeans': 18,
    'pigeonpeas': 19,
    'kidneybeans': 20,
    'chickpea': 21,
    'coffee': 22
}

df['label'] = df['label'].map(crop_dict)

In [16]:
x = df.drop(columns=['label'])
y=df['label']

In [17]:
# Create Column Transformer with 3 types of transformers
num_columns = x.select_dtypes(exclude="object").columns
cat_columns= x.select_dtypes(include="object").columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

num_transformer = Pipeline(steps=[("scaler", StandardScaler())])
cat_transformer = Pipeline(steps=[("onehot", OneHotEncoder())])

preprocessor = ColumnTransformer(
    [
        ("cat", cat_transformer, cat_columns),
         ("num", num_transformer, num_columns),        
    ]
)

In [18]:
x =preprocessor.fit_transform(x)

In [11]:
x.shape

(2200, 7)

In [12]:
# separate dataset into train and test
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=42)
x_train.shape, x_test.shape

((1760, 7), (440, 7))

In [13]:
models = {
    'LogisticRegression': LogisticRegression(),
    'GaussianNB':GaussianNB(),
    'SVC':SVC(),
    'KNeighborsClassifier':KNeighborsClassifier(),
    'DecisionTreeClassifier':DecisionTreeClassifier(),
    'ExtraTreeClassifier':ExtraTreeClassifier(),
    'RandomForestClassifier':RandomForestClassifier(),
    'BaggingClassifier':BaggingClassifier(),
    'GradientBoostingClassifier':GradientBoostingClassifier(),
    'AdaBoostClassifier':AdaBoostClassifier()
}

In [14]:
report ={}

In [23]:

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(x_train, y_train) # Train model

    # Make predictions
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)
    
    score = accuracy_score(y_test, y_test_pred)
    print(f"{model} model with accuracy: {score}")
    report[list(models.keys())[i]]=score

LogisticRegression() model with accuracy: 0.9636363636363636
GaussianNB() model with accuracy: 0.9954545454545455
SVC() model with accuracy: 0.9681818181818181
KNeighborsClassifier() model with accuracy: 0.9590909090909091
DecisionTreeClassifier() model with accuracy: 0.9886363636363636
ExtraTreeClassifier() model with accuracy: 0.9227272727272727
RandomForestClassifier() model with accuracy: 0.9931818181818182
BaggingClassifier() model with accuracy: 0.9886363636363636
GradientBoostingClassifier() model with accuracy: 0.9818181818181818




AdaBoostClassifier() model with accuracy: 0.1409090909090909


In [24]:
report

{'LogisticRegression': 0.9636363636363636,
 'GaussianNB': 0.9954545454545455,
 'SVC': 0.9681818181818181,
 'KNeighborsClassifier': 0.9590909090909091,
 'DecisionTreeClassifier': 0.9886363636363636,
 'ExtraTreeClassifier': 0.9227272727272727,
 'RandomForestClassifier': 0.9931818181818182,
 'BaggingClassifier': 0.9886363636363636,
 'GradientBoostingClassifier': 0.9818181818181818,
 'AdaBoostClassifier': 0.1409090909090909}

In [25]:
#Extracting the highest score from our model

best_Model_score = max(sorted(report.values()))

In [26]:
best_Model_score

0.9954545454545455

In [27]:
best_model_name= list(report)[list(report.values()).index(best_Model_score)]

In [28]:
best_model_name

'GaussianNB'

In [29]:
best_model=models[best_model_name]

In [30]:
best_model.fit(x_train,y_train)

In [31]:
y_predict= best_model.predict(x_test)

In [32]:
accuracy_score(y_test, y_predict)

0.9954545454545455

In [49]:
crop_dict1_reversed = {
    1: 'rice',
    2: 'maize',
    3: 'jute',
    4: 'cotton',
    5: 'coconut',
    6: 'papaya',
    7: 'orange',
    8: 'apple',
    9: 'muskmelon',
    10: 'watermelon',
    11: 'grapes',
    12: 'mango',
    13: 'banana',
    14: 'pomegranate',
    15: 'lentil',
    16: 'blackgram',
    17: 'mungbean',
    18: 'mothbeans',
    19: 'pigeonpeas',
    20: 'kidneybeans',
    21: 'chickpea',
    22: 'coffee'
}


In [50]:
crop_dict1_reversed

{1: 'rice',
 2: 'maize',
 3: 'jute',
 4: 'cotton',
 5: 'coconut',
 6: 'papaya',
 7: 'orange',
 8: 'apple',
 9: 'muskmelon',
 10: 'watermelon',
 11: 'grapes',
 12: 'mango',
 13: 'banana',
 14: 'pomegranate',
 15: 'lentil',
 16: 'blackgram',
 17: 'mungbean',
 18: 'mothbeans',
 19: 'pigeonpeas',
 20: 'kidneybeans',
 21: 'chickpea',
 22: 'coffee'}

In [51]:
def recommendation(N,P,K,temperature,humidity,ph,rainfall):
    features = np.array([[N,P,K,temperature,humidity,ph,rainfall]])
    prediction = best_model.predict(features)
    if prediction[0] in crop_dict1_reversed:
        return crop_dict1_reversed[prediction[0]]

In [52]:
crop_dict1_reversed = {
    1: 'rice',
    2: 'maize',
    3: 'jute',
    4: 'cotton',
    5: 'coconut',
    6: 'papaya',
    7: 'orange',
    8: 'apple',
    9: 'muskmelon',
    10: 'watermelon',
    11: 'grapes',
    12: 'mango',
    13: 'banana',
    14: 'pomegranate',
    15: 'lentil',
    16: 'blackgram',
    17: 'mungbean',
    18: 'mothbeans',
    19: 'pigeonpeas',
    20: 'kidneybeans',
    21: 'chickpea',
    22: 'coffee'
}


In [53]:
recommendation(43,56,87,30,83,6,203)

'coffee'