In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('Crop_recommendation.csv')

In [3]:
df.sample(7)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
1322,120,19,49,25.794489,84.268307,6.762472,56.452292,watermelon
1669,39,9,15,25.354676,91.811832,7.992042,116.755594,orange
403,27,57,24,27.335349,43.35796,6.091863,142.330368,pigeonpeas
187,78,37,22,25.342171,63.31802,6.330554,74.52082,maize
1344,103,16,49,24.067315,81.640753,6.915717,51.752124,watermelon
2111,85,33,25,26.208114,52.50988,6.910824,189.094482,coffee
1317,109,21,55,24.90046,89.735242,6.770278,57.449421,watermelon


In [4]:
df['label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [6]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

In [8]:
x_train, x_test, y_train, y_test = train_test_split(df.drop(columns='label'), df['label'], test_size=0.2)

In [9]:
x_test

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
1232,32,139,198,35.893075,82.668507,6.358187,66.539466
1869,20,28,26,26.379785,91.498830,5.547595,167.047100
653,21,44,18,27.069100,86.899341,7.128511,50.467461
1350,97,25,50,26.220060,80.901270,6.093815,49.085539
28,60,49,44,20.775761,84.497744,6.244841,240.081065
...,...,...,...,...,...,...,...
1577,29,132,204,23.089507,90.225073,6.096753,108.216660
515,29,57,20,25.609734,50.733007,5.877075,53.392495
1261,22,133,201,23.819957,80.122116,6.002996,67.273986
849,24,70,16,25.178853,68.933073,6.548035,35.034848


In [10]:
le = LabelEncoder()
y_train1 = le.fit_transform(y_train)
y_test1 = le.fit_transform(y_test)

In [11]:
from sklearn.metrics import accuracy_score, r2_score

In [12]:
classes = np.unique(y_test1)
classes

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21])

In [13]:
sgd = SGDClassifier()

In [14]:
sgd.fit(x_train, y_train1)

In [15]:
pred1 = sgd.predict(x_test)

In [16]:
accuracy_score(y_test1, pred1)

0.7840909090909091

In [17]:
sgd.partial_fit(x_test, y_test1, classes=classes)

In [18]:
pred2 = sgd.predict(x_test)

In [19]:
accuracy_score(y_test1, pred2)

0.8431818181818181

In [20]:
clf = SGDClassifier(loss='log_loss')
clf.fit(x_train, y_train)

probs = clf.predict_proba([np.array([120,	40,	33,	24.238506,	54.303296,	6.734105,	115.156401])])
print(probs)

[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]




In [21]:
safe_scores = np.nan_to_num(probs, nan=-np.inf)

top3_indices = np.argsort(safe_scores, axis=1)[:, -3:][:, ::-1]

print("Top 3 class indices for each sample:\n", top3_indices)

Top 3 class indices for each sample:
 [[ 5 21 19]]


In [22]:
class_names = le.classes_

In [23]:
top3_class_names = [[class_names[i] for i in row] for row in top3_indices]
print("Top 3 predicted classes per sample:\n", top3_class_names)

Top 3 predicted classes per sample:
 [['coffee', 'watermelon', 'pomegranate']]


In [24]:
import pickle

In [25]:
pickle.dump(clf, open('../models/model.pkl', 'wb'))
pickle.dump(le, open('../models/label.pkl', 'wb'))

In [26]:
pickle.dump(classes, open('../models/classes.pkl', 'wb'))