In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Relative path of the dataset file read by this notebook
PATH = 'Crop_recommendation.csv'

# Parse the CSV into the DataFrame that the following cells refer to as `data`
data = pd.read_csv(filepath_or_buffer=PATH)


In [3]:
# Preview the top of the table; five rows is also what head() shows by default
data.head(n=5)


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [4]:
# Print a structural summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage
data.info() 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for each
# numeric column
data.describe() 


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [6]:
# Return the distinct crop labels themselves (an array of names, not a count)
data['label'].unique()


array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [7]:
# Tally how many rows carry each crop label
crop_labels = data['label']
crop_labels.value_counts()


label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64

In [20]:
# Separate the seven numeric predictor columns from the target column.
features = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
labels = data['label']

# Standardize the predictors. StandardScaler comes from the notebook's import
# cell, so this cell no longer carries its own import.
# NOTE(review): the scaler is fitted on every row here, i.e. before any
# train/test split, so `features_scaled` reflects statistics of the full
# dataset, including rows that later serve as test data.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)



In [21]:
# Split the *unscaled* features first so the scaler never sees test rows.
# test_size and random_state are unchanged, so the same rows end up in the
# train and test partitions as before.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both partitions. Rebinding `scaler` ensures the
# object pickled further down is the one that matches the trained model.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)



In [22]:
# Fit the classifier on the training split (fit() hands back the estimator)
LogReg = LogisticRegression(random_state=42).fit(X_train, Y_train)

# Label the held-out rows
predicted_values = LogReg.predict(X_test)

# Overall share of test rows that were labelled correctly
accuracy = metrics.accuracy_score(y_true=Y_test, y_pred=predicted_values)
print("Logistic Regression accuracy: ", accuracy)

# Per-crop precision / recall / F1 breakdown
per_class_report = metrics.classification_report(Y_test, predicted_values)
print(per_class_report)


Logistic Regression accuracy:  0.9636363636363636
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.90      0.95      0.93        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.83      0.87      0.85        23
 kidneybeans       0.95      0.95      0.95        20
      lentil       0.85      1.00      0.92        11
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.92      0.96        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.

In [23]:
# Destination files for the fitted model and the scaler used with it.
# The model filename's spelling is kept verbatim because the loading cell
# opens exactly the same path.
filename = 'LogisticRegresion.pkl'
scaler_filename = 'scaler.pkl'

# Serialize each object to its own file; the scaler is stored too so that
# future inputs can be transformed the same way before prediction
for target_path, artifact in ((filename, LogReg), (scaler_filename, scaler)):
    with open(target_path, 'wb') as handle:
        pickle.dump(artifact, handle)



In [32]:
# Restore the persisted estimator and scaler from disk.
# pickle.load can execute arbitrary code, so only load files that this
# notebook (or another trusted source) wrote.
with open('LogisticRegresion.pkl', 'rb') as model_handle:
    loaded_model = pickle.load(model_handle)
with open('scaler.pkl', 'rb') as scaler_handle:
    loaded_scaler = pickle.load(scaler_handle)

# A single observation, keyed by the same column names used for training
sample = {
    'N': 32,
    'P': 76,
    'K': 15,
    'temperature': 28.05153602,
    'humidity': 63.49802189,
    'ph': 7.604110177,
    'rainfall': 43.35795377,
}
new_data = pd.DataFrame([sample])

# Apply the stored scaling before handing the row to the model
new_data_scaled = loaded_scaler.transform(new_data)

# The model returns an array of labels; show the only entry
predicted_label = loaded_model.predict(new_data_scaled)
print("Predicted crop label: ", predicted_label[0])



Predicted crop label:  lentil
