In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
data = pd.read_csv('crop_recommendation.csv')

# Check for missing values
print(data.isnull().sum())

# Handle missing values if necessary
# data.fillna(method='ffill', inplace=True)  # Example for filling missing values

# Scale the features
scaler = StandardScaler()
features = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
scaled_features = scaler.fit_transform(features)

# Encode the target labels
le = LabelEncoder()
labels = le.fit_transform(data['label'])


N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64


In [7]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_features, labels, test_size=0.2, random_state=42)


In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the KNN model
knn = KNeighborsClassifier(n_neighbors=5)  # You can tune 'n_neighbors' later

# Train the model
knn.fit(X_train, y_train)

# Make predictions
y_pred = knn.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 95.68%
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      0.95      0.95        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.89      1.00      0.94        17
      grapes       1.00      1.00      1.00        14
        jute       0.81      0.96      0.88        23
 kidneybeans       0.91      1.00      0.95        20
      lentil       0.69      1.00      0.81        11
       maize       1.00      0.90      0.95        21
       mango       0.90      1.00      0.95        19
   mothbeans       1.00      0.83      0.91        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1.00        14
      papa

In [9]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_neighbors': list(range(3, 21)),
    'weights': ['uniform', 'distance']
}

# Grid search for the best parameters
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters
print(f"Best Parameters: {grid_search.best_params_}")

# Train with the best parameters
knn_best = grid_search.best_estimator_
y_pred_best = knn_best.predict(X_test)

# Evaluate the tuned model
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Optimized Accuracy: {accuracy_best * 100:.2f}%")
print(classification_report(y_test, y_pred_best, target_names=le.classes_))


Best Parameters: {'n_neighbors': 4, 'weights': 'distance'}
Optimized Accuracy: 97.05%
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       0.94      1.00      0.97        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.85      0.96      0.90        23
 kidneybeans       0.91      1.00      0.95        20
      lentil       0.73      1.00      0.85        11
       maize       1.00      0.95      0.98        21
       mango       0.95      1.00      0.97        19
   mothbeans       1.00      0.88      0.93        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00      

In [10]:
import joblib

# Save the model
joblib.dump(knn_best, 'knn_crop_recommendation_model.pkl')


['knn_crop_recommendation_model.pkl']

In [11]:
import joblib
import numpy as np

# Load the trained model
model = joblib.load('knn_crop_recommendation_model.pkl')

# Define a sample test case (input features)
# Example values: N=90, P=42, K=43, temperature=20.5°C, humidity=80%, ph=6.5, rainfall=200mm
sample_test_case = np.array([[90, 42, 43, 20.5, 80, 6.5, 200]])
# here un above array user can enter the input requirement...

# Scale the features using the same scaler used during training
scaler = StandardScaler()
sample_test_case_scaled = scaler.fit_transform(sample_test_case)

# Predict the crop using the loaded model
predicted_crop_index = model.predict(sample_test_case_scaled)
predicted_crop = le.inverse_transform(predicted_crop_index)

# Output the result
print(f"Predicted Crop: {predicted_crop[0]}")


Predicted Crop: maize
