In [228]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [229]:
crop = pd.read_csv("Crop_recommendation.csv")
crop.dropna(inplace=True)

In [230]:
X = crop.drop(['label'], axis=1)
y = crop['label']


In [231]:
le = LabelEncoder()
y = le.fit_transform(y)

In [232]:
crop['label'].value_counts()

label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64

In [233]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [234]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [235]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [236]:
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)


In [237]:
print("Best parameters:", grid_search.best_params_)

Best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [238]:
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9931818181818182


In [239]:
# Generate classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      1.00      0.96        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        14
   

In [240]:
# conf_matrix = confusion_matrix(y_test, y_pred)
# plt.figure(figsize=(10, 8))
# plt.imshow(conf_matrix, cmap='Blues', interpolation='nearest')
# plt.title('Confusion Matrix')
# plt.colorbar()
# plt.xticks(np.arange(len(le.classes_)), le.classes_, rotation=90)
# plt.yticks(np.arange(len(le.classes_)), le.classes_)
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# for i in range(len(le.classes_)):
#     for j in range(len(le.classes_)):
#         plt.text(j, i, str(conf_matrix[i, j]), ha='center', va='center', color='white')
# plt.show()

In [241]:
joblib.dump(best_rf, 'crop_recommendation_model.pkl')
joblib.dump(scaler,'scaler.pkl')

['scaler.pkl']

In [242]:
def get_crop_recommendation(N, P, K, temperature, humidity, ph, rainfall, model):
    input_data = np.array([[N, P, K, temperature, humidity, ph, rainfall]])
    input_data_scaled = scaler.transform(input_data)
    print("Input data scaled:", input_data_scaled)
    predicted_label = model.predict(input_data_scaled)
    print("Predicted label:", predicted_label)
    crop_label = le.inverse_transform(predicted_label)[0]
    print("Crop label:", crop_label)
    return crop_label



In [243]:
recommended_crop = get_crop_recommendation(60,54,19,18.74826712,62.49878458,6.417820493,70.23401597, best_rf)
print("Recommended Crop:", recommended_crop)

Input data scaled: [[ 0.2497802   0.01777133 -0.57058952 -1.34247956 -0.41154123 -0.0751648
  -0.59026894]]
Predicted label: [11]
Crop label: maize
Recommended Crop: maize




In [244]:
def predict_specific():
    N = 60
    P = 54
    K = 19
    temp = 18
    humidity = 62
    ph = 6
    rainfall = 70
    recommended_crop = get_crop_recommendation(N, P, K, temp, humidity, ph, rainfall, best_rf)

print(recommended_crop)

maize


In [245]:
predicted_labels = best_rf.predict(X_test)
predicted_crops = le.inverse_transform(predicted_labels)
predictions_df = pd.DataFrame({'Predicted Label': predicted_labels, 'Crop Name': predicted_crops})
predictions_df_sorted = predictions_df.sort_values(by='Predicted Label')
predictions_dict = {}
for index, row in predictions_df_sorted.iterrows():
    if row['Predicted Label'] not in predictions_dict:
        predictions_dict[row['Predicted Label']] = row['Crop Name']
print(predictions_dict)


{0: 'apple', 1: 'banana', 2: 'blackgram', 3: 'chickpea', 4: 'coconut', 5: 'coffee', 6: 'cotton', 7: 'grapes', 8: 'jute', 9: 'kidneybeans', 10: 'lentil', 11: 'maize', 12: 'mango', 13: 'mothbeans', 14: 'mungbean', 15: 'muskmelon', 16: 'orange', 17: 'papaya', 18: 'pigeonpeas', 19: 'pomegranate', 20: 'rice', 21: 'watermelon'}
