In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import joblib

# --- 1. Load Data ---
# Read the crop dataset. Nothing below can run without it, so a missing file
# aborts the script with a failing exit status.
try:
    df = pd.read_csv('Crop_recommendation.csv')
except FileNotFoundError:
    print("Error: 'Crop_recommendation.csv' not found.")
    # Raise SystemExit directly instead of calling `exit()`: that helper is
    # injected by the `site` module for interactive shells and is not
    # guaranteed to exist in every run mode. The non-zero code also tells
    # the calling process that the run failed.
    raise SystemExit(1)

# --- 2. Separate Features and Target ---
# The 'label' column holds the crop name to predict; every other column is
# treated as a model input.
y = df['label']
X = df.drop(columns=['label'])

# --- 3. Preprocessing: Label Encoding ---
# Replace each crop-name string with an integer class id.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Keep the fitted class names around for decoding and for the reports below.
crop_classes = label_encoder.classes_
print(f"Encoded Crop Classes: {crop_classes}")


# --- 4. Split Data ---
# Hold out 20% of the rows for testing. Stratifying on the encoded labels
# keeps the class proportions the same in both splits, and the fixed seed
# makes the split reproducible.
split_options = {
    'test_size': 0.2,
    'random_state': 42,
    'stratify': y_encoded,
}
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

# --- 5. Preprocessing: Feature Scaling ---
# Fit the scaler on the training split only, then apply the same learned
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- 6-11. Model Training and Evaluation ---
def _fit_and_report(model, display_name):
    """Fit *model* on the scaled training split and print its test-set report.

    Args:
        model: An unfitted classifier exposing ``fit`` and ``predict``.
        display_name: Human-readable model name used in the progress
            messages and the report heading.

    Returns:
        A ``(predictions, accuracy)`` tuple: the predicted encoded labels for
        ``X_test_scaled`` and their accuracy against ``y_test``.
    """
    print(f"\nTraining the {display_name} model...")
    model.fit(X_train_scaled, y_train)
    print(f"{display_name} training complete.")

    predictions = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, predictions)

    print(f"\n--- {display_name} Model Evaluation ---")
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, predictions, target_names=crop_classes))
    return predictions, accuracy


# Each model is trained and evaluated in turn with identical data and
# reporting, so their accuracies are directly comparable further down.

# Random Forest Classifier
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred_rf, accuracy_rf = _fit_and_report(model_rf, "Random Forest")

# Support Vector Machine (RBF kernel)
model_svm = SVC(kernel='rbf', random_state=42)
y_pred_svm, accuracy_svm = _fit_and_report(model_svm, "SVM")

# Gradient Boosting Classifier
model_gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
y_pred_gb, accuracy_gb = _fit_and_report(model_gb, "Gradient Boosting")


# --- 12. Print All Models Accuracy ---
banner = "=" * 70  # horizontal rule framing each summary table
print(
    "\n" + banner,
    "ALL MODELS ACCURACY",
    banner,
    f"1. Random Forest Accuracy:      {accuracy_rf * 100:.2f}%",
    f"2. SVM Accuracy:                {accuracy_svm * 100:.2f}%",
    f"3. Gradient Boosting Accuracy:  {accuracy_gb * 100:.2f}%",
    banner,
    sep="\n",
)

# --- 13. Determine and Print Best Model ---
# Map display name -> test accuracy. On a tie, the entry inserted first wins
# because max() returns the first maximal item it encounters.
accuracies = {}
accuracies['Random Forest'] = accuracy_rf
accuracies['SVM'] = accuracy_svm
accuracies['Gradient Boosting'] = accuracy_gb
best_model_name, best_accuracy = max(accuracies.items(), key=lambda entry: entry[1])

print(
    "\n" + banner,
    "BEST MODEL",
    banner,
    f"Model Name: {best_model_name}",
    f"Accuracy:   {best_accuracy * 100:.2f}%",
    banner,
    sep="\n",
)


# --- 14. Save Best Model and Preprocessors for Deployment ---
# Look up the fitted estimator that matches the winning display name; any
# unrecognised name falls back to the Gradient Boosting model.
trained_models = {
    'Random Forest': model_rf,
    'SVM': model_svm,
}
best_model = trained_models.get(best_model_name, model_gb)

# NOTE(review): the model file is always named 'random_forest_crop_model.joblib'
# even when SVM or Gradient Boosting wins. The name is kept as-is here because
# code that loads it may depend on it -- confirm before renaming.
artifacts = (
    (best_model, 'random_forest_crop_model.joblib'),
    (scaler, 'scaler.joblib'),
    (label_encoder, 'label_encoder.joblib'),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print(f"\n✓ Best Model ({best_model_name}) and Preprocessors saved successfully!")

# --- 15. Example Prediction Function ---
def recommend_crop(N, P, K, temp, hum, ph, rain):
    """
    Return the crop name the selected model predicts for one set of readings.

    The seven values are placed, in argument order, into a single-row frame
    labelled with the training feature columns (``X.columns``), scaled with
    the already-fitted ``scaler``, classified by ``best_model`` and decoded
    back to a crop name with ``label_encoder``.

    NOTE(review): this assumes the dataset's feature columns appear in the
    same order as the arguments (N, P, K, temp, hum, ph, rain) -- confirm
    against the CSV header.
    """
    # One-row frame carrying the same column labels the scaler was fitted on.
    readings = pd.DataFrame(
        [[N, P, K, temp, hum, ph, rain]],
        columns=X.columns,
    )

    # Scale, classify, then turn the encoded class id back into its name.
    encoded_label = best_model.predict(scaler.transform(readings))
    return label_encoder.inverse_transform(encoded_label)[0]

# Example Usage:
# Sample readings; per the original note these are the Rice values taken from
# the head of the dataset.
sample_N = 90
sample_P = 42
sample_K = 43
sample_temp = 20.88
sample_hum = 82.00
sample_ph = 6.50
sample_rain = 202.94

recommended = recommend_crop(
    N=sample_N,
    P=sample_P,
    K=sample_K,
    temp=sample_temp,
    hum=sample_hum,
    ph=sample_ph,
    rain=sample_rain,
)

print(f"\n--- Example Recommendation ---")
print(f"Input: N={sample_N}, P={sample_P}, K={sample_K}, Temp={sample_temp}, Hum={sample_hum}, pH={sample_ph}, Rain={sample_rain}")
print(f"Recommended Crop: {recommended}")


Encoded Crop Classes: ['apple' 'banana' 'blackgram' 'chickpea' 'coconut' 'coffee' 'cotton'
 'grapes' 'jute' 'kidneybeans' 'lentil' 'maize' 'mango' 'mothbeans'
 'mungbean' 'muskmelon' 'orange' 'papaya' 'pigeonpeas' 'pomegranate'
 'rice' 'watermelon']

Training the Random Forest model...
Random Forest training complete.

--- Random Forest Model Evaluation ---
Accuracy: 99.55%
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
     