# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Load the raw dataset; the rest of this section relies on a 'label' column
# holding crop names plus the columns listed in columns_to_drop.
data = pd.read_csv('Crop_recommendationV2.csv')

# Map Crop Labels to Numbers
crop_dict = {
    'rice': 1, 'maize': 2, 'jute': 3, 'cotton': 4, 'coconut': 5, 'papaya': 6,
    'orange': 7, 'apple': 8, 'muskmelon': 9, 'watermelon': 10, 'grapes': 11,
    'mango': 12, 'banana': 13, 'pomegranate': 14, 'lentil': 15, 'blackgram': 16,
    'mungbean': 17, 'mothbeans': 18, 'pigeonpeas': 19, 'kidneybeans': 20,
    'chickpea': 21, 'coffee': 22
}
encoded_labels = data['label'].map(crop_dict)

# Series.map produces NaN for any value that is not a key of crop_dict.
# Stop here with an explicit message instead of writing NaN targets to disk
# and failing later inside the classifiers.
unknown_labels = data.loc[encoded_labels.isna(), 'label']
if not unknown_labels.empty:
    raise ValueError(
        "Crop labels missing from crop_dict: "
        f"{sorted(unknown_labels.astype(str).unique())}"
    )
data['label'] = encoded_labels

# Keep only the features suggested by Permutation Feature Importance;
# every column listed here is dropped.
columns_to_drop = [
    'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed', 'co2_concentration',
    'organic_matter', 'irrigation_frequency', 'crop_density', 'pest_pressure',
    'fertilizer_usage', 'growth_stage', 'urban_area_proximity', 'water_source_type',
    'frost_risk', 'water_usage_efficiency'
]
data_crop = data.drop(columns=columns_to_drop)

# Save and verify new dataset
data_crop.to_csv('data_crop.csv', index=False)
print(data_crop)

# Correlation Heatmap (Excluding Class Label)
# Pairwise correlations are computed on the feature columns only, then drawn
# as an annotated heatmap with a colour bar.
feature_corr = data_crop.drop(columns=['label']).corr()
sns.heatmap(feature_corr, annot=True, cbar=True)
plt.show()

# Prepare Data for Training
# Target is the encoded crop label; features are every other remaining column.
y = data_crop['label']
X = data_crop.drop(columns='label')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

# Normalize Features
# The scaler learns its parameters from the training rows only and is then
# applied unchanged to both the training and the test rows.
mx = MinMaxScaler()
mx.fit(X_train)
X_train = mx.transform(X_train)
X_test = mx.transform(X_test)

# Define Models
# Each classifier is created with its default constructor arguments. The key
# is the name shown in the printed reports and in the plot titles.
models = dict((
    ('LogisticRegression', LogisticRegression()),
    ('GaussianNB', GaussianNB()),
    ('SVC', SVC()),
    ('KNeighborsClassifier', KNeighborsClassifier()),
    ('DecisionTreeClassifier', DecisionTreeClassifier()),
    ('RandomForestClassifier', RandomForestClassifier()),
))

# Evaluate Models
# One {model name: score} mapping per metric; filled in by the loop below and
# read again by the comparison bar charts.
metrics = {
    "accuracy": {},
    "precision": {},
    "recall": {},
    "f1_score": {}
}

# Fixed class order for every confusion matrix. Without an explicit `labels`
# argument the matrix only contains classes that occur in y_test or y_pred,
# so a crop missing from the test split would shift all tick labels onto the
# wrong rows/columns. crop_dict keeps names and ids in the same order.
class_names = list(crop_dict.keys())
class_ids = list(crop_dict.values())

for name, model in models.items():
    # Train on the scaled training data and predict the held-out rows.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Precision/recall/F1 are averaged over classes weighted by support;
    # zero_division=0 scores an undefined per-class value as 0.
    metrics["accuracy"][name] = accuracy_score(y_test, y_pred)
    metrics["precision"][name] = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    metrics["recall"][name] = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    metrics["f1_score"][name] = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    print(f"{name} Model Performance:")
    print(f"Accuracy: {metrics['accuracy'][name]:.4f}")
    print(f"Precision: {metrics['precision'][name]:.4f}")
    print(f"Recall: {metrics['recall'][name]:.4f}")
    print(f"F1-Score: {metrics['f1_score'][name]:.4f}")
    print("-" * 30)
    print("Detailed Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))
    print("=" * 50)

    # Plot Confusion Matrix
    # `labels=class_ids` pins the matrix to one row/column per known crop so
    # it always lines up with the crop-name tick labels.
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    plt.figure(figsize=(10, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix for {name}')
    plt.show()

# Plot Accuracy, Precision, Recall and F1-Score comparisons
# The four charts differ only in the metric plotted, the colour palette, the
# y-axis label and the title, so a single helper draws each of them.
# NOTE(review): newer seaborn releases are reported to warn when `palette` is
# passed without `hue` — confirm against the installed seaborn version.
def _plot_metric_comparison(scores, palette, ylabel, title):
    """Show a bar chart of one metric, with one bar per model.

    scores  -- mapping of model name to score for the metric
    palette -- seaborn palette name used for the bars
    ylabel  -- text for the y axis
    title   -- chart title
    """
    plt.figure(figsize=(10, 6))
    sns.barplot(x=list(scores.keys()), y=list(scores.values()), palette=palette)
    plt.xlabel('Model', fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.title(title, fontsize=14)
    # Model names are long; tilt them so they do not overlap.
    plt.xticks(rotation=45, ha='right', fontsize=10)
    # Same 0-1 axis range on every chart so they can be compared by eye.
    plt.ylim(0, 1)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()


# (metric key, palette, y-axis label, title) in the order the charts are shown.
for metric_key, palette_name, axis_label, chart_title in (
    ("accuracy", "husl", 'Accuracy', 'Accuracy Comparison of Different Models'),
    ("precision", "coolwarm", 'Precision Score', 'Precision Comparison Across Models'),
    ("recall", "crest", 'Recall Score', 'Recall Comparison Across Models'),
    ("f1_score", "magma", 'F1-Score', 'F1-Score Comparison Across Models'),
):
    _plot_metric_comparison(metrics[metric_key], palette_name, axis_label, chart_title)
