# IEEE Compliant Confusion Matrix Generation

**Project**: AgroIntelligence 2.0  
**Objective**: Generate high-quality, IEEE-style confusion matrix visualizations for a 5-class crop recommendation model.

## Selected Classes
1. Rice
2. Maize
3. Cotton
4. Groundnut
5. Sugarcane

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import warnings

# NOTE: this silences every warning category for the rest of the session,
# which also hides Matplotlib's "font family not found" fallback notices.
warnings.filterwarnings('ignore')

# Set IEEE standard plotting style
plt.rcParams.update({
    "font.family": "serif",
    # Serif-only fallback chain, tried in order of priority. Arial is a
    # sans-serif face, so listing it here could silently produce sans-serif
    # figures on machines without Times New Roman.
    "font.serif": ["Times New Roman", "Times", "DejaVu Serif"],
    "font.size": 10,
    "axes.labelsize": 10,
    "axes.titlesize": 10,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "figure.dpi": 600
})


## 1. Data Loading and Preprocessing
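Filter the dataset to the five selected classes listed above, then prepare the data: drop non-predictive columns, one-hot encode categorical features, label-encode the target, and make a stratified 80/20 train/test split.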

In [None]:
# --- Read the raw dataset from disk ---
df = pd.read_csv('apcrop_dataset_realistic_enhanced.csv')

# --- Keep only the rows whose primary crop is one of the five target classes ---
target_crops = ['Rice', 'Maize', 'Cotton', 'Groundnut', 'Sugarcane']
is_target_crop = df['Primary_Crop'].isin(target_crops)
df_filtered = df.loc[is_target_crop].copy()

print(f"Original shape: {df.shape}")
print(f"Filtered shape: {df_filtered.shape}")
print("Class distribution:\n", df_filtered['Primary_Crop'].value_counts())

# --- Remove columns that are not used as predictors ---
# errors='ignore' tolerates datasets in which some of these columns are absent.
exclude_cols = [
    'Year',
    'Suitable_Crops',
    'Fertilizer_Plan',
    'Irrigation_Plan',
    'Market_Price_Index',
    'Previous_Crop',
]
df_filtered = df_filtered.drop(columns=exclude_cols, errors='ignore')

# --- Separate the target column from the feature columns ---
y = df_filtered['Primary_Crop']
X = df_filtered.drop(columns=['Primary_Crop'])

# --- One-hot encode every non-numeric feature column ---
categorical_cols = list(X.select_dtypes(exclude=np.number).columns)
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

# --- Map crop names to integer class ids; `classes` keeps the id -> name order ---
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)
classes = le.classes_

# --- Stratified 80/20 split with a fixed seed ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    stratify=y_encoded,
    test_size=0.2,
    random_state=42,
)


## 2. Model Training
Train a Random Forest classifier, targeting >90% accuracy on the held-out test set (the cell below reports whether the target is met).

In [None]:
# Random Forest with a fixed seed so repeated runs give the same model.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=42,
    n_jobs=-1
)
model.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {acc*100:.2f}%")

# Verify accuracy is > 90%.
# The comparison is strict so that a score of exactly 90% is not reported
# as "above 90%".
if acc > 0.9:
    print("Success: Accuracy is above 90%.")
else:
    print("Warning: Accuracy is not above 90%!")

## 3. Confusion Matrix Generation (IEEE Style)

In [None]:
# Pin the label set to every encoded class id so the matrix always has one
# row/column per entry of `classes`, even if a class is absent from both
# y_test and y_pred (otherwise the tick labels would be misaligned).
label_ids = np.arange(len(classes))
cm = confusion_matrix(y_test, y_pred, labels=label_ids)

# Row-normalise to percentages of each actual class. Rows with no samples are
# left at 0 instead of dividing by zero and producing NaN cells.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
) * 100

def plot_ieee_confusion_matrix(matrix, classes, title, cbar_label, link_fmt, filename, cmap='Blues', norm=False):
    """Draw a confusion matrix as an annotated heatmap, save it, and show it.

    Args:
        matrix: 2-D array of cell values to plot (counts or percentages).
        classes: Tick labels used for both the x and y axes.
        title: Not drawn. IEEE figures carry their caption below the figure,
            so no in-plot title is rendered. Kept for caller compatibility.
        cbar_label: Text label for the colour bar.
        link_fmt: Format spec for the per-cell annotations, passed to
            seaborn as ``fmt`` (e.g. ``'d'`` or ``'.1f'``).
        filename: Output path handed to ``Figure.savefig``.
        cmap: Colormap name for the heatmap.
        norm: Currently unused. Kept for caller compatibility.
    """
    # Standard IEEE column width is approx 3.5 inches. We use slightly larger
    # for readability, allowing the figure to be scaled down on placement.
    fig, ax = plt.subplots(figsize=(5, 4))

    sns.heatmap(
        matrix,
        annot=True,
        fmt=link_fmt,
        cmap=cmap,
        xticklabels=classes,
        yticklabels=classes,
        cbar_kws={'label': cbar_label},
        square=True,
        linewidths=0.5,
        linecolor='black',
        ax=ax
    )

    # Address the axes and figure explicitly rather than through pyplot's
    # implicit "current axes" state.
    ax.set_ylabel('Actual Label', fontweight='bold')
    ax.set_xlabel('Predicted Label', fontweight='bold')

    fig.tight_layout()
    fig.savefig(filename, dpi=600, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Saved {filename}")

# Figure 1: raw sample counts, annotated as integers.
plot_ieee_confusion_matrix(
    matrix=cm,
    classes=classes,
    title='Confusion Matrix (Sample Counts)',
    cbar_label='Sample Count',
    link_fmt='d',
    filename='ieee_confusion_matrix_counts.png',
    cmap='Blues',
)

# Figure 2: row-normalised percentages, annotated to one decimal place.
plot_ieee_confusion_matrix(
    matrix=cm_norm,
    classes=classes,
    title='Confusion Matrix (Normalized Accuracy)',
    cbar_label='Accuracy (%)',
    link_fmt='.1f',
    filename='ieee_confusion_matrix_normalized.png',
    cmap='Oranges',
)