# In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from typing import Dict, Any, Tuple
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score, 
    precision_recall_fscore_support,
    roc_curve, 
    auc
)

class QRCodeClassifier:
    """Tune, evaluate and plot a Random Forest and an SVM classification pipeline.

    Each model is a ``Pipeline`` of ``StandardScaler`` followed by the
    classifier. ``train_models`` grid-searches every pipeline and stores the
    best estimator in ``best_models``; ``evaluate_models`` scores those
    estimators, stores the metrics in ``evaluation_results`` and writes
    ``<model>_roc_curve.png`` / ``<model>_confusion_matrix.png`` into the
    current working directory.
    """

    def __init__(self, verbose: bool = True):
        """Create the untrained pipelines and configure logging.

        Args:
            verbose: Log at INFO level when true, WARNING otherwise.
        """
        log_level = logging.INFO if verbose else logging.WARNING
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(levelname)s: %(message)s'
        )
        self.logger = logging.getLogger(__name__)
        # basicConfig does nothing when the root logger already has handlers
        # (common in notebooks), so also set the level on our own logger to
        # make sure ``verbose`` is honoured. The logger is module-wide, so the
        # most recently constructed instance decides the level.
        self.logger.setLevel(log_level)
        self.models = {
            'random_forest': Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', RandomForestClassifier(random_state=42))
            ]),
            'svm': Pipeline([
                ('scaler', StandardScaler()),
                # probability=True is required for predict_proba (ROC curves).
                ('classifier', SVC(probability=True, random_state=42))
            ])
        }
        self.best_models = {}
        self.evaluation_results = {}

    def train_models(self, X_train: np.ndarray, y_train: np.ndarray) -> None:
        """Grid-search every pipeline and keep the best estimator per model.

        Uses 5-fold stratified cross-validation scored by weighted F1. A model
        whose search raises is logged (with traceback) and skipped so that the
        remaining models are still trained.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.
        """
        model_params = {
            'random_forest': {
                'classifier__n_estimators': [100, 200, 300, 500],
                'classifier__max_depth': [None, 10, 20, 30],
                'classifier__min_samples_split': [2, 5, 10],
                'classifier__class_weight': [None, 'balanced']
            },
            'svm': {
                'classifier__C': [0.1, 1, 10, 100],
                'classifier__kernel': ['linear', 'rbf', 'poly'],
                'classifier__gamma': ['scale', 'auto'],
                'classifier__class_weight': [None, 'balanced']
            }
        }
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        for name, model in self.models.items():
            self.logger.info("Tuning %s Model", name.replace('_', ' ').title())
            grid_search = GridSearchCV(
                estimator=model,
                param_grid=model_params[name],
                cv=cv,
                scoring='f1_weighted',
                n_jobs=-1
            )
            try:
                grid_search.fit(X_train, y_train)
            except Exception as e:
                # Best effort: one failing model must not abort the others.
                self.logger.exception("Training failed for %s: %s", name, e)
                continue
            self.best_models[name] = grid_search.best_estimator_
            self.logger.info("Best %s Parameters: %s", name, grid_search.best_params_)

    def evaluate_models(self, X_test: np.ndarray, y_test: np.ndarray) -> Dict[str, Dict[str, float]]:
        """Score every tuned model on the test set and save diagnostic plots.

        Metrics are stored before any plot is drawn, so a plotting or
        file-write failure no longer discards the computed metrics.

        Args:
            X_test: Test feature matrix.
            y_test: Test labels.

        Returns:
            ``evaluation_results``: model name -> dict with ``accuracy``,
            ``precision``, ``recall`` and ``f1_score`` (weighted averages).
            Models whose evaluation failed are absent.
        """
        if not self.best_models:
            self.logger.warning("No trained models to evaluate; call train_models() first")
        for name, model in self.best_models.items():
            try:
                y_pred = model.predict(X_test)
                y_pred_proba = model.predict_proba(X_test)
                precision, recall, f1, _ = precision_recall_fscore_support(
                    y_test, y_pred, average='weighted'
                )
                results = {
                    'accuracy': accuracy_score(y_test, y_pred),
                    'precision': precision,
                    'recall': recall,
                    'f1_score': f1
                }
                report = classification_report(y_test, y_pred)
            except Exception as e:
                self.logger.exception("Evaluation failed for %s: %s", name, e)
                continue
            self.evaluation_results[name] = results
            self.logger.info("\n%s Classification Report:\n%s",
                             name.replace('_', ' ').title(), report)
            try:
                # predict_proba columns follow the fitted model's classes_,
                # so hand those to the ROC plot instead of guessing from y_test.
                self._plot_roc_curve(y_test, y_pred_proba, name,
                                     classes=getattr(model, 'classes_', None))
                self._plot_confusion_matrix(y_test, y_pred, name)
            except Exception as e:
                self.logger.exception("Plotting failed for %s: %s", name, e)
        return self.evaluation_results

    @staticmethod
    def _label_union(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """Return the sorted unique labels occurring in either array."""
        return np.union1d(y_true, y_pred)

    def _plot_confusion_matrix(self, y_true: np.ndarray, y_pred: np.ndarray, model_name: str) -> None:
        """Save a confusion-matrix heatmap as ``<model_name>_confusion_matrix.png``.

        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            model_name: Model key; used in the title and the file name.
        """
        # Use the same explicit label set for the matrix and for the ticks;
        # labels taken from y_true alone would not line up with the matrix
        # whenever a prediction is a label that is absent from y_true.
        labels = self._label_union(y_true, y_pred)
        cm = confusion_matrix(y_true, y_pred, labels=labels)
        fig = plt.figure(figsize=(10, 8))
        try:
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                        xticklabels=labels,
                        yticklabels=labels)
            plt.title(f'{model_name.replace("_", " ").title()} Confusion Matrix')
            plt.xlabel('Predicted Label')
            plt.ylabel('True Label')
            plt.tight_layout()
            plt.savefig(f'{model_name}_confusion_matrix.png')
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)

    def _plot_roc_curve(self, y_true: np.ndarray, y_pred_proba: np.ndarray, model_name: str,
                        classes=None) -> None:
        """Save one-vs-rest ROC curves as ``<model_name>_roc_curve.png``.

        Args:
            y_true: True labels.
            y_pred_proba: Class probabilities, one column per class.
            model_name: Model key; used in the title and the file name.
            classes: Label of each probability column, in column order
                (normally the fitted model's ``classes_``). Falls back to the
                sorted unique labels of ``y_true`` when omitted.

        Raises:
            ValueError: If the number of classes differs from the number of
                probability columns.
        """
        y_true = np.asarray(y_true)
        y_pred_proba = np.asarray(y_pred_proba)
        classes = np.unique(y_true) if classes is None else np.asarray(classes)
        if y_pred_proba.ndim != 2 or y_pred_proba.shape[1] != len(classes):
            raise ValueError(
                f"Expected {len(classes)} probability columns, "
                f"got array of shape {y_pred_proba.shape}"
            )
        fig = plt.figure(figsize=(10, 8))
        try:
            for column, label in enumerate(classes):
                is_positive = (y_true == label)
                # A ROC curve needs both positive and negative samples.
                if not is_positive.any() or is_positive.all():
                    self.logger.warning(
                        "Skipping ROC curve for class %s of %s: "
                        "y_true lacks positive or negative samples",
                        label, model_name
                    )
                    continue
                fpr, tpr, _ = roc_curve(is_positive.astype(int), y_pred_proba[:, column])
                roc_auc = auc(fpr, tpr)
                plt.plot(fpr, tpr, label=f'ROC curve (class {label}, AUC = {roc_auc:.2f})')
            plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'{model_name.replace("_", " ").title()} Receiver Operating Characteristic')
            plt.legend(loc="lower right")
            plt.savefig(f'{model_name}_roc_curve.png')
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)

def main():
    """Run the end-to-end workflow: extract features, split, train, evaluate, print.

    Features and labels come from ``QRCodeDataProcessor('./data')``; 20% of
    the samples are held out (stratified by label, seed 42) for evaluation.
    Any exception raised along the way is caught and reported on stdout.
    """
    def _titled(key):
        # 'random_forest' -> 'Random Forest'
        return key.replace('_', ' ').title()

    try:
        from data_preprocessing import QRCodeDataProcessor
        from sklearn.model_selection import train_test_split

        features, labels = QRCodeDataProcessor('./data').feature_extraction()
        split = train_test_split(
            features,
            labels,
            test_size=0.2,
            random_state=42,
            stratify=labels,
        )
        train_X, test_X, train_y, test_y = split

        qr_classifier = QRCodeClassifier(verbose=True)
        qr_classifier.train_models(train_X, train_y)
        scores_by_model = qr_classifier.evaluate_models(test_X, test_y)

        for key, scores in scores_by_model.items():
            print(f"\n{_titled(key)} Model Results:")
            for score_name, score in scores.items():
                print(f"{_titled(score_name)}: {score}")
    except Exception as err:
        print(f"An error occurred in the main workflow: {err}")

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



# Random Forest Model Results:
# Accuracy: 0.975
# Precision: 0.9761904761904763
# Recall: 0.975
# F1 Score: 0.9749843652282676
#
# Svm Model Results:
# Accuracy: 1.0
# Precision: 1.0
# Recall: 1.0
# F1 Score: 1.0
