In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve
from scipy.stats import randint, uniform
import warnings
warnings.filterwarnings('ignore')

class ModelEvaluationTuning:
    """
    Train, tune and compare a fixed set of scikit-learn classifiers.

    The constructor makes a stratified train/test split, fits a
    StandardScaler on the training portion and registers five default
    classifiers. The public methods then evaluate the untuned models,
    tune them with grid / randomized search, and report or plot the
    metrics collected in ``self.results``.

    Attributes:
    X, y: The data passed to the constructor
    X_train, X_test, y_train, y_test: The stratified split
    X_train_scaled, X_test_scaled: Standardized copies of the split
    models: Mapping of display name -> unfitted/default estimator
    results: Mapping of method name ('baseline', 'grid_search',
        'random_search') -> {model name -> metrics dict}
    best_models: Mapping of '<model name>_grid' / '<model name>_random'
        -> best estimator found by the corresponding search

    NOTE(review): the scaler is fitted on the whole training split before
    cross-validation, so CV folds of the scaled models share scaling
    statistics. ROC AUC is computed from a single score column and
    therefore assumes a binary target.
    """

    # Models that benefit from scaling are trained on standardized
    # features; every other model sees the raw features.
    _SCALED_MODELS = frozenset(['Logistic Regression', 'SVM', 'K-Nearest Neighbors'])

    # Keys of ``self.results`` in reporting order.
    _METHODS = ('baseline', 'grid_search', 'random_search')

    def __init__(self, X, y, test_size=0.2, random_state=42):
        """
        Initialize the model evaluation and tuning system

        Parameters:
        X: Feature matrix
        y: Target variable
        test_size: Size of test set
        random_state: Random state for reproducibility
        """
        self.X = X
        self.y = y
        self.random_state = random_state

        # Split the data (stratified so class proportions are preserved)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )

        # Scale the features; statistics come from the training split only
        self.scaler = StandardScaler()
        self.X_train_scaled = self.scaler.fit_transform(self.X_train)
        self.X_test_scaled = self.scaler.transform(self.X_test)

        # Initialize models
        self.models = {
            'Logistic Regression': LogisticRegression(random_state=random_state),
            'Random Forest': RandomForestClassifier(random_state=random_state),
            'SVM': SVC(random_state=random_state),
            'K-Nearest Neighbors': KNeighborsClassifier(),
            'Gradient Boosting': GradientBoostingClassifier(random_state=random_state)
        }

        # Store results
        self.results = {}
        self.best_models = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _select_data(self, name):
        """
        Return the (train, test) feature matrices to use for model `name`:
        the standardized ones for scale-sensitive models, raw otherwise.
        """
        if name in self._SCALED_MODELS:
            return self.X_train_scaled, self.X_test_scaled
        return self.X_train, self.X_test

    @staticmethod
    def _positive_class_scores(model, X):
        """
        Return continuous scores usable for ROC analysis, or None.

        Probabilities of the second class are preferred. Estimators that
        expose no `predict_proba` (e.g. SVC with its default settings)
        fall back to `decision_function`, which ROC metrics also accept.
        """
        if hasattr(model, 'predict_proba'):
            return model.predict_proba(X)[:, 1]
        if hasattr(model, 'decision_function'):
            return model.decision_function(X)
        return None

    def _score_on_test(self, model, X_test, extra=None):
        """
        Compute test-set metrics for a fitted `model`.

        Parameters:
        model: Fitted estimator
        X_test: Test features matching how the model was trained
        extra: Optional dict of additional entries merged into the result

        Returns:
        Dict with 'accuracy', 'precision', 'recall', 'f1' (weighted
        averages), the `extra` entries, and 'roc_auc' when the model
        provides continuous scores.
        """
        y_pred = model.predict(X_test)
        metrics = {
            'accuracy': accuracy_score(self.y_test, y_pred),
            'precision': precision_score(self.y_test, y_pred, average='weighted'),
            'recall': recall_score(self.y_test, y_pred, average='weighted'),
            'f1': f1_score(self.y_test, y_pred, average='weighted')
        }
        if extra:
            metrics.update(extra)

        scores = self._positive_class_scores(model, X_test)
        if scores is not None:
            metrics['roc_auc'] = roc_auc_score(self.y_test, scores)
        return metrics

    def _run_search(self, build_search, suffix):
        """
        Tune every registered model with the search object produced by
        `build_search(name, model)` and evaluate the winner on the test set.

        Parameters:
        build_search: Callable returning an unfitted search estimator
        suffix: Tag used for the `self.best_models` key ('grid'/'random')

        Returns:
        Dict of model name -> metrics (including 'best_params' and
        'best_cv_score')
        """
        tuned = {}
        for name, model in self.models.items():
            print(f"\nTuning {name}...")

            X_train, X_test = self._select_data(name)
            search = build_search(name, model)
            search.fit(X_train, self.y_train)

            best_model = search.best_estimator_
            metrics = self._score_on_test(
                best_model,
                X_test,
                extra={
                    'best_params': search.best_params_,
                    'best_cv_score': search.best_score_
                }
            )

            tuned[name] = metrics
            self.best_models[f'{name}_{suffix}'] = best_model

            print(f"  Best parameters: {search.best_params_}")
            print(f"  Best CV score: {search.best_score_:.4f}")
            print(f"  Test accuracy: {metrics['accuracy']:.4f}")
        return tuned

    def _results_frame(self, include_roc_auc=False):
        """
        Flatten `self.results` into a DataFrame with one row per
        (model, method) pair. Returns an empty DataFrame when nothing
        has been evaluated yet.
        """
        rows = []
        for method in self._METHODS:
            for model_name, metrics in self.results.get(method, {}).items():
                row = {
                    'Model': model_name,
                    'Method': method,
                    'Accuracy': metrics['accuracy'],
                    'Precision': metrics['precision'],
                    'Recall': metrics['recall'],
                    'F1-Score': metrics['f1']
                }
                if include_roc_auc and 'roc_auc' in metrics:
                    row['ROC AUC'] = metrics['roc_auc']
                rows.append(row)
        return pd.DataFrame(rows)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def evaluate_baseline_models(self):
        """
        Evaluate baseline models without hyperparameter tuning

        Each model in `self.models` is fitted in place, scored on the
        test split and additionally scored with 5-fold cross-validated
        accuracy on the training split.

        Returns:
        Dict of model name -> metrics; also stored in
        `self.results['baseline']`
        """
        print("=== Baseline Model Evaluation ===")
        baseline_results = {}

        for name, model in self.models.items():
            X_train, X_test = self._select_data(name)

            # Train model and score it on the held-out test split
            model.fit(X_train, self.y_train)
            metrics = self._score_on_test(model, X_test)

            # Cross-validation score (refits clones; `model` stays fitted)
            cv_scores = cross_val_score(model, X_train, self.y_train, cv=5, scoring='accuracy')
            metrics['cv_mean'] = cv_scores.mean()
            metrics['cv_std'] = cv_scores.std()

            baseline_results[name] = metrics

            print(f"\n{name}:")
            print(f"  Accuracy: {metrics['accuracy']:.4f}")
            print(f"  Precision: {metrics['precision']:.4f}")
            print(f"  Recall: {metrics['recall']:.4f}")
            print(f"  F1-Score: {metrics['f1']:.4f}")
            if 'roc_auc' in metrics:
                print(f"  ROC AUC: {metrics['roc_auc']:.4f}")
            print(f"  CV Score: {metrics['cv_mean']:.4f} (+/- {metrics['cv_std']*2:.4f})")

        self.results['baseline'] = baseline_results
        return baseline_results

    def hyperparameter_tuning_grid_search(self):
        """
        Perform hyperparameter tuning using GridSearchCV

        Returns:
        Dict of model name -> metrics; also stored in
        `self.results['grid_search']`. Best estimators are stored in
        `self.best_models` under '<model name>_grid'.
        """
        print("\n=== Grid Search Hyperparameter Tuning ===")

        # Define parameter grids for each model
        param_grids = {
            'Logistic Regression': {
                'C': [0.1, 1, 10, 100],
                'solver': ['liblinear', 'lbfgs'],
                'max_iter': [1000]
            },
            'Random Forest': {
                'n_estimators': [50, 100, 200],
                'max_depth': [None, 10, 20, 30],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4]
            },
            'SVM': {
                'C': [0.1, 1, 10, 100],
                'kernel': ['rbf', 'linear'],
                'gamma': ['scale', 'auto']
            },
            'K-Nearest Neighbors': {
                'n_neighbors': [3, 5, 7, 9, 11],
                'weights': ['uniform', 'distance'],
                'metric': ['euclidean', 'manhattan']
            },
            'Gradient Boosting': {
                'n_estimators': [50, 100, 200],
                'learning_rate': [0.01, 0.1, 0.2],
                'max_depth': [3, 5, 7]
            }
        }

        def build_search(name, model):
            return GridSearchCV(
                model,
                param_grids[name],
                cv=5,
                scoring='accuracy',
                n_jobs=-1,
                verbose=0
            )

        grid_results = self._run_search(build_search, 'grid')
        self.results['grid_search'] = grid_results
        return grid_results

    def hyperparameter_tuning_random_search(self):
        """
        Perform hyperparameter tuning using RandomizedSearchCV

        Returns:
        Dict of model name -> metrics; also stored in
        `self.results['random_search']`. Best estimators are stored in
        `self.best_models` under '<model name>_random'.
        """
        print("\n=== Randomized Search Hyperparameter Tuning ===")

        # Candidate tree depths are drawn once up front. Seeding the draw
        # keeps the search space itself reproducible, and plain ints keep
        # the reported best_params free of numpy scalar types.
        depth_draws = randint(10, 50).rvs(5, random_state=self.random_state)
        depth_choices = [None] + [int(depth) for depth in depth_draws]

        # Define parameter distributions for randomized search
        param_distributions = {
            'Logistic Regression': {
                'C': uniform(0.1, 100),
                'solver': ['liblinear', 'lbfgs'],
                'max_iter': [1000]
            },
            'Random Forest': {
                'n_estimators': randint(50, 300),
                'max_depth': depth_choices,
                'min_samples_split': randint(2, 20),
                'min_samples_leaf': randint(1, 10)
            },
            'SVM': {
                'C': uniform(0.1, 100),
                'kernel': ['rbf', 'linear'],
                'gamma': ['scale', 'auto']
            },
            'K-Nearest Neighbors': {
                'n_neighbors': randint(3, 15),
                'weights': ['uniform', 'distance'],
                'metric': ['euclidean', 'manhattan', 'minkowski']
            },
            'Gradient Boosting': {
                'n_estimators': randint(50, 300),
                'learning_rate': uniform(0.01, 0.3),
                'max_depth': randint(3, 10)
            }
        }

        def build_search(name, model):
            return RandomizedSearchCV(
                model,
                param_distributions[name],
                n_iter=50,
                cv=5,
                scoring='accuracy',
                n_jobs=-1,
                random_state=self.random_state,
                verbose=0
            )

        random_results = self._run_search(build_search, 'random')
        self.results['random_search'] = random_results
        return random_results

    def compare_results(self):
        """
        Compare results from different tuning methods

        Returns:
        (comparison_df, best_model_info): the table of all collected
        metrics and the row with the highest F1-Score. When no results
        have been collected yet, an empty DataFrame and None are returned.
        """
        print("\n=== Model Comparison ===")

        comparison_df = self._results_frame(include_roc_auc=True)
        if comparison_df.empty:
            print("No results available; run an evaluation or tuning method first.")
            return comparison_df, None

        # Find best model overall
        best_model_idx = comparison_df['F1-Score'].idxmax()
        best_model_info = comparison_df.loc[best_model_idx]

        print(f"\nBest Overall Model: {best_model_info['Model']} ({best_model_info['Method']})")
        print(f"F1-Score: {best_model_info['F1-Score']:.4f}")
        print(f"Accuracy: {best_model_info['Accuracy']:.4f}")

        return comparison_df, best_model_info

    def plot_results(self):
        """
        Create visualizations of the results

        Draws a 2x2 grid of grouped bar charts (one per metric, one bar
        per method) followed by a heatmap of each model's best-F1 row.
        Does nothing except print a message when no results exist.
        """
        df = self._results_frame()
        if df.empty:
            print("No results available; run an evaluation or tuning method first.")
            return

        metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

        # One subplot per metric, filled row by row
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('Model Performance Comparison', fontsize=16)

        for ax, metric in zip(axes.flat, metric_names):
            pivot = df.pivot(index='Model', columns='Method', values=metric)
            pivot.plot(kind='bar', ax=ax, title=f'{metric} Comparison')
            ax.set_ylabel(metric)
            ax.legend()
            ax.tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.show()

        # Heatmap of best results (per model, the method with the top F1)
        best_results = df.loc[df.groupby('Model')['F1-Score'].idxmax()]
        metrics_for_heatmap = best_results.set_index('Model')[metric_names]

        plt.figure(figsize=(10, 6))
        sns.heatmap(metrics_for_heatmap.T, annot=True, cmap='YlOrRd', fmt='.3f')
        plt.title('Best Model Performance Heatmap')
        plt.ylabel('Metrics')
        plt.xlabel('Models')
        plt.tight_layout()
        plt.show()

    def detailed_analysis(self, model_name):
        """
        Perform detailed analysis of the best model

        Picks the method under which `model_name` achieved its highest
        F1 (the earliest method wins ties), then prints a classification
        report and shows a confusion matrix and, when the model yields
        continuous scores, a ROC curve.

        Parameters:
        model_name: Key of the model in `self.models`

        Returns:
        None. Prints a message and returns early when the model has no
        recorded results.
        """
        # Find the best version of the specified model. Starting from
        # -inf (not 0) keeps a model whose F1 is exactly 0 discoverable.
        best_method = None
        best_score = float('-inf')

        for method in self._METHODS:
            metrics = self.results.get(method, {}).get(model_name)
            if metrics is not None and metrics['f1'] > best_score:
                best_score = metrics['f1']
                best_method = method

        if best_method is None:
            print(f"Model {model_name} not found in results")
            return

        print(f"\n=== Detailed Analysis: {model_name} ({best_method}) ===")

        X_train, X_test = self._select_data(model_name)

        # Get the best model
        if best_method == 'baseline':
            model = self.models[model_name]
            # Refit so the analysis does not rely on state left by earlier calls
            model.fit(X_train, self.y_train)
        else:
            # 'grid_search' -> '<name>_grid', 'random_search' -> '<name>_random'
            model = self.best_models[f'{model_name}_{best_method.split("_")[0]}']

        y_pred = model.predict(X_test)

        # Classification report
        print("\nClassification Report:")
        print(classification_report(self.y_test, y_pred))

        # Confusion matrix
        cm = confusion_matrix(self.y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title(f'Confusion Matrix - {model_name}')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()

        # ROC Curve (if the model exposes probabilities or decision scores)
        scores = self._positive_class_scores(model, X_test)
        if scores is not None:
            fpr, tpr, _ = roc_curve(self.y_test, scores)
            auc = roc_auc_score(self.y_test, scores)

            plt.figure(figsize=(8, 6))
            plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.3f})')
            plt.plot([0, 1], [0, 1], 'k--', label='Random')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'ROC Curve - {model_name}')
            plt.legend()
            plt.show()

# Example usage
def main():
    """
    Run the complete demo workflow on the breast-cancer sample dataset.

    Executes baseline evaluation, grid search and randomized search,
    then compares and plots the results and shows a detailed analysis of
    the overall best model.

    Returns:
    The ModelEvaluationTuning instance holding all results
    """
    # Sample data; swap in your own feature matrix / target here, or a
    # synthetic set, e.g.
    # make_classification(n_samples=1000, n_features=20, n_informative=10,
    #                     n_redundant=10, n_classes=2, random_state=42)
    print("Loading sample dataset...")
    dataset = load_breast_cancer()
    features = dataset.data
    labels = dataset.target

    evaluator = ModelEvaluationTuning(features, labels)

    # Stages run in order: baseline first, then both tuning strategies
    pipeline_stages = (
        evaluator.evaluate_baseline_models,
        evaluator.hyperparameter_tuning_grid_search,
        evaluator.hyperparameter_tuning_random_search,
    )
    for run_stage in pipeline_stages:
        run_stage()

    # Only the winning row is needed here; the full table is discarded
    _, winner = evaluator.compare_results()

    evaluator.plot_results()

    # Drill into whichever model scored best overall
    evaluator.detailed_analysis(winner['Model'])

    return evaluator

# Script entry point: run the demo workflow and bind the returned
# evaluator to a module-level name.
if __name__ == "__main__":
    evaluator = main()

Loading sample dataset...
=== Baseline Model Evaluation ===

Logistic Regression:
  Accuracy: 0.9825
  Precision: 0.9825
  Recall: 0.9825
  F1-Score: 0.9825
  ROC AUC: 0.9954
  CV Score: 0.9802 (+/- 0.0256)

Random Forest:
  Accuracy: 0.9561
  Precision: 0.9561
  Recall: 0.9561
  F1-Score: 0.9560
  ROC AUC: 0.9937
  CV Score: 0.9538 (+/- 0.0469)

SVM:
  Accuracy: 0.9825
  Precision: 0.9825
  Recall: 0.9825
  F1-Score: 0.9825
  CV Score: 0.9714 (+/- 0.0357)

K-Nearest Neighbors:
  Accuracy: 0.9561
  Precision: 0.9561
  Recall: 0.9561
  F1-Score: 0.9560
  ROC AUC: 0.9788
  CV Score: 0.9670 (+/- 0.0417)

Gradient Boosting:
  Accuracy: 0.9561
  Precision: 0.9569
  Recall: 0.9561
  F1-Score: 0.9558
  ROC AUC: 0.9907
  CV Score: 0.9560 (+/- 0.0278)

=== Grid Search Hyperparameter Tuning ===

Tuning Logistic Regression...
  Best parameters: {'C': 0.1, 'max_iter': 1000, 'solver': 'lbfgs'}
  Best CV score: 0.9802
  Test accuracy: 0.9737

Tuning Random Forest...
