In [16]:
# These packages should be loaded first
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional, List, Dict, Union
from abc import ABC, abstractmethod
import warnings

In [17]:
# ABSTRACT BASE CLASS 

class VisualizationBase(ABC):
    """
    Abstract Base Class (ABC) for all visualization components.

    Concrete visualization classes inherit common initialization, data
    validation, theme management and a few dunder methods from this class,
    and must provide their own `render()` implementation.

    OOP concepts demonstrated:
     1. Inheritance: It is designed to be subclassed.
     2. Encapsulation: Data and theme are protected attributes (`_data`, `_theme`) managed via public methods.
     3. Polymorphism: The abstract `render()` method forces subclasses to provide their own implementation.
     4. Dunder Methods: `__repr__`, `__eq__` and `__len__` customize built-in operations.

    Attributes:
        _data (pd.DataFrame): The protected attribute holding the data to be visualized.
        _theme (str): The protected attribute holding the name of the active visualization theme.
    """

    # Internal theme name -> Matplotlib style sheet name.
    _THEME_STYLES = {
        'default': 'seaborn-v0_8-darkgrid',
        'minimal': 'seaborn-v0_8-whitegrid',
        'dark': 'dark_background',
        'colorful': 'seaborn-v0_8-bright',
    }
    # Theme whose style sheet is used when `_theme` is not a known key.
    _FALLBACK_THEME = 'default'

    # Defining __eq__ makes instances unhashable; stated explicitly so the
    # behavior is visible rather than implicit.
    __hash__ = None

    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Initializes the base visualization component with data and a theme.

        The data is validated immediately and the theme is applied globally
        through Matplotlib's style mechanism.

        Args:
            data: The pandas DataFrame containing the data to be plotted.
            theme: The visual theme to apply. Supported built-in themes are
                   'default', 'minimal', 'dark', and 'colorful'.

        Raises:
            TypeError: If `data` is not a pandas DataFrame.
            ValueError: If `data` is an empty DataFrame.
        """
        self._data = data  # Protected attribute (encapsulation)
        self._theme = theme  # Protected attribute
        self._validate_data()
        self._apply_theme()  # Apply the theme immediately upon initialization

    def _validate_data(self) -> bool:
        """
        Validates that the stored data is a non-empty pandas DataFrame.

        Returns:
            True when the data passes both checks.

        Raises:
            TypeError: If the stored data is not an instance of pd.DataFrame.
            ValueError: If the stored DataFrame is empty.
        """
        if not isinstance(self._data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if self._data.empty:
            raise ValueError("DataFrame cannot be empty")
        return True

    # Getter and Setter methods (Encapsulation)
    def get_data(self) -> pd.DataFrame:
        """
        Retrieves the underlying pandas DataFrame used for visualization.

        Returns:
            The protected `_data` DataFrame (the same object, not a copy).
        """
        return self._data

    def set_theme(self, theme: str):
        """
        Sets a new visualization theme and immediately applies it.

        Args:
            theme: The new visual theme name. Unknown names fall back to the
                   style of the 'default' theme.
        """
        self._theme = theme
        self._apply_theme()

    def _resolve_style(self) -> str:
        """
        Maps the current theme name to a Matplotlib style sheet name.

        Returns:
            The style sheet for `_theme`, or the style sheet of the
            'default' theme ('seaborn-v0_8-darkgrid') when the theme
            name is not recognized.
        """
        fallback_style = self._THEME_STYLES[self._FALLBACK_THEME]
        return self._THEME_STYLES.get(self._theme, fallback_style)

    def _apply_theme(self):
        """
        Applies the style sheet for the current theme globally via
        `plt.style.use`.

        Note that this changes Matplotlib's global state, so it affects
        every figure created afterwards, not only this component's.
        """
        plt.style.use(self._resolve_style())

    @abstractmethod
    def render(self):
        """
        Abstract method (polymorphism) that every concrete subclass must
        override to produce its specific visualization.
        """
        pass

    #  Dunder Methods (Operator Overloading)

    def __repr__(self) -> str:
        """
        Returns a developer-friendly string with the concrete class name,
        the data's row and column counts, and the theme.
        """
        return f"{self.__class__.__name__}(rows={len(self._data)}, cols={len(self._data.columns)}, theme='{self._theme}')"

    def __eq__(self, other) -> bool:
        """
        Implements the equality operator (==) for VisualizationBase objects.

        Two objects are equal when both are VisualizationBase instances
        (not necessarily of the same concrete subclass) and their
        DataFrames and theme names are identical.

        Args:
            other: The object to compare against.

        Returns:
            True/False for VisualizationBase operands; NotImplemented for
            any other type, so Python can try the reflected comparison
            (the `==` expression still evaluates to False in that case).
        """
        if not isinstance(other, VisualizationBase):
            return NotImplemented
        # pd.DataFrame.equals() compares data, index, and column names
        return self._data.equals(other._data) and self._theme == other._theme

    def __len__(self) -> int:
        """
        Implements the built-in `len()` function.

        Returns:
            The number of rows in the visualization's data DataFrame.
        """
        return len(self._data)

In [18]:
class SummaryGenerator:
    """
    Generates per-column summaries of a DataFrame.
    Demonstrates: Encapsulation, Dunder Methods
    """

    def __init__(self, data: pd.DataFrame):
        """
        Args:
            data: The DataFrame to summarize. It is stored as-is (no copy,
                  no validation).
        """
        self._data = data

    def _missing_stats(self, series) -> tuple:
        """
        Computes the missing-value count and its formatted percentage.

        Args:
            series: The column to inspect.

        Returns:
            (missing_count, 'x.x%') where the percentage is relative to the
            number of rows in the DataFrame. A zero-row DataFrame reports
            '0.0%' instead of dividing by zero.
        """
        missing = series.isnull().sum()
        total_rows = len(self._data)
        percent = missing / total_rows * 100 if total_rows else 0.0
        return missing, f"{percent:.1f}%"

    def summarize_numeric(self) -> List[Dict]:
        """
        Summarize numeric columns.

        Returns:
            One dict per numeric column with count, missing statistics,
            mean/std/min/max formatted to two decimals, and the number of
            unique values.
        """
        summaries = []
        numeric_cols = self._data.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            series = self._data[col]
            missing, missing_pct = self._missing_stats(series)
            summaries.append({
                'Column': col,
                'Type': 'Numeric',
                'Count': series.count(),
                'Missing': missing,
                'Missing %': missing_pct,
                'Mean': f"{series.mean():.2f}",
                'Std': f"{series.std():.2f}",
                'Min': f"{series.min():.2f}",
                'Max': f"{series.max():.2f}",
                'Unique': series.nunique(),
            })
        return summaries

    def summarize_categorical(self) -> List[Dict]:
        """
        Summarize categorical columns (dtypes 'object' and 'category').

        Returns:
            One dict per categorical column with count, missing statistics,
            number of unique values, and the most frequent value with its
            frequency ('N/A' and 0 when the column has no non-null values).
        """
        summaries = []
        categorical_cols = self._data.select_dtypes(include=['object', 'category']).columns

        for col in categorical_cols:
            series = self._data[col]
            missing, missing_pct = self._missing_stats(series)

            # mode() is empty when every value is missing
            modes = series.mode()
            top_val = modes.iloc[0] if not modes.empty else 'N/A'

            # value_counts() is sorted by frequency, so the first entry is the top one
            value_counts = series.value_counts()
            top_freq = value_counts.iloc[0] if not value_counts.empty else 0

            summaries.append({
                'Column': col,
                'Type': 'Categorical',
                'Count': series.count(),
                'Missing': missing,
                'Missing %': missing_pct,
                'Unique': series.nunique(),
                'Top Value': str(top_val),
                'Top Freq': top_freq,
            })
        return summaries

    def tabular_summary(self, style: str = 'full') -> pd.DataFrame:
        """
        Generate a tabular summary and print a short dataset header.

        Args:
            style: 'full', 'numeric', or 'categorical'. Any other value
                   produces an empty summary table.

        Returns:
            DataFrame with one row per summarized column.
        """
        summaries = []

        if style in ['full', 'numeric']:
            summaries.extend(self.summarize_numeric())

        if style in ['full', 'categorical']:
            summaries.extend(self.summarize_categorical())

        summary_df = pd.DataFrame(summaries)

        print("\n" + "="*80)
        print(f"DATASET SUMMARY - {style.upper()} VIEW")
        print("="*80)
        print(f"Total Rows: {len(self._data):,}")
        # deep=True also accounts for the contents of object columns
        memory_mb = self._data.memory_usage(deep=True).sum() / 1024**2
        print(f"Memory Usage: {memory_mb:.2f} MB")
        print("="*80 + "\n")

        return summary_df

    # Dunder Methods
    def __repr__(self) -> str:
        """String representation with the data's row and column counts."""
        return f"SummaryGenerator(rows={len(self._data)}, cols={len(self._data.columns)})"

    def __len__(self) -> int:
        """Return number of rows"""
        return len(self._data)

In [19]:
class PlotEase(VisualizationBase):
    """
    Main facade class that integrates all features.
    Demonstrates: Composition, Inheritance, Polymorphism

    Each public method delegates to one of the composed helper objects
    (diagnostic plotter, summary generator, model comparator, quick plotter).
    """

    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Args:
            data: The DataFrame shared by all composed components.
            theme: The theme name passed to the base class and to the
                   plotting components.
        """
        # The base initializer validates the data and applies the theme,
        # so no additional _apply_theme() call is needed here.
        super().__init__(data, theme)

        # Composition - PlotEase HAS-A these components
        self._diagnostic = DiagnosticPlotter(data, theme)
        self._summary = SummaryGenerator(data)
        self._comparator = None  # Created on each compare_models() call
        self._plotter = QuickPlotter(data, theme)

    def set_theme(self, theme: str):
        """
        Sets the theme on this object and on the composed plotters.

        Without the propagation, the composed plotters would keep reporting
        the theme they were constructed with.

        Args:
            theme: The new visual theme name.
        """
        super().set_theme(theme)
        for component in (self._diagnostic, self._plotter):
            component.set_theme(theme)

    def autoplot(self, target: Optional[str] = None, max_plots: int = 6):
        """Delegate to DiagnosticPlotter"""
        self._diagnostic.autoplot(target, max_plots)

    def tabular_summary(self, style: str = 'full') -> pd.DataFrame:
        """Delegate to SummaryGenerator"""
        return self._summary.tabular_summary(style)

    def compare_models(self, models_results: Dict[str, Dict[str, float]],
                      metrics: Optional[List[str]] = None):
        """
        Delegate to ModelComparator.

        A new comparator is built from `models_results` on every call and
        kept in `_comparator`.
        """
        self._comparator = ModelComparator(models_results)
        self._comparator.compare_models(metrics)

    def quick_plot(self, x: str, y: Optional[str] = None, **kwargs):
        """Delegate to QuickPlotter"""
        self._plotter.quick_plot(x, y, **kwargs)

    def set_style(self, style_dict: Dict[str, object]):
        """Apply custom styling (delegates to QuickPlotter.set_style)"""
        self._plotter.set_style(style_dict)

    def render(self):
        """Override abstract method - Polymorphism. Prints a usage overview."""
        print("PlotEase Main Interface")
        print("Available methods:")
        print("  - autoplot(): Generate diagnostic plots")
        print("  - tabular_summary(): Generate data summary")
        print("  - compare_models(): Compare ML models")
        print("  - quick_plot(): Create quick visualizations")

    # Additional Dunder Methods
    def __repr__(self) -> str:
        """String representation - Method Overriding"""
        return f"PlotEase(rows={len(self._data)}, cols={len(self._data.columns)}, theme='{self._theme}')"

    def __lt__(self, other) -> bool:
        """
        Less than comparison based on the number of data rows.

        Returns NotImplemented when `other` is not a PlotEase instance.
        """
        if not isinstance(other, PlotEase):
            return NotImplemented
        return len(self._data) < len(other._data)

In [20]:
class DiagnosticPlotter(VisualizationBase):
    """
    Handles automatic diagnostic plot generation
    Demonstrates: Inheritance from VisualizationBase
    """

    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Args:
            data: The DataFrame to diagnose.
            theme: The theme name handled by the base class.
        """
        # The base initializer validates the data and applies the theme
        super().__init__(data, theme)

    @staticmethod
    def _is_numeric(series) -> bool:
        """
        Returns True for numeric, non-boolean data.

        Booleans are excluded so the result agrees with
        `select_dtypes(include=[np.number])`, which is used to collect the
        numeric columns in `autoplot`.
        """
        return (pd.api.types.is_numeric_dtype(series)
                and not pd.api.types.is_bool_dtype(series))

    def create_distributions(self, ax, numeric_cols: List[str]):
        """Create overlaid histograms for the first three numeric columns"""
        for col in numeric_cols[:3]:
            self._data[col].hist(alpha=0.5, label=col, bins=30, ax=ax)
        ax.set_title('Distribution of Numeric Variables', fontsize=14, fontweight='bold')
        ax.set_xlabel('Value')
        ax.set_ylabel('Frequency')
        ax.legend()
        ax.grid(alpha=0.3)

    def create_correlations(self, ax, numeric_cols: List[str]):
        """Create correlation heatmap of the given numeric columns"""
        corr = self._data[numeric_cols].corr()
        sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm',
                   center=0, ax=ax, cbar_kws={'shrink': 0.8})
        ax.set_title('Correlation Matrix', fontsize=14, fontweight='bold')

    def create_missing_data(self, ax):
        """
        Create missing data visualization.

        Draws a horizontal bar per column that has missing values, or a
        'No Missing Values' message when there are none.
        """
        missing = self._data.isnull().sum()
        missing = missing[missing > 0].sort_values(ascending=False)
        if len(missing) > 0:
            missing.plot(kind='barh', ax=ax, color='coral')
            ax.set_title('Missing Values by Column', fontsize=14, fontweight='bold')
            ax.set_xlabel('Count')
        else:
            ax.text(0.5, 0.5, 'No Missing Values', ha='center', va='center', fontsize=16)
            ax.set_title('Missing Values Check', fontsize=14, fontweight='bold')
            ax.axis('off')

    def create_outliers(self, ax, numeric_cols: List[str]):
        """Create boxplots of the first four numeric columns for outlier detection"""
        self._data[numeric_cols[:4]].boxplot(ax=ax)
        ax.set_title('Outlier Detection (Boxplots)', fontsize=14, fontweight='bold')
        ax.set_ylabel('Value')
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    def autoplot(self, target: Optional[str] = None, max_plots: int = 6):
        """
        Automatically generate diagnostic plots based on data types.

        Up to six plots are drawn on a two-column grid, in this order:
        numeric distributions, correlation heatmap, missing data, target
        distribution, top categories, outlier boxplots. Plots whose
        preconditions are not met are skipped.

        Args:
            target: Optional target variable for focused analysis
            max_plots: Maximum number of plots to generate. Values below 1
                       produce no figure.
        """
        if max_plots < 1:
            # A zero-row grid would give the figure a zero height
            print("✓ Generated 0 diagnostic plots")
            return

        numeric_cols = self._data.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = self._data.select_dtypes(include=['object', 'category']).columns.tolist()

        plots_created = 0
        n_rows = min(3, (max_plots + 1) // 2)
        fig = plt.figure(figsize=(15, 5 * n_rows))

        def next_axes():
            """Claim the next free cell of the grid."""
            nonlocal plots_created
            plots_created += 1
            return fig.add_subplot(n_rows, 2, plots_created)

        # Plot 1: Distribution of numeric variables
        if numeric_cols and plots_created < max_plots:
            self.create_distributions(next_axes(), numeric_cols)

        # Plot 2: Correlation heatmap
        if len(numeric_cols) > 1 and plots_created < max_plots:
            self.create_correlations(next_axes(), numeric_cols)

        # Plot 3: Missing data
        if plots_created < max_plots:
            self.create_missing_data(next_axes())

        # Plot 4: Target variable analysis
        if target and target in self._data.columns and plots_created < max_plots:
            ax = next_axes()

            if self._is_numeric(self._data[target]):
                self._data[target].hist(bins=30, ax=ax, color='steelblue', edgecolor='black')
                ax.set_title(f'Target Distribution: {target}', fontsize=14, fontweight='bold')
                ax.set_xlabel(target)
                ax.set_ylabel('Frequency')
            else:
                self._data[target].value_counts().plot(kind='bar', ax=ax, color='steelblue')
                ax.set_title(f'Target Distribution: {target}', fontsize=14, fontweight='bold')
                ax.set_xlabel(target)
                ax.set_ylabel('Count')
                plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

        # Plot 5: Categorical distribution
        if categorical_cols and plots_created < max_plots:
            ax = next_axes()
            col = categorical_cols[0]
            value_counts = self._data[col].value_counts().head(10)
            value_counts.plot(kind='barh', ax=ax, color='teal')
            ax.set_title(f'Top Categories: {col}', fontsize=14, fontweight='bold')
            ax.set_xlabel('Count')

        # Plot 6: Outliers
        if len(numeric_cols) >= 1 and plots_created < max_plots:
            self.create_outliers(next_axes(), numeric_cols)

        plt.tight_layout()
        plt.show()

        print(f"✓ Generated {plots_created} diagnostic plots")

    def render(self):
        """Implementation of abstract method: runs autoplot() with defaults"""
        self.autoplot()

    def __repr__(self) -> str:
        """String representation"""
        return f"DiagnosticPlotter(rows={len(self._data)}, theme='{self._theme}')"

In [21]:
class QuickPlotter(VisualizationBase):
    """
    Quick plotting with minimal syntax
    Demonstrates: Inheritance from VisualizationBase
    """

    # Plot kinds that quick_plot() knows how to draw.
    _SUPPORTED_KINDS = ('scatter', 'line', 'bar', 'box', 'hist')

    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Args:
            data: The DataFrame to plot from.
            theme: The theme name handled by the base class.
        """
        # The base initializer validates the data and applies the theme
        super().__init__(data, theme)
        self._style_config = {}  # Protected attribute for custom styles

    @staticmethod
    def _is_numeric(series) -> bool:
        """Returns True for numeric, non-boolean data."""
        return (pd.api.types.is_numeric_dtype(series)
                and not pd.api.types.is_bool_dtype(series))

    def detect_plot_type(self, x: str, y: Optional[str]) -> str:
        """
        Automatically detect appropriate plot type.

        Args:
            x: Column name for the x-axis.
            y: Optional column name for the y-axis.

        Returns:
            'hist' for a single numeric column, 'scatter' for two numeric
            columns, and 'bar' in every other case.
        """
        x_numeric = self._is_numeric(self._data[x])
        if y is None:
            return 'hist' if x_numeric else 'bar'
        if x_numeric and self._is_numeric(self._data[y]):
            return 'scatter'
        return 'bar'

    def quick_plot(self, x: str, y: Optional[str] = None,
                   kind: str = 'auto',
                   color: str = 'steelblue',
                   title: Optional[str] = None,
                   figsize: tuple = (10, 6),
                   **kwargs):
        """
        Create plots with minimal syntax

        Args:
            x: Column name for x-axis
            y: Column name for y-axis (optional)
            kind: Plot type ('auto', 'scatter', 'line', 'bar', 'box', 'hist')
            color: Color for the plot
            title: Custom title
            figsize: Figure size tuple
            **kwargs: Passed to the underlying plotting call (not used for
                      'box' plots).

        Raises:
            ValueError: If `kind` is not supported, or if 'scatter'/'line'
                        is requested without `y`.
        """
        if kind == 'auto':
            kind = self.detect_plot_type(x, y)

        # Validate before creating the figure so a bad request does not
        # leave an empty figure behind.
        if kind not in self._SUPPORTED_KINDS:
            raise ValueError(
                f"Unsupported plot kind '{kind}'. "
                f"Choose from: {('auto',) + self._SUPPORTED_KINDS}"
            )
        if kind in ('scatter', 'line') and not y:
            raise ValueError(f"A '{kind}' plot requires a y column")

        plt.figure(figsize=figsize)

        # Create the plot
        if kind == 'scatter':
            plt.scatter(self._data[x], self._data[y], alpha=0.6, color=color, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel(y, fontsize=12)

        elif kind == 'line':
            plt.plot(self._data[x], self._data[y], color=color, linewidth=2, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel(y, fontsize=12)

        elif kind == 'bar':
            if y:
                self._data.groupby(x)[y].mean().plot(kind='bar', color=color, **kwargs)
                plt.ylabel(f'Mean {y}', fontsize=12)
            else:
                self._data[x].value_counts().plot(kind='bar', color=color, **kwargs)
                plt.ylabel('Count', fontsize=12)
            plt.xlabel(x, fontsize=12)
            plt.xticks(rotation=45, ha='right')

        elif kind == 'hist':
            # Missing values are dropped: the histogram range cannot be
            # determined when the data contains NaN.
            plt.hist(self._data[x].dropna(), bins=30, color=color,
                     edgecolor='black', alpha=0.7, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel('Frequency', fontsize=12)

        elif kind == 'box':
            if y:
                self._data.boxplot(column=y, by=x, ax=plt.gca(), patch_artist=True)
            else:
                self._data[[x]].boxplot(patch_artist=True)
            plt.xlabel(x, fontsize=12)

        # Styling
        if title:
            plt.title(title, fontsize=16, fontweight='bold', pad=20)
        else:
            plt.title(f'{kind.capitalize()} Plot: {x}' + (f' vs {y}' if y else ''),
                     fontsize=16, fontweight='bold', pad=20)

        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.show()

    def set_style(self, style_dict: Dict[str, object]):
        """
        Apply custom styling globally.

        Recognized keys ('font_size', 'figure_facecolor', 'axes_facecolor',
        'grid_alpha') are written to Matplotlib's rcParams. The whole dict,
        including unrecognized keys, is recorded in `_style_config`.
        """
        rc_keys = {
            'font_size': 'font.size',
            'figure_facecolor': 'figure.facecolor',
            'axes_facecolor': 'axes.facecolor',
            'grid_alpha': 'grid.alpha',
        }
        for option, rc_name in rc_keys.items():
            if option in style_dict:
                plt.rcParams[rc_name] = style_dict[option]

        self._style_config.update(style_dict)
        print("✓ Custom styling applied")

    def render(self):
        """Implementation of abstract method: prints a usage hint"""
        print("Use quick_plot() method to render specific plots")

    def __repr__(self) -> str:
        """String representation"""
        return f"QuickPlotter(rows={len(self._data)}, theme='{self._theme}')"

In [22]:
# Smoke-test cell: prints a fixed marker string.
print("testing")

testing


In [24]:
class ModelComparator:
    """
    Compares machine learning model performance across various metrics.

    This class demonstrates several core object-oriented programming (OOP) principles:
     1. Encapsulation: The model results are stored in a protected attribute (_models_results).
     2. Operator Overloading (Dunder Methods): Customizing behavior for operators like == (equality) and > (greater than).

    Attributes:
        _models_results (Dict[str, Dict[str, float]]): The raw input dictionary
            where keys are model names and values are dictionaries of {metric: score}.
        _df (pd.DataFrame): A pandas DataFrame representation of the results,
            with models as the index and metrics as the columns.
    """

    # Defining __eq__ makes instances unhashable; stated explicitly so the
    # behavior is visible rather than implicit.
    __hash__ = None

    def __init__(self, models_results: Dict[str, Dict[str, float]]):
        """
        Initializes the ModelComparator with a dictionary of model results.

        Args:
            models_results: A dictionary containing model performance scores.
                            Format: {'Model_A': {'Metric1': 0.9, 'Metric2': 0.8}, ...}
        """
        # Protected attribute to store the raw results
        self._models_results = models_results
        # Transpose so that models become the index and metrics the columns
        self._df = pd.DataFrame(models_results).T

    @staticmethod
    def _score_limits(df: pd.DataFrame) -> tuple:
        """
        Computes the value-axis limits for the charts.

        Scores inside [0, 1] keep the fixed (0.0, 1.0) range. A bound is
        widened (with 10% headroom) only when the data exceeds it, so
        metrics outside [0, 1] are not clipped.

        Args:
            df: The performance DataFrame about to be plotted.

        Returns:
            (lower, upper) axis limits as floats.
        """
        numeric = df.select_dtypes(include=[np.number])
        if numeric.empty:
            return 0.0, 1.0
        peak = numeric.max().max()
        trough = numeric.min().min()
        upper = 1.0 if pd.isna(peak) or peak <= 1.0 else float(peak) * 1.1
        lower = 0.0 if pd.isna(trough) or trough >= 0.0 else float(trough) * 1.1
        return lower, upper

    def create_bar_chart(self, ax, df: pd.DataFrame):
        """
        Creates and formats a grouped bar chart for model performance.

        Each model is a group on the x-axis with one bar per metric.

        Args:
            ax: A Matplotlib Axes object to draw the plot on.
            df: The performance DataFrame to plot.
        """
        df.plot(kind='bar', ax=ax, width=0.8, edgecolor='black')
        ax.set_title('Model Performance Comparison (Bar Chart)', fontsize=16, fontweight='bold')
        ax.set_xlabel('Models', fontsize=12)
        ax.set_ylabel('Score', fontsize=12)
        ax.legend(title='Metrics', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(axis='y', alpha=0.3)
        ax.set_xticklabels(df.index, rotation=45, ha='right')
        ax.set_ylim(*self._score_limits(df))

        # Add score labels on top of each bar
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f', padding=3, fontsize=9)

    def create_radar_chart(self, ax, df: pd.DataFrame):
        """
        Creates and formats a radar chart for model performance.

        Each metric is one spoke of the chart and each model is drawn as a
        closed, lightly filled polygon.

        Args:
            ax: A Matplotlib Polar Axes object (must be created with projection='polar').
            df: The performance DataFrame to plot.
        """
        n_metrics = len(df.columns)
        # One evenly spaced angle per metric
        angles = np.linspace(0, 2 * np.pi, n_metrics, endpoint=False).tolist()
        # Repeat the first angle so each polygon closes on itself
        angles += angles[:1]

        # Plot each model's performance on the radar chart
        for model_name, values in df.iterrows():
            values_list = values.tolist()
            values_list += values_list[:1]
            ax.plot(angles, values_list, 'o-', linewidth=2, label=model_name)
            ax.fill(angles, values_list, alpha=0.15)

        # Set the labels for each metric
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(df.columns, fontsize=10)
        ax.set_ylim(*self._score_limits(df))
        ax.set_title('Model Performance Radar', fontsize=16, fontweight='bold', pad=20)
        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
        ax.grid(True)

    def compare_models(self, metrics: Optional[List[str]] = None):
        """
        Generates and displays a bar chart and a radar chart comparing the
        models, then prints the underlying score table.

        If no model results were provided, an error message is printed and
        nothing is drawn.

        Args:
            metrics: Optional list of specific metric names to include
                     in the visualization. If None, all available metrics are used.
        """
        if not self._models_results:
            print("Error: No model results provided")
            return

        df = self._df.copy()
        if metrics:
            df = df[metrics]

        # Create each axes with its final projection. Creating a Cartesian
        # grid first and then adding a polar axes in the same slot would
        # leave the Cartesian axes in place underneath it.
        fig = plt.figure(figsize=(15, 5))
        ax_bar = fig.add_subplot(1, 2, 1)
        ax_radar = fig.add_subplot(1, 2, 2, projection='polar')

        # Bar chart on the left, radar chart on the right
        self.create_bar_chart(ax_bar, df)
        self.create_radar_chart(ax_radar, df)

        plt.tight_layout()
        plt.show()

        print("\nModel Performance Summary:")
        print("="*60)
        print(df.to_string())
        print("="*60)

    def get_best_model(self, metric: str) -> str:
        """
        Retrieves the name of the model with the highest score for a specified metric.

        Args:
            metric: The name of the performance metric.

        Returns:
            The name of the best-performing model for that metric.

        Raises:
            ValueError: If the specified metric is not present in the model results.
        """
        if metric not in self._df.columns:
            raise ValueError(f"Metric '{metric}' not found. Available metrics: {list(self._df.columns)}")
        # idxmax() returns the index label (model name) of the maximum value
        return self._df[metric].idxmax()

    # --- Dunder Methods (Operator Overloading) ---

    def __repr__(self) -> str:
        """
        Returns a developer-friendly string with the number of models and
        the list of metric names.
        """
        return f"ModelComparator(models={len(self._models_results)}, metrics={list(self._df.columns)})"

    def __eq__(self, other) -> bool:
        """
        Implements the equality operator (==) for ModelComparator objects.

        Two ModelComparator objects are equal when their performance
        DataFrames contain identical data.

        Args:
            other: The object to compare against.

        Returns:
            True/False for ModelComparator operands; NotImplemented for any
            other type, so Python can try the reflected comparison (the
            `==` expression still evaluates to False in that case).
        """
        if not isinstance(other, ModelComparator):
            return NotImplemented
        # pd.DataFrame.equals() compares data together with index/column labels
        return self._df.equals(other._df)

    def __gt__(self, other) -> bool:
        """
        Implements the greater than operator (>) for ModelComparator objects.

        Comparison is based on the mean of the per-metric means. This equals
        the mean of all scores only when no model is missing a metric,
        because missing values are skipped per metric.

        Args:
            other: The ModelComparator object to compare against.

        Returns:
            True if this object's mean performance is strictly greater than
            the other's; NotImplemented when `other` is not a ModelComparator.
        """
        if not isinstance(other, ModelComparator):
            return NotImplemented
        # Mean per metric (column), then the mean of those means
        return self._df.mean().mean() > other._df.mean().mean()