In [1]:
#This packages should be load first
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional, List, Dict, Union
from abc import ABC, abstractmethod
import warnings

In [2]:
# ABSTRACT BASE CLASS 

class VisualizationBase(ABC):
    """
    Abstract base class for the visualization components
    This will demonstrates: Inheritance, Encapsulation, Polymorphism and Dunder Methods
    """
    
    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Initialize base visualization component
        
        Args:
            data: pandas DataFrame
            theme: visual theme ('default', 'minimal', 'dark', 'colorful')
        """
        self._data = data  # Protected attribute (encapsulation)
        self._theme = theme  # Protected attribute
        self._validate_data()
    
    def _validate_data(self) -> bool:
        """Protected method to validate data"""
        if not isinstance(self._data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if self._data.empty:
            raise ValueError("DataFrame cannot be empty")
    
    # Getter and Setter methods (Encapsulation)
    def get_data(self) -> pd.DataFrame:
        """Get the data"""
        return self._data
    
    def set_theme(self, theme: str):
        """
        Set the visualization theme
        NOTE: This calls _apply_theme, which may raise an exception if the theme is invalid.
        """
        self._theme = theme
        self._apply_theme()
    

    def _apply_theme(self):
        """
        Apply the selected visual theme.
        This method attempts to apply a matplotlib style.
        """ 
        themes = {
            'default': 'seaborn-v0_8-darkgrid',
            'minimal': 'seaborn-v0_8-whitegrid',
            'dark': 'dark_background',
            'colorful': 'seaborn-v0_8-bright'
        }
        
        style_name = themes.get(self._theme, 'default')
        plt.style.use(style_name)
    
    @abstractmethod
    def render(self):
        """Abstract method - must be implemented by subclasses like Polymorphism"""
        pass
    
    # Dunder Methods
    def __repr__(self) -> str:
        """String representation"""
        return f"{self.__class__.__name__}(rows={len(self._data)}, cols={len(self._data.columns)}, theme='{self._theme}')"
    
    def __eq__(self, other) -> bool:
        """Equality comparison"""
        if not isinstance(other, VisualizationBase):
            return False
        return self._data.equals(other._data) and self._theme == other._theme
    
    def __len__(self) -> int:
        """Return number of rows in data"""
        return len(self._data)


In [3]:
class SummaryGenerator:
    """
    Generates comprehensive data summaries
    Demonstrates: Encapsulation, Dunder Methods
    """

    def __init__(self, data: pd.DataFrame):
        self._data = data

    def summarize_numeric(self) -> List[Dict]:
        """Summarize numeric columns"""
        summaries = []
        numeric_cols = self._data.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            summary = {
                'Column': col,
                'Type': 'Numeric',
                'Count': self.data[col].count(),
                'Missing': self._data[col].isnull().sum(),
                'Missing %': f"{self._data[col].isnull().sum() / len(self._data) * 100:.1f}%",
                'Mean': f"{self._data[col].mean():.2f}",
                'Std': f"{self._data[col].std():.2f}",
                'Min': f"{self._data[col].min():.2f}",
                'Max': f"{self._data[col].max():.2f}",
                'Unique': self._data[col].nunique()
            }
            summaries.append(summary)
        return summaries

    def summarize_categorical(self) -> List[Dict]:
        """Summarize categorical columns"""
        summaries = []
        categorical_cols = self._data.select_dtypes(include=['object', 'category']).columns

        for col in categorical_cols:
            top_val = self._data[col].mode()[0] if len(self._data[col].mode()) > 0 else 'N/A'
            summary = {
                'Column': col,
                'Type': 'Categorical',
                'Count': self._data[col].count(),
                'Missing': self._data[col].isnull().sum(),
                'Missing %': f"{self._data[col].isnull().sum() / len(self._data) * 100:.1f}%",
                'Unique': self._data[col].nunique(),
                'Top Value': str(top_val),
                'Top Freq': self._data[col].value_counts().iloc[0] if len(self._data[col]) > 0 else 0
            }
            summaries.append(summary)
        return summaries

    def tabular_summary(self, style: str = 'full') -> pd.DataFrame:
        """
        Generate comprehensive tabular summary
        Args:
            style: 'full, 'numeric', or 'categorical'

        Returns:
            DataFrame with summary statistics
        """
        summaries = []

        if style in ['full', 'numeric']:
            summaries.extend(self.summaries_numeric())

        if style in ['full', 'categorical']:
            summaries.extend(self.summarize_categorical())
        summary_df pd.DataFrame(summaries)

        print("\n" + "="*80)
        print(f"DATASET SUMMARY - {style.upper()} VIEW")
        print("="*80)
        print(f"Total Rows: {len(self._data):,}")
        print(f"Memory Usage: {self._data.memory_usage(deep=True.sum() / 1024**2:.2f} MB")
        print("="*80 + "\n")

        return summary_df

    # Dunder Methods
    def __repr__(self) -> str:
        """String representation"""
        return f"SummaryGenerator(rows={len(self._data)}, cols={len(self._data.columns)})"

        def __len__(self) -> int:
            """Return number of rows"""
            return len(self._data)

SyntaxError: invalid syntax (2071112758.py, line 67)

In [None]:
class PlotEase(VisualizationBase):
    """
    Main facade class that integrates all features
    Demonstrates: Composition, Inheritance, Polymorphism
    """
    
    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        super().__init__(data, theme)
        
        # Composition - PlotEase HAS-A these components
        self._diagnostic = DiagnosticPlotter(data, theme)
        self._summary = SummaryGenerator(data)
        self._comparator = None  # Will be initialized when needed
        self._plotter = QuickPlotter(data, theme)
        
        self._apply_theme()
    
    def autoplot(self, target: Optional[str] = None, max_plots: int = 6):
        """Delegate to DiagnosticPlotter"""
        self._diagnostic.autoplot(target, max_plots)
    
    def tabular_summary(self, style: str = 'full') -> pd.DataFrame:
        """Delegate to SummaryGenerator"""
        return self._summary.tabular_summary(style)
    
    def compare_models(self, models_results: Dict[str, Dict[str, float]], 
                      metrics: Optional[List[str]] = None):
        """Delegate to ModelComparator"""
        self._comparator = ModelComparator(models_results)
        self._comparator.compare_models(metrics)
    
    def quick_plot(self, x: str, y: Optional[str] = None, **kwargs):
        """Delegate to QuickPlotter"""
        self._plotter.quick_plot(x, y, **kwargs)
    
    def set_style(self, style_dict: Dict[str, any]):
        """Apply custom styling"""
        self._plotter.set_style(style_dict)
    
    def render(self):
        """Override abstract method - Polymorphism"""
        print("PlotEase Main Interface")
        print("Available methods:")
        print("  - autoplot(): Generate diagnostic plots")
        print("  - tabular_summary(): Generate data summary")
        print("  - compare_models(): Compare ML models")
        print("  - quick_plot(): Create quick visualizations")
    
    # Additional Dunder Methods
    def __repr__(self) -> str:
        """String representation - Method Overriding"""
        return f"PlotEase(rows={len(self._data)}, cols={len(self._data.columns)}, theme='{self._theme}')"
    
    def __lt__(self, other) -> bool:
        """Less than comparison based on data size"""
        if not isinstance(other, PlotEase):
            return NotImplemented
        return len(self._data) < len(other._data)

In [None]:
class DiagnosticPlotter(VisualizationBase):
    """
    Handles automatic diagnostic plot generation
    Demonstrates: Inheritance from VisualizationBase
    """
    
    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        super().__init__(data, theme)
        self._apply_theme()
    
    def create_distributions(self, ax, numeric_cols: List[str]):
        """Create distribution plots for numeric variables"""
        for col in numeric_cols[:3]:
            self._data[col].hist(alpha=0.5, label=col, bins=30, ax=ax)
        ax.set_title('Distribution of Numeric Variables', fontsize=14, fontweight='bold')
        ax.set_xlabel('Value')
        ax.set_ylabel('Frequency')
        ax.legend()
        ax.grid(alpha=0.3)
    
    def create_correlations(self, ax, numeric_cols: List[str]):
        """Create correlation heatmap"""
        corr = self._data[numeric_cols].corr()
        sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', 
                   center=0, ax=ax, cbar_kws={'shrink': 0.8})
        ax.set_title('Correlation Matrix', fontsize=14, fontweight='bold')
    
    def create_missing_data(self, ax):
        """Create missing data visualization"""
        missing = self._data.isnull().sum()
        missing = missing[missing > 0].sort_values(ascending=False)
        if len(missing) > 0:
            missing.plot(kind='barh', ax=ax, color='coral')
            ax.set_title('Missing Values by Column', fontsize=14, fontweight='bold')
            ax.set_xlabel('Count')
        else:
            ax.text(0.5, 0.5, 'No Missing Values', ha='center', va='center', fontsize=16)
            ax.set_title('Missing Values Check', fontsize=14, fontweight='bold')
            ax.axis('off')
    
    def create_outliers(self, ax, numeric_cols: List[str]):
        """Create boxplot for outlier detection"""
        self._data[numeric_cols[:4]].boxplot(ax=ax)
        ax.set_title('Outlier Detection (Boxplots)', fontsize=14, fontweight='bold')
        ax.set_ylabel('Value')
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
    
    def autoplot(self, target: Optional[str] = None, max_plots: int = 6):
        """
        Automatically generate diagnostic plots based on data types
        
        Args:
            target: Optional target variable for focused analysis
            max_plots: Maximum number of plots to generate
        """
        numeric_cols = self._data.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = self._data.select_dtypes(include=['object', 'category']).columns.tolist()
        
        plots_created = 0
        n_rows = min(3, (max_plots + 1) // 2)
        fig = plt.figure(figsize=(15, 5 * n_rows))
        
        # Plot 1: Distribution of numeric variables
        if numeric_cols and plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            self.create_distributions(ax, numeric_cols)
        
        # Plot 2: Correlation heatmap
        if len(numeric_cols) > 1 and plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            self.create_correlations(ax, numeric_cols)
        
        # Plot 3: Missing data
        if plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            self.create_missing_data(ax)
        
        # Plot 4: Target variable analysis
        if target and target in self._data.columns and plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            
            if self._data[target].dtype in [np.number]:
                self._data[target].hist(bins=30, ax=ax, color='steelblue', edgecolor='black')
                ax.set_title(f'Target Distribution: {target}', fontsize=14, fontweight='bold')
                ax.set_xlabel(target)
                ax.set_ylabel('Frequency')
            else:
                self._data[target].value_counts().plot(kind='bar', ax=ax, color='steelblue')
                ax.set_title(f'Target Distribution: {target}', fontsize=14, fontweight='bold')
                ax.set_xlabel(target)
                ax.set_ylabel('Count')
                plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
        
        # Plot 5: Categorical distribution
        if categorical_cols and plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            col = categorical_cols[0]
            value_counts = self._data[col].value_counts().head(10)
            value_counts.plot(kind='barh', ax=ax, color='teal')
            ax.set_title(f'Top Categories: {col}', fontsize=14, fontweight='bold')
            ax.set_xlabel('Count')
        
        # Plot 6: Outliers
        if len(numeric_cols) >= 1 and plots_created < max_plots:
            plots_created += 1
            ax = plt.subplot(n_rows, 2, plots_created)
            self.create_outliers(ax, numeric_cols)
        
        plt.tight_layout()
        plt.show()
        
        print(f"✓ Generated {plots_created} diagnostic plots")
    
    def render(self):
        """Implementation of abstract method"""
        self.autoplot()
    
    def __repr__(self) -> str:
        """String representation"""
        return f"DiagnosticPlotter(rows={len(self._data)}, theme='{self._theme}')"

In [None]:
class QuickPlotter(VisualizationBase):
    """
    Quick plotting with minimal syntax
    Demonstrates: Inheritance from VisualizationBase
    """
    
    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        super().__init__(data, theme)
        self._style_config = {}  # Protected attribute for custom styles
        self._apply_theme()
    
    def detect_plot_type(self, x: str, y: Optional[str]) -> str:
        """Automatically detect appropriate plot type"""
        if y is None:
            if self._data[x].dtype in [np.number]:
                return 'hist'
            else:
                return 'bar'
        else:
            if self._data[x].dtype in [np.number] and self._data[y].dtype in [np.number]:
                return 'scatter'
            else:
                return 'bar'
    
    def quick_plot(self, x: str, y: Optional[str] = None, 
                   kind: str = 'auto', 
                   color: str = 'steelblue',
                   title: Optional[str] = None,
                   figsize: tuple = (10, 6),
                   **kwargs):
        """
        Create plots with minimal syntax
        
        Args:
            x: Column name for x-axis
            y: Column name for y-axis (optional)
            kind: Plot type ('auto', 'scatter', 'line', 'bar', 'box', 'hist')
            color: Color for the plot
            title: Custom title
            figsize: Figure size tuple
        """
        plt.figure(figsize=figsize)
        
        if kind == 'auto':
            kind = self.detect_plot_type(x, y)
        
        # Create the plot
        if kind == 'scatter' and y:
            plt.scatter(self._data[x], self._data[y], alpha=0.6, color=color, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel(y, fontsize=12)
            
        elif kind == 'line' and y:
            plt.plot(self._data[x], self._data[y], color=color, linewidth=2, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel(y, fontsize=12)
            
        elif kind == 'bar':
            if y:
                self._data.groupby(x)[y].mean().plot(kind='bar', color=color, **kwargs)
                plt.ylabel(f'Mean {y}', fontsize=12)
            else:
                self._data[x].value_counts().plot(kind='bar', color=color, **kwargs)
                plt.ylabel('Count', fontsize=12)
            plt.xlabel(x, fontsize=12)
            plt.xticks(rotation=45, ha='right')
            
        elif kind == 'hist':
            plt.hist(self._data[x], bins=30, color=color, edgecolor='black', alpha=0.7, **kwargs)
            plt.xlabel(x, fontsize=12)
            plt.ylabel('Frequency', fontsize=12)
            
        elif kind == 'box':
            if y:
                self._data.boxplot(column=y, by=x, ax=plt.gca(), patch_artist=True)
            else:
                self._data[[x]].boxplot(patch_artist=True)
            plt.xlabel(x, fontsize=12)
        
        # Styling
        if title:
            plt.title(title, fontsize=16, fontweight='bold', pad=20)
        else:
            plt.title(f'{kind.capitalize()} Plot: {x}' + (f' vs {y}' if y else ''), 
                     fontsize=16, fontweight='bold', pad=20)
        
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.show()
    
    def set_style(self, style_dict: Dict[str, any]):
        """Apply custom styling globally"""
        if 'font_size' in style_dict:
            plt.rcParams['font.size'] = style_dict['font_size']
        if 'figure_facecolor' in style_dict:
            plt.rcParams['figure.facecolor'] = style_dict['figure_facecolor']
        if 'axes_facecolor' in style_dict:
            plt.rcParams['axes.facecolor'] = style_dict['axes_facecolor']
        if 'grid_alpha' in style_dict:
            plt.rcParams['grid.alpha'] = style_dict['grid_alpha']
        
        self._style_config.update(style_dict)
        print("✓ Custom styling applied")
    
    def render(self):
        """Implementation of abstract method"""
        print("Use quick_plot() method to render specific plots")
    
    def __repr__(self) -> str:
        """String representation"""
        return f"QuickPlotter(rows={len(self._data)}, theme='{self._theme}')"

In [8]:
print ("testing")

testing
