In [2]:
# These packages should be loaded first
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional, List, Dict, Union
from abc import ABC, abstractmethod
import warnings

ModuleNotFoundError: No module named 'pandas'

In [None]:
# ABSTRACT BASE CLASS 

class VisualizationBase(ABC):
    """
    Abstract base class for the visualization components.

    Demonstrates inheritance, encapsulation, polymorphism and dunder methods.
    Concrete subclasses must implement ``render``.
    """

    # Maps each public theme name to the matplotlib style it applies.
    _THEME_STYLES: Dict[str, str] = {
        'default': 'seaborn-v0_8-darkgrid',
        'minimal': 'seaborn-v0_8-whitegrid',
        'dark': 'dark_background',
        'colorful': 'seaborn-v0_8-bright',
    }

    def __init__(self, data: pd.DataFrame, theme: str = 'default'):
        """
        Initialize the base visualization component.

        The theme is only stored here; it is applied to matplotlib when
        ``set_theme`` (or ``_apply_theme``) is called.

        Args:
            data: pandas DataFrame to visualize.
            theme: visual theme ('default', 'minimal', 'dark', 'colorful').

        Raises:
            TypeError: if ``data`` is not a pandas DataFrame.
            ValueError: if ``data`` is empty.
        """
        self._data = data  # Protected attribute (encapsulation)
        self._theme = theme  # Protected attribute
        self._validate_data()

    def _validate_data(self) -> bool:
        """
        Validate the stored data.

        Returns:
            True when the data is a non-empty DataFrame.

        Raises:
            TypeError: if the data is not a pandas DataFrame.
            ValueError: if the DataFrame is empty.
        """
        if not isinstance(self._data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if self._data.empty:
            raise ValueError("DataFrame cannot be empty")
        # Return an explicit result so the annotated ``bool`` is honoured.
        return True

    # Getter and Setter methods (Encapsulation)
    def get_data(self) -> pd.DataFrame:
        """Return the DataFrame held by this component (not a copy)."""
        return self._data

    def set_theme(self, theme: str):
        """
        Store ``theme`` and apply the matching matplotlib style.

        An unknown theme name is stored as given, but the style of the
        'default' theme is applied for it.
        NOTE: ``plt.style.use`` may still raise if the installed matplotlib
        does not provide the selected style.
        """
        self._theme = theme
        self._apply_theme()

    def _resolve_style_name(self) -> str:
        """Return the matplotlib style name for the current theme."""
        # Fall back to the style mapped to 'default', not to the key itself.
        return self._THEME_STYLES.get(self._theme, self._THEME_STYLES['default'])

    def _apply_theme(self):
        """
        Apply the selected visual theme.

        Resolves the current theme to a matplotlib style name and passes it
        to ``plt.style.use``.
        """
        plt.style.use(self._resolve_style_name())

    @abstractmethod
    def render(self):
        """Abstract method - must be implemented by subclasses (polymorphism)."""
        pass

    # Dunder Methods
    def __repr__(self) -> str:
        """Return a summary showing class name, data shape and theme."""
        return f"{self.__class__.__name__}(rows={len(self._data)}, cols={len(self._data.columns)}, theme='{self._theme}')"

    # NOTE: defining __eq__ without __hash__ makes instances unhashable.
    def __eq__(self, other) -> bool:
        """Compare two components by their data and their theme."""
        if not isinstance(other, VisualizationBase):
            # Let Python try the reflected comparison; ``==`` still ends up
            # False for unrelated types.
            return NotImplemented
        return self._data.equals(other._data) and self._theme == other._theme

    def __len__(self) -> int:
        """Return number of rows in data"""
        return len(self._data)


In [None]:
class SummaryGenerator:
    """
    Generates comprehensive data summaries
    Demonstrates: Encapsulation, Dunder Methods
    """

    def __init__(self, data: pd.DataFrame):
        """Store the DataFrame to summarize (no validation is performed)."""
        self._data = data

    def _missing_stats(self, col) -> tuple:
        """Return (missing count, formatted missing percentage) for ``col``."""
        missing = self._data[col].isnull().sum()
        total = len(self._data)
        # A frame with columns but no rows would otherwise divide by zero.
        pct = missing / total * 100 if total else 0.0
        return missing, f"{pct:.1f}%"

    def summarize_numeric(self) -> List[Dict]:
        """
        Summarize numeric columns.

        Returns:
            One dict per numeric column with count, missing values, mean,
            standard deviation, min, max and number of unique values.
            Mean/Std/Min/Max are pre-formatted strings with two decimals.
        """
        summaries = []
        numeric_cols = self._data.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            series = self._data[col]
            missing, missing_pct = self._missing_stats(col)
            summary = {
                'Column': col,
                'Type': 'Numeric',
                'Count': series.count(),
                'Missing': missing,
                'Missing %': missing_pct,
                'Mean': f"{series.mean():.2f}",
                'Std': f"{series.std():.2f}",
                'Min': f"{series.min():.2f}",
                'Max': f"{series.max():.2f}",
                'Unique': series.nunique()
            }
            summaries.append(summary)
        return summaries

    def summarize_categorical(self) -> List[Dict]:
        """
        Summarize categorical columns.

        Returns:
            One dict per object/category column with count, missing values,
            number of unique values, the most frequent value and how often
            it occurs. A column with no non-missing values reports
            'N/A' as top value and 0 as top frequency.
        """
        summaries = []
        categorical_cols = self._data.select_dtypes(include=['object', 'category']).columns

        for col in categorical_cols:
            series = self._data[col]
            modes = series.mode()
            top_val = modes[0] if len(modes) > 0 else 'N/A'
            # value_counts() drops missing values, so it can be empty even
            # when the column itself has rows; guard on the counts instead.
            counts = series.value_counts()
            missing, missing_pct = self._missing_stats(col)
            summary = {
                'Column': col,
                'Type': 'Categorical',
                'Count': series.count(),
                'Missing': missing,
                'Missing %': missing_pct,
                'Unique': series.nunique(),
                'Top Value': str(top_val),
                'Top Freq': counts.iloc[0] if len(counts) > 0 else 0
            }
            summaries.append(summary)
        return summaries

    def tabular_summary(self, style: str = 'full') -> pd.DataFrame:
        """
        Generate comprehensive tabular summary and print a short header.

        Args:
            style: 'full', 'numeric', or 'categorical'. Any other value
                produces an empty summary.

        Returns:
            DataFrame with summary statistics, one row per column.
        """
        summaries = []

        if style in ['full', 'numeric']:
            summaries.extend(self.summarize_numeric())

        if style in ['full', 'categorical']:
            summaries.extend(self.summarize_categorical())
        summary_df = pd.DataFrame(summaries)

        print("\n" + "="*80)
        print(f"DATASET SUMMARY - {style.upper()} VIEW")
        print("="*80)
        print(f"Total Rows: {len(self._data):,}")
        print(f"Memory Usage: {self._data.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
        print("="*80 + "\n")

        return summary_df

    # Dunder Methods
    def __repr__(self) -> str:
        """String representation"""
        return f"SummaryGenerator(rows={len(self._data)}, cols={len(self._data.columns)})"

    def __len__(self) -> int:
        """Return number of rows"""
        return len(self._data)