# In [None]:  (Jupyter cell marker left over from the notebook export)
"""
Pandas Analyzer & Data Visualization - Console Program

Features:
- SalesDataAnalyzer class encapsulating data loading, cleaning, analysis, visualization, and export.
- Menu-driven console interface.
- Uses pandas, numpy, matplotlib, seaborn.

Note: The script does not create files unless you call save_visualization from the menu.
"""

import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional, List, Tuple

# Set the seaborn theme once, at import time, passing style="darkgrid".
# This is a module-level side effect: it runs whenever this file is imported,
# before any figure is created by the code below.
sns.set_theme(style="darkgrid")

class SalesDataAnalyzer:
    """
    Encapsulates loading, exploring, cleaning, analyzing, and visualizing sales data.

    Every method that reads the dataset first calls ``_ensure_data_loaded`` and
    therefore raises ``ValueError`` while no DataFrame is loaded. Progress and
    recoverable problems are reported with ``print`` because the class backs a
    console menu.
    """

    def __init__(self, file_path: Optional[str] = None):
        """
        Create an analyzer and, when ``file_path`` is given, load it immediately.
        """
        # The loaded dataset; stays None until load_data succeeds.
        self.data: Optional[pd.DataFrame] = None
        # Last successfully rendered matplotlib figure (used by save_visualization).
        self.current_plot = None
        if file_path:
            self.load_data(file_path)

    def load_data(self, file_path: str, index_col: Optional[str] = None) -> None:
        """
        Load data from a CSV file into ``self.data``.

        Failures are printed, not raised; ``self.data`` keeps its previous value
        (``None`` for a fresh analyzer) when loading fails.
        """
        try:
            self.data = pd.read_csv(file_path, index_col=index_col)
            print(f"Dataset loaded successfully! Shape: {self.data.shape}")
        except FileNotFoundError:
            print(f"File not found: {file_path}")
        except pd.errors.ParserError as e:
            print(f"Parsing error: {e}")
        except Exception as e:
            print(f"Error loading dataset: {e}")

    # -------------------------
    # Exploration / Information
    # -------------------------
    def head(self, n: int = 5) -> pd.DataFrame:
        """Return the first ``n`` rows."""
        self._ensure_data_loaded()
        return self.data.head(n)

    def tail(self, n: int = 5) -> pd.DataFrame:
        """Return the last ``n`` rows."""
        self._ensure_data_loaded()
        return self.data.tail(n)

    def columns(self) -> List[str]:
        """Return the column labels as a plain list."""
        self._ensure_data_loaded()
        return list(self.data.columns)

    def dtypes(self) -> pd.Series:
        """Return the dtype of every column."""
        self._ensure_data_loaded()
        return self.data.dtypes

    def info(self) -> None:
        """Print the DataFrame's ``info()`` report."""
        self._ensure_data_loaded()
        # DataFrame.info() writes the report itself and returns None, so it
        # must not be wrapped in print() (that would emit a stray "None").
        self.data.info()

    def describe(self) -> pd.DataFrame:
        """Return ``describe(include='all')`` for the dataset."""
        self._ensure_data_loaded()
        return self.data.describe(include='all')

    # -------------------------
    # Cleaning / Missing Data
    # -------------------------
    def missing_summary(self) -> pd.Series:
        """Return the number of missing values per column."""
        self._ensure_data_loaded()
        return self.data.isnull().sum()

    def rows_with_missing(self) -> pd.DataFrame:
        """Return the rows that contain at least one missing value."""
        self._ensure_data_loaded()
        return self.data[self.data.isnull().any(axis=1)]

    def fill_missing_with_mean(self, columns: Optional[List[str]] = None) -> None:
        """
        Fill missing values with the column mean, in place on ``self.data``.

        columns: columns to fill; ``None`` or an empty list means every numeric
        column. Unknown, non-numeric, and all-missing columns are skipped with
        a message instead of aborting the whole operation.
        """
        self._ensure_data_loaded()
        cols = columns or self.data.select_dtypes(include=[np.number]).columns.tolist()
        for c in cols:
            if c not in self.data.columns:
                print(f"Column not found: {c}")
                continue
            if not self.data[c].isnull().any():
                continue
            if not pd.api.types.is_numeric_dtype(self.data[c]):
                print(f"Skipping non-numeric column '{c}'.")
                continue
            mean_val = self.data[c].mean()
            if pd.isna(mean_val):
                print(f"Skipping column '{c}': it has no non-missing values to average.")
                continue
            # Assign the result back: calling fillna(inplace=True) on the
            # selected column is chained assignment and may leave the frame
            # unchanged (pandas Copy-on-Write).
            self.data[c] = self.data[c].fillna(mean_val)
            print(f"Filled missing values in '{c}' with mean: {mean_val}")

    def drop_rows_with_missing(self) -> None:
        """Drop every row that contains a missing value and report the shapes."""
        self._ensure_data_loaded()
        before = self.data.shape
        self.data = self.data.dropna()
        after = self.data.shape
        print(f"Dropped rows with missing values. Shape before: {before}, after: {after}")

    def replace_missing_with_value(self, value, columns: Optional[List[str]] = None) -> None:
        """
        Replace missing values with ``value`` in ``columns`` (all columns when
        ``columns`` is ``None`` or empty).
        """
        self._ensure_data_loaded()
        cols = columns or self.data.columns.tolist()
        self.data[cols] = self.data[cols].fillna(value)
        print(f"Replaced missing values in columns {cols} with '{value}'.")

    def convert_dtype(self, column: str, dtype) -> None:
        """Cast ``column`` to ``dtype``; a failed conversion is printed, not raised."""
        self._ensure_data_loaded()
        try:
            self.data[column] = self.data[column].astype(dtype)
            print(f"Converted column '{column}' to dtype {dtype}.")
        except Exception as e:
            print(f"Failed to convert column '{column}': {e}")

    # -------------------------
    # Data Manipulation
    # -------------------------
    def mathematical_operations(self, op: str, columns: List[str], scalar: Optional[float] = None) -> pd.DataFrame:
        """
        Perform element-wise mathematical operations on specified numeric columns.
        op: 'add', 'subtract', 'multiply', 'divide', 'log', 'sqrt'

        Returns a modified copy; ``self.data`` is left untouched. When ``scalar``
        is ``None`` the identity operand is used (0 for add/subtract, 1 for
        multiply/divide). Unknown columns are reported and skipped; an
        unsupported ``op`` is reported and the unmodified copy is returned.

        Raises ValueError when asked to divide by zero.
        """
        self._ensure_data_loaded()
        df = self.data.copy()
        identity = {'add': 0, 'subtract': 0, 'multiply': 1, 'divide': 1}
        if op not in identity and op not in ('log', 'sqrt'):
            print(f"Operation '{op}' not supported.")
            return df

        operand = None
        if op in identity:
            # Test against None explicitly: `scalar or default` would replace a
            # legitimate 0 with the default (multiply by 0 became multiply by 1).
            operand = identity[op] if scalar is None else scalar
            if op == 'divide' and operand == 0:
                raise ValueError("Cannot divide by zero.")

        for col in columns:
            if col not in df.columns:
                print(f"Column not found: {col}")
                continue
            if op == 'add':
                df[col] = df[col] + operand
            elif op == 'subtract':
                df[col] = df[col] - operand
            elif op == 'multiply':
                df[col] = df[col] * operand
            elif op == 'divide':
                df[col] = df[col] / operand
            elif op == 'log':
                # Zeros become NaN before taking the logarithm.
                df[col] = np.log(df[col].replace(0, np.nan))
            else:  # 'sqrt'
                # Negative values are clipped to 0 before taking the root.
                df[col] = np.sqrt(df[col].clip(lower=0))
        return df

    def combine_data(self, other: pd.DataFrame, how: str = 'concat', on: Optional[str] = None) -> None:
        """
        Combine current DataFrame with another.
        how: 'concat', 'merge', 'join'

        'concat' appends rows with a fresh index, 'merge' performs an inner
        merge on column ``on``, 'join' calls ``DataFrame.join``.
        Raises ValueError for any other ``how`` or for 'merge' without ``on``.
        """
        self._ensure_data_loaded()
        if how == 'concat':
            before = self.data.shape
            self.data = pd.concat([self.data, other], ignore_index=True, sort=False)
            after = self.data.shape
            print(f"Concatenated data. Shape before: {before}, after: {after}")
        elif how == 'merge' and on:
            before = self.data.shape
            self.data = pd.merge(self.data, other, on=on, how='inner')
            after = self.data.shape
            print(f"Merged data on '{on}'. Shape before: {before}, after: {after}")
        elif how == 'join':
            self.data = self.data.join(other)
            print("Joined dataframes.")
        else:
            raise ValueError("Invalid combine method or missing 'on' for merge.")

    def split_data(self, by: str) -> dict:
        """
        Split DataFrame into groups based on the values in column 'by'.
        Returns a dictionary {group_value: DataFrame}
        """
        self._ensure_data_loaded()
        # Each group is copied so callers can modify it independently.
        groups = {key: group.copy() for key, group in self.data.groupby(by)}
        print(f"Split data into {len(groups)} groups by '{by}'.")
        return groups

    # -------------------------
    # Search / Sort / Filter / Aggregation
    # -------------------------
    def search(self, column: str, value) -> pd.DataFrame:
        """Return the rows whose ``column`` equals ``value``."""
        self._ensure_data_loaded()
        return self.data[self.data[column] == value]

    def filter_range(self, column: str, low=None, high=None) -> pd.DataFrame:
        """
        Return the rows with ``low <= column <= high``; a bound that is ``None``
        is not applied.
        """
        self._ensure_data_loaded()
        df = self.data
        if low is not None:
            df = df[df[column] >= low]
        if high is not None:
            df = df[df[column] <= high]
        return df

    def sort_by(self, column: str, ascending: bool = True) -> pd.DataFrame:
        """Return the dataset sorted by ``column`` (``self.data`` is not reordered)."""
        self._ensure_data_loaded()
        return self.data.sort_values(by=column, ascending=ascending)

    def aggregate_functions(self, group_by: Optional[str], agg_map: dict) -> pd.DataFrame:
        """
        agg_map: {'Sales': 'sum', 'Profit': 'mean', ...}
        If group_by is None, apply aggregate to whole df.
        """
        self._ensure_data_loaded()
        if group_by:
            return self.data.groupby(group_by).agg(agg_map)
        return self.data.agg(agg_map)

    # -------------------------
    # Statistical Analysis / Pivot
    # -------------------------
    def statistical_analysis(self, columns: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Return mean, std, var, min, quartiles, and max for ``columns``
        (all numeric columns when ``columns`` is ``None`` or empty),
        one row per column.
        """
        self._ensure_data_loaded()
        cols = columns or self.data.select_dtypes(include=[np.number]).columns.tolist()
        subset = self.data[cols]
        return pd.DataFrame({
            'mean': subset.mean(),
            'std': subset.std(),
            'var': subset.var(),
            'min': subset.min(),
            '25%': subset.quantile(0.25),
            '50%': subset.quantile(0.50),
            '75%': subset.quantile(0.75),
            'max': subset.max(),
        })

    def create_pivot_table(self, index: str, columns: Optional[str], values: str, aggfunc='sum') -> pd.DataFrame:
        """Return a pivot table of ``values``; empty cells are filled with 0."""
        self._ensure_data_loaded()
        return pd.pivot_table(
            self.data, index=index, columns=columns, values=values,
            aggfunc=aggfunc, fill_value=0,
        )

    # -------------------------
    # Numpy Array Conversions
    # -------------------------
    def to_numpy(self, columns: Optional[List[str]] = None) -> np.ndarray:
        """Return ``columns`` (all columns when ``None`` or empty) as a NumPy array."""
        self._ensure_data_loaded()
        cols = columns or self.data.columns.tolist()
        return self.data[cols].to_numpy()

    def demonstrate_numpy_indexing(self, columns: Optional[List[str]] = None) -> None:
        """Print a few indexing/slicing examples on the array from ``to_numpy``."""
        arr = self.to_numpy(columns)
        print("Numpy array shape:", arr.shape)
        if arr.size == 0:
            print("Empty array; nothing to demonstrate.")
            return
        # show a few indexing/slicing examples
        print("First row:", arr[0])
        if arr.shape[0] > 1:
            print("First column (all rows):", arr[:, 0])
        if arr.shape[1] > 1:
            print("First 2 columns of first 3 rows:\n", arr[:3, :2])

    # -------------------------
    # Visualization
    # -------------------------
    def visualize_data(self, kind: str, x: Optional[str] = None, y: Optional[str] = None,
                       by: Optional[str] = None, bins: int = 10, stacked: bool = False,
                       title: Optional[str] = None, figsize: Tuple[int, int] = (10, 6)) -> None:
        """
        kind: 'bar', 'line', 'scatter', 'pie', 'hist', 'stack', 'box', 'heatmap'

        Draws the requested plot, shows it, and remembers the figure in
        ``self.current_plot``. Plotting errors are printed, not raised, and the
        failed figure is closed.

        Bar plots: with no ``y`` the value counts of ``x`` are plotted; with
        ``y`` and a truthy ``by`` the sum of ``y`` per ``x`` is plotted;
        otherwise ``y`` is plotted row by row against ``x``.
        """
        self._ensure_data_loaded()
        # Close earlier figures before creating the new one.
        plt.close('all')
        fig, ax = plt.subplots(figsize=figsize)
        title = title or f"{kind.capitalize()} plot"
        try:
            if kind == 'bar':
                if x is None:
                    raise ValueError("Bar requires x")
                if y is None:
                    # No value column: plot how often each x value occurs.
                    # (Previously this path crashed unless `by` was also given.)
                    self.data[x].value_counts().plot(kind='bar', ax=ax)
                    ax.set_ylabel('count')
                elif by:
                    self.data.groupby(x)[y].sum().plot(kind='bar', ax=ax)
                    ax.set_ylabel(y)
                else:
                    self.data.set_index(x)[y].plot(kind='bar', ax=ax)
            elif kind == 'line':
                if x and y:
                    ax.plot(self.data[x], self.data[y], marker='o')
                    ax.set_xlabel(x)
                    ax.set_ylabel(y)
                else:
                    self.data.plot(ax=ax)
            elif kind == 'scatter':
                if x is None or y is None:
                    raise ValueError("Scatter requires x and y")
                ax.scatter(self.data[x], self.data[y])
                ax.set_xlabel(x)
                ax.set_ylabel(y)
            elif kind == 'pie':
                if x is None:
                    raise ValueError("Pie requires x (column to count values of)")
                counts = self.data[x].value_counts()
                counts.plot(kind='pie', autopct='%1.1f%%', ax=ax)
            elif kind == 'hist':
                if x is None:
                    numeric_cols = self.data.select_dtypes(include=[np.number]).columns
                    self.data[numeric_cols].hist(bins=bins, ax=ax)
                else:
                    self.data[x].hist(bins=bins, ax=ax)
            elif kind == 'stack':
                # stacked area plot by group
                if by and x:
                    pivot = self.data.pivot_table(index=x, columns=by, values=y, aggfunc='sum', fill_value=0)
                    pivot.plot(kind='area', stacked=stacked, ax=ax)
                else:
                    raise ValueError("Stack requires 'x' and 'by' arguments.")
            elif kind == 'box':
                if x:
                    sns.boxplot(x=self.data[x], ax=ax)
                else:
                    sns.boxplot(data=self.data.select_dtypes(include=[np.number]), ax=ax)
            elif kind == 'heatmap':
                numeric = self.data.select_dtypes(include=[np.number])
                sns.heatmap(numeric.corr(), annot=True, fmt=".2f", ax=ax)
            else:
                raise ValueError(f"Visualization kind '{kind}' not supported.")
            ax.set_title(title)
            fig.tight_layout()
            # Remember the figure before showing it so it can be saved afterwards.
            self.current_plot = fig
            plt.show()
            print(f"{kind.capitalize()} plot displayed successfully!")
        except Exception as e:
            print(f"Failed to generate {kind} plot: {e}")
            plt.close(fig)

    def save_visualization(self, filename: str) -> None:
        """
        Save last plotted figure to filename.

        '.png' is appended unless the name already ends in .png, .jpg, .jpeg,
        .pdf, or .svg. Problems are printed, not raised.
        """
        if self.current_plot is None:
            print("No plot available to save. Please generate a plot first.")
            return
        if not filename or not filename.strip():
            # An empty name would otherwise be saved as a file called ".png".
            print("No file name provided; nothing was saved.")
            return
        try:
            # ensure extension
            ext = os.path.splitext(filename)[1]
            if ext.lower() not in ('.png', '.jpg', '.jpeg', '.pdf', '.svg'):
                filename = filename + '.png'
            self.current_plot.savefig(filename, bbox_inches='tight')
            print(f"Visualization saved as {filename} successfully!")
        except Exception as e:
            print(f"Error saving visualization: {e}")

    # -------------------------
    # Helpers
    # -------------------------
    def _ensure_data_loaded(self) -> None:
        """Raise ValueError when no dataset has been loaded yet."""
        if self.data is None:
            raise ValueError("No dataset loaded. Use load_data(file_path) first.")

# -------------------------
# Console UI Functions
# -------------------------

def print_main_menu():
    """
    Print the top-level menu: a banner, the eight numbered options, and a closing rule.
    """
    menu_lines = (
        "\n========== Data Analysis & Visualization Program ==========",
        "Please select an option:",
        "1. Load Dataset",
        "2. Explore Data",
        "3. Perform DataFrame Operations",
        "4. Handle Missing Data",
        "5. Generate Descriptive Statistics",
        "6. Data Visualization",
        "7. Save Visualization",
        "8. Exit",
        "=========================================================",
    )
    # One write for the whole menu; print() supplies the final newline.
    print("\n".join(menu_lines))

def explore_menu(analyzer: SalesDataAnalyzer):
    """
    Run the "Explore Data" sub-menu until the user picks option 8.

    Options 1-4 and 6 print the value returned by the analyzer; options 5 and 7
    call analyzer methods that print on their own. Any exception raised while
    handling a choice is printed as "Error: ..." and the menu is shown again.
    """
    header = (
        "\n== Explore Data ==",
        "1. Display the first 5 rows",
        "2. Display the last 5 rows",
        "3. Display column names",
        "4. Display data types",
        "5. Display basic info",
        "6. Display describe()",
        "7. Convert to numpy and demonstrate indexing",
        "8. Back to main menu",
    )
    # Lambdas defer the attribute lookup and the call until the option is
    # chosen, so every failure happens inside the try block below.
    handlers = {
        '1': lambda: print(analyzer.head()),
        '2': lambda: print(analyzer.tail()),
        '3': lambda: print(analyzer.columns()),
        '4': lambda: print(analyzer.dtypes()),
        '5': lambda: analyzer.info(),
        '6': lambda: print(analyzer.describe()),
        '7': lambda: analyzer.demonstrate_numpy_indexing(),
    }
    while True:
        for line in header:
            print(line)
        selection = input("Enter your choice: ").strip()
        if selection == '8':
            return
        handler = handlers.get(selection)
        if handler is None:
            print("Invalid choice.")
            continue
        try:
            handler()
        except Exception as err:
            print("Error:", err)

def df_operations_menu(analyzer: SalesDataAnalyzer):
    """
    Run the "Perform DataFrame Operations" sub-menu until the user picks option 7.

    Each option gathers its arguments with input() and delegates to the
    analyzer. Any exception raised while handling a choice (bad number, missing
    file, unknown column, ...) is printed as "Error: ..." and the menu is shown
    again.
    """
    while True:
        print("\n== Perform DataFrame Operations ==")
        print("1. Mathematical operations")
        print("2. Combine with another CSV")
        print("3. Split DataFrame by column")
        print("4. Search/Sort/Filter")
        print("5. Aggregate functions")
        print("6. Create pivot table")
        print("7. Back to main menu")
        c = input("Enter your choice: ").strip()
        try:
            if c == '1':
                print("Math ops: add, subtract, multiply, divide, log, sqrt")
                op = input("Operation: ").strip()
                cols = [col.strip() for col in input("Columns (comma separated): ").split(',') if col.strip()]
                scalar = None
                # Only the binary operations take a scalar operand.
                if op in ('add', 'subtract', 'multiply', 'divide'):
                    scalar = float(input("Scalar value: ").strip())
                result = analyzer.mathematical_operations(op, cols, scalar)
                print("Result preview:")
                print(result.head())
            elif c == '2':
                other_path = input("Enter path to other CSV to combine: ").strip()
                method = input("Method (concat/merge/join): ").strip().lower()
                # The key column is only needed for a merge.
                on = input("Enter column name to merge on: ").strip() if method == 'merge' else None
                other = pd.read_csv(other_path)
                analyzer.combine_data(other, how=method, on=on)
            elif c == '3':
                col = input("Split by column name: ").strip()
                groups = analyzer.split_data(col)
                print(f"Groups: {list(groups.keys())[:10]} (showing up to 10 keys)")
            elif c == '4':
                sub = input("Choose operation: 1) search 2) filter range 3) sort\nEnter 1/2/3: ").strip()
                if sub == '1':
                    col = input("Column to search: ").strip()
                    val = input("Value to search for (exact match): ").strip()
                    # Only the numeric conversion is guarded: a value that is
                    # not a number is searched as text. Errors from the search
                    # itself go to the outer handler instead of being retried.
                    try:
                        needle = pd.to_numeric(val)
                    except (ValueError, TypeError):
                        needle = val
                    print(analyzer.search(col, needle).head())
                elif sub == '2':
                    col = input("Column to filter range: ").strip()
                    low = input("Low (leave blank for no lower bound): ").strip()
                    high = input("High (leave blank for no upper bound): ").strip()
                    low_val = float(low) if low else None
                    high_val = float(high) if high else None
                    print(analyzer.filter_range(col, low_val, high_val).head())
                elif sub == '3':
                    col = input("Column to sort by: ").strip()
                    asc = input("Ascending? (y/n): ").strip().lower() == 'y'
                    print(analyzer.sort_by(col, ascending=asc).head())
                else:
                    print("Invalid choice.")
            elif c == '5':
                gb = input("Group by column (leave blank to aggregate whole df): ").strip() or None
                print("Enter aggregation mapping, e.g., Sales:sum,Profit:mean")
                mapping_text = input("Agg map: ").strip()
                agg_map = {}
                for pair in mapping_text.split(','):
                    if ':' not in pair:
                        continue
                    k, v = (part.strip() for part in pair.split(':', 1))
                    if k and v:
                        agg_map[k] = v
                if not agg_map:
                    # Fail with a readable message rather than handing pandas
                    # an empty mapping.
                    raise ValueError("No valid aggregation mapping given; use the form Column:func.")
                print(analyzer.aggregate_functions(gb, agg_map))
            elif c == '6':
                idx = input("Pivot index: ").strip()
                cols = input("Pivot columns (leave blank if none): ").strip() or None
                vals = input("Values column: ").strip()
                agg = input("Aggfunc (sum/mean/count): ").strip() or 'sum'
                print(analyzer.create_pivot_table(idx, cols, vals, agg))
            elif c == '7':
                break
            else:
                print("Invalid choice.")
        except Exception as e:
            print("Error:", e)
def missing_data_menu(analyzer: SalesDataAnalyzer):
    """
    Run the "Handle Missing Data" sub-menu until the user picks option 6.

    Column lists are entered comma separated; blank entries (for example from a
    trailing comma) are ignored, and an empty list means "use the analyzer's
    default columns". Any exception raised while handling a choice is printed
    as "Error: ..." and the menu is shown again.
    """
    def parse_columns(text: str) -> Optional[List[str]]:
        # Drop empty names so "Sales," does not produce a '' column lookup.
        names = [name.strip() for name in text.split(',') if name.strip()]
        return names or None

    while True:
        print("\n== Handle Missing Data ==")
        print("1. Display rows with missing values")
        print("2. Fill missing values with mean")
        print("3. Drop rows with missing values")
        print("4. Replace missing values with a specific value")
        print("5. Missing values summary")
        print("6. Back to main menu")
        choice = input("Enter your choice: ").strip()
        try:
            if choice == '1':
                rows = analyzer.rows_with_missing()
                print(rows if not rows.empty else "No rows with missing values.")
            elif choice == '2':
                cols = input("Columns to fill (comma separated, leave blank for all numeric): ").strip()
                analyzer.fill_missing_with_mean(columns=parse_columns(cols))
            elif choice == '3':
                analyzer.drop_rows_with_missing()
            elif choice == '4':
                val = input("Enter replacement value: ")
                # A numeric-looking entry is passed as a number so numeric
                # columns are not filled with text; anything else stays a string.
                if val.strip():
                    try:
                        val = pd.to_numeric(val.strip())
                    except (ValueError, TypeError):
                        pass
                cols = input("Columns to replace (comma separated, leave blank for all): ").strip()
                analyzer.replace_missing_with_value(val, columns=parse_columns(cols))
            elif choice == '5':
                print(analyzer.missing_summary())
            elif choice == '6':
                break
            else:
                print("Invalid choice.")
        except Exception as e:
            print("Error:", e)

def descriptive_stats_menu(analyzer: SalesDataAnalyzer):
    """
    Run the "Generate Descriptive Statistics" sub-menu until the user picks option 3.

    Option 1 prints analyzer.describe(); option 2 prints
    analyzer.statistical_analysis() for the entered columns. Any exception
    raised while handling a choice is printed as "Error: ..." and the menu is
    shown again.
    """
    while True:
        print("\n== Generate Descriptive Statistics ==")
        print("1. Stat summary (describe)")
        print("2. Statistical analysis (mean/std/var/etc)")
        print("3. Back to main menu")
        choice = input("Enter your choice: ").strip()
        try:
            if choice == '1':
                print(analyzer.describe())
            elif choice == '2':
                cols = input("Columns for stats (comma sep, leave blank for numeric): ").strip()
                # Ignore blank names (e.g. from a trailing comma); an empty
                # result is passed as None so the analyzer picks its default.
                cols_list = [c.strip() for c in cols.split(',') if c.strip()] or None
                print(analyzer.statistical_analysis(cols_list))
            elif choice == '3':
                break
            else:
                print("Invalid choice.")
        except Exception as e:
            print("Error:", e)

def visualization_menu(analyzer: SalesDataAnalyzer):
    """
    Run the "Data Visualization" sub-menu until the user picks option 9.

    Each option collects column names with input() and calls
    analyzer.visualize_data with the matching plot kind. Any exception raised
    while handling a choice (for example a non-integer bin count) is printed as
    "Error: ..." and the menu is shown again.
    """
    while True:
        print("\n== Data Visualization ==")
        print("1. Bar Plot")
        print("2. Line Plot")
        print("3. Scatter Plot")
        print("4. Pie Chart")
        print("5. Histogram")
        print("6. Stack Plot (area)")
        print("7. Box Plot")
        print("8. Heatmap (correlation)")
        print("9. Back to main menu")
        choice = input("Enter your choice: ").strip()
        try:
            if choice == '1':
                x = input("Enter x-axis column name: ").strip()
                y = input("Enter y-axis column name (or leave blank for counts): ").strip() or None
                # With no y column, also pass `by` so visualize_data takes its
                # aggregate branch and plots the value counts of x; without it
                # the bar branch tries to select a column named None and fails.
                by = None if y else x
                analyzer.visualize_data('bar', x=x, y=y, by=by, title=f"Bar of {x}")
            elif choice == '2':
                x = input("Enter x-axis column name: ").strip()
                y = input("Enter y-axis column name: ").strip()
                analyzer.visualize_data('line', x=x, y=y, title=f"Line {y} vs {x}")
            elif choice == '3':
                x = input("Enter x-axis column name: ").strip()
                y = input("Enter y-axis column name: ").strip()
                analyzer.visualize_data('scatter', x=x, y=y, title=f"Scatter {y} vs {x}")
            elif choice == '4':
                x = input("Enter column name to count for pie chart: ").strip()
                analyzer.visualize_data('pie', x=x, title=f"Pie of {x}")
            elif choice == '5':
                x = input("Enter column name for histogram (leave blank for numeric cols): ").strip() or None
                bins = int(input("Number of bins (default 10): ").strip() or 10)
                # Mirror the box-plot title so a blank column does not read "Histogram None".
                analyzer.visualize_data('hist', x=x, bins=bins,
                                        title=f"Histogram {x if x else 'numeric cols'}")
            elif choice == '6':
                x = input("Enter x-axis index column name (e.g., Year): ").strip()
                by = input("Enter grouping column for stacked area (e.g., Region): ").strip()
                y = input("Enter values column to aggregate (e.g., Sales): ").strip()
                analyzer.visualize_data('stack', x=x, by=by, y=y, stacked=True, title=f"Stack Plot {y} by {by}")
            elif choice == '7':
                x = input("Column for box plot (leave blank for numeric columns): ").strip() or None
                analyzer.visualize_data('box', x=x, title=f"Boxplot {x if x else 'numeric cols'}")
            elif choice == '8':
                analyzer.visualize_data('heatmap', title="Correlation Heatmap")
            elif choice == '9':
                break
            else:
                print("Invalid choice.")
        except Exception as e:
            print("Error:", e)

def save_visual_menu(analyzer: SalesDataAnalyzer):
    """
    Ask for a file name and save the analyzer's last plot under it.

    A blank entry is rejected here: passed on unchanged, it would have the
    default '.png' extension appended and be written as a file named ".png".
    """
    filename = input("Enter file name to save the plot (e.g., scatter_plot.png): ").strip()
    if not filename:
        print("No file name entered; nothing was saved.")
        return
    analyzer.save_visualization(filename)

# -------------------------
# Main Program Flow
# -------------------------

def main():
    """
    Run the top-level menu loop until the user chooses Exit.

    Option 1 loads a CSV into a new SalesDataAnalyzer. Options 2-7 open the
    matching sub-menu and require a loaded dataset. End-of-input or Ctrl+C at
    any prompt exits with the goodbye message instead of a traceback.
    """
    analyzer: Optional[SalesDataAnalyzer] = None
    # Options that need a loaded dataset, mapped to the function that handles them.
    submenus = {
        '2': explore_menu,
        '3': df_operations_menu,
        '4': missing_data_menu,
        '5': descriptive_stats_menu,
        '6': visualization_menu,
        '7': save_visual_menu,
    }
    try:
        while True:
            print_main_menu()
            choice = input("Enter your choice: ").strip()
            if choice == '1':
                path = input("Enter the path of the dataset (CSV file): ").strip()
                try:
                    candidate = SalesDataAnalyzer(path)
                except Exception as e:
                    print("Failed to load dataset:", e)
                    continue
                # load_data reports its own errors and leaves `data` as None on
                # failure (or when the path is blank). Keep the previous
                # analyzer then, so a failed reload neither discards a working
                # dataset nor slips past the "load a dataset first" guard.
                if candidate.data is None:
                    if not path:
                        print("No file path entered.")
                    if analyzer is not None:
                        print("Keeping the previously loaded dataset.")
                    continue
                analyzer = candidate
            elif choice in submenus:
                if analyzer is None:
                    print("Please load a dataset first.")
                    continue
                submenus[choice](analyzer)
            elif choice == '8':
                print("Exiting the program. Goodbye!")
                break
            else:
                print("Invalid choice. Please enter a number between 1 and 8.")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl+C: leave quietly.
        print("\nExiting the program. Goodbye!")

# Start the interactive console menu only when this file is executed directly;
# importing it as a module defines the class and functions without prompting.
if __name__ == "__main__":
    main()
