In [18]:
import tkinter as tk
from tkinter import ttk, filedialog, simpledialog, messagebox
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

In [26]:
class DataDashboard:
    """Three-pane Tkinter dashboard for loading, inspecting, cleaning and plotting tabular data.

    The window is split into a control panel (left), a data view with "Raw Data"
    and "Description" tabs (middle) and a plot area (right). A one-line status
    label at the bottom of the window reports both errors and confirmations.

    Attributes:
        root: The Tk window the dashboard is built on.
        data: The currently loaded ``pandas.DataFrame``, or ``None`` when nothing
            is loaded.
    """

    # Plot types that are computed from all numeric columns at once, so the
    # per-column dropdown does not apply to them.
    MATRIX_GRAPHS = ("Correlation Matrix", "Cluster Map")
    GRAPH_OPTIONS = ("Histogram", "Pie Chart") + MATRIX_GRAPHS
    MANIPULATION_OPTIONS = ("dropna", "fillna")

    def __init__(self, root):
        """Configure the root window and build every widget.

        Args:
            root: The ``tk.Tk`` (or ``tk.Toplevel``) window to populate.
        """
        self.root = root
        self.root.title("Data Handling Dashboard")
        self.root.geometry("1400x800")
        self.data = None

        self.create_main_layout()

    # ------------------------------------------------------------------
    # Layout construction
    # ------------------------------------------------------------------
    def create_main_layout(self):
        """Create the status line and the three horizontally arranged sections."""
        # The status line is packed first so that it keeps its space when the
        # window shrinks; pack squeezes the most recently packed widgets first.
        self.error_message = tk.StringVar(self.root)
        self.error_label = tk.Label(self.root, textvariable=self.error_message, fg="red", anchor="w")
        self.error_label.pack(side=tk.BOTTOM, fill=tk.X)

        main_container = tk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_container.pack(fill=tk.BOTH, expand=True)

        # Left section: control panel
        control_frame = tk.Frame(main_container, width=300)
        main_container.add(control_frame)
        self.create_control_panel(control_frame)

        # Middle section: data display
        data_frame = tk.Frame(main_container, width=500)
        main_container.add(data_frame)
        self.create_data_display(data_frame)

        # Right section: analysis and plots
        analysis_frame = tk.Frame(main_container, width=600)
        main_container.add(analysis_frame)
        self.create_analysis_section(analysis_frame)

    def create_control_panel(self, parent):
        """Build the control panel: top-level buttons, plot controls and data actions.

        Args:
            parent: Container widget that receives the controls.
        """
        tk.Label(parent, text="Control Panel", font=("Helvetica", 12, "bold")).pack(side=tk.TOP, pady=10)

        for caption, callback in (
            ("Load Data", self.load_data),
            ("Describe Data", self.describe_data),
            ("Reset", self.reset_screen),
        ):
            tk.Button(parent, text=caption, command=callback).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        self._build_visualization_controls(parent)
        self._build_manipulation_controls(parent)

    def _build_visualization_controls(self, parent):
        """Create the "Data Visualization" group (graph type, column, generate button)."""
        selection_frame = tk.LabelFrame(parent, text="Data Visualization", labelanchor="n",
                                        font=("Helvetica", 10, "bold"),
                                        borderwidth=2,
                                        relief=tk.RIDGE)
        selection_frame.pack(side=tk.TOP, padx=10, pady=10, fill=tk.X)

        # Graph selection
        tk.Label(selection_frame, text="Select Visualization:").pack(side=tk.TOP, padx=10, pady=(10, 0))
        self.selected_graph = tk.StringVar(parent)
        self.graph_dropdown = ttk.Combobox(selection_frame, textvariable=self.selected_graph,
                                           values=self.GRAPH_OPTIONS, state="readonly")
        self.graph_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        # Column selection
        tk.Label(selection_frame, text="Select Column (Optional):").pack(side=tk.TOP, padx=10, pady=(10, 0))
        self.selected_column = tk.StringVar(parent)
        self.column_dropdown = ttk.Combobox(selection_frame, textvariable=self.selected_column, state="readonly")
        self.column_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        # trace_add("write", ...) is the supported replacement for the
        # deprecated trace("w", ...) spelling.
        self.selected_graph.trace_add("write", self._on_graph_select)

        tk.Button(selection_frame, text="Generate Plot", command=self.confirm_selection).pack(
            side=tk.TOP, padx=10, pady=5, fill=tk.X)

    def _build_manipulation_controls(self, parent):
        """Create the "Data Actions" group (dropna / fillna controls)."""
        action_frame = tk.LabelFrame(parent, text="Data Actions", labelanchor="n",
                                     font=("Helvetica", 10, "bold"),
                                     borderwidth=2,
                                     relief=tk.RIDGE)
        action_frame.pack(side=tk.TOP, padx=10, pady=10, fill=tk.X)

        # Manipulation type
        tk.Label(action_frame, text="Data Manipulation:").pack(side=tk.TOP, padx=10, pady=(10, 0))
        self.manipulation_type = tk.StringVar(parent)
        self.manipulation_dropdown = ttk.Combobox(action_frame,
                                                  textvariable=self.manipulation_type,
                                                  values=self.MANIPULATION_OPTIONS,
                                                  state="readonly")
        self.manipulation_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        # Column the manipulation applies to (empty means "whole frame")
        tk.Label(action_frame, text="Select Column (Optional):").pack(side=tk.TOP, padx=10, pady=(10, 0))
        self.manipulation_column = tk.StringVar(parent)
        self.manipulation_column_dropdown = ttk.Combobox(action_frame,
                                                         textvariable=self.manipulation_column,
                                                         state="readonly")
        self.manipulation_column_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        # Fill value entry; it is always visible and only read for "fillna".
        self.fill_value_frame = tk.Frame(action_frame)
        self.fill_value_frame.pack(side=tk.TOP, padx=10, pady=(10, 0), fill=tk.X)

        tk.Label(self.fill_value_frame, text="Fill Value:").pack(side=tk.TOP, padx=10, pady=(10, 0))
        self.fill_value = tk.StringVar(parent)
        self.fill_value_entry = tk.Entry(self.fill_value_frame, textvariable=self.fill_value)
        self.fill_value_entry.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        tk.Button(action_frame, text="Execute Manipulation", command=self.execute_data_manipulation).pack(
            side=tk.TOP, padx=10, pady=5, fill=tk.X)

    @staticmethod
    def _add_scrolled(container, widget_class, **options):
        """Create ``widget_class`` inside ``container`` wired to vertical and horizontal scrollbars.

        Returns:
            The newly created, already packed widget.
        """
        y_scrollbar = ttk.Scrollbar(container, orient=tk.VERTICAL)
        y_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        x_scrollbar = ttk.Scrollbar(container, orient=tk.HORIZONTAL)
        x_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)

        widget = widget_class(container,
                              yscrollcommand=y_scrollbar.set,
                              xscrollcommand=x_scrollbar.set,
                              **options)
        widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        y_scrollbar.config(command=widget.yview)
        x_scrollbar.config(command=widget.xview)
        return widget

    def create_data_display(self, parent):
        """Build the tabbed data view ("Raw Data" table and "Description" text).

        Args:
            parent: Container widget that receives the notebook.
        """
        tk.Label(parent, text="Data Display", font=("Helvetica", 12, "bold")).pack(side=tk.TOP, pady=10)

        self.data_notebook = ttk.Notebook(parent)
        self.data_notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Raw Data tab: a scrollable Treeview used as a table
        raw_data_frame = tk.Frame(self.data_notebook)
        self.data_notebook.add(raw_data_frame, text="Raw Data")
        self.data_frame = self._add_scrolled(raw_data_frame, ttk.Treeview)

        # Description tab: a scrollable, non-wrapping Text widget
        desc_frame = tk.Frame(self.data_notebook)
        self.data_notebook.add(desc_frame, text="Description")
        desc_content_frame = tk.Frame(desc_frame)
        desc_content_frame.pack(fill=tk.BOTH, expand=True)
        # wrap=NONE keeps wide tables on one line so the horizontal scrollbar is useful
        self.desc_text = self._add_scrolled(desc_content_frame, tk.Text, wrap=tk.NONE)

    def create_analysis_section(self, parent):
        """Build the plot area: a notebook that holds the generated plot tab.

        Args:
            parent: Container widget that receives the notebook.
        """
        tk.Label(parent, text="Analysis and Plots", font=("Helvetica", 12, "bold")).pack(side=tk.TOP, pady=10)

        self.plot_notebook = ttk.Notebook(parent)
        self.plot_notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

    # ------------------------------------------------------------------
    # Dropdown synchronisation
    # ------------------------------------------------------------------
    def _on_graph_select(self, *_trace_args):
        """Variable-trace callback: re-sync the plot column dropdown with the graph type."""
        self._sync_plot_columns()

    def _sync_plot_columns(self):
        """Make the plot column dropdown match the loaded data and the chosen graph type.

        Histograms offer numeric columns only, matrix-style plots disable the
        dropdown, and everything else offers every column. A selection that is
        no longer offered is cleared.
        """
        if self.data is None:
            self.column_dropdown['values'] = []
            self.selected_column.set("")
            self.column_dropdown['state'] = 'readonly'
            return

        graph = self.selected_graph.get()
        if graph in self.MATRIX_GRAPHS:
            self.selected_column.set("")
            self.column_dropdown['state'] = 'disabled'
            return

        self.column_dropdown['state'] = 'readonly'
        if graph == "Histogram":
            choices = self.data.select_dtypes(include="number").columns.tolist()
        else:
            choices = list(self.data.columns)
        self.column_dropdown['values'] = choices
        if self.selected_column.get() not in choices:
            self.selected_column.set("")

    def _sync_manipulation_columns(self):
        """Offer every column of the loaded data (or nothing) in the manipulation dropdown."""
        choices = [] if self.data is None else list(self.data.columns)
        self.manipulation_column_dropdown['values'] = choices
        if self.manipulation_column.get() not in choices:
            self.manipulation_column.set("")

    def _refresh_column_choices(self):
        """Re-sync both column dropdowns after the data changed."""
        self._sync_plot_columns()
        self._sync_manipulation_columns()

    def _clear_plot_tabs(self):
        """Destroy every plot tab; destroying a notebook child also removes its tab."""
        for tab in self.plot_notebook.winfo_children():
            tab.destroy()

    # ------------------------------------------------------------------
    # Description
    # ------------------------------------------------------------------
    def _build_description(self):
        """Return the text shown on the Description tab for ``self.data``.

        The text contains the frame's shape, a tab-separated table of column
        names / dtypes / NaN counts, and ``describe(include='all')``.
        """
        frame = self.data
        shape_line = f"Shape of the DataFrame: {frame.shape[0]} rows, {frame.shape[1]} columns\n\n"

        # Labels are passed through str() because str.join rejects non-string
        # labels such as integer column names.
        table_header = "dfColumns:\t" + "\t".join(map(str, frame.columns)) + "\n"
        table_rows = [
            "Data Type:\t" + "\t".join(frame.dtypes.astype(str)),
            "NaN Count:\t" + "\t".join(frame.isna().sum().astype(str)),
        ]
        table_content = "\n".join(table_rows) + "\n\n"

        if frame.shape[1] == 0:
            # describe() raises on a frame without columns
            stats = ""
        else:
            # option_context restores the display options even if describe() fails
            with pd.option_context('display.max_columns', None, 'display.width', None):
                stats = frame.describe(include='all').to_string()

        return shape_line + table_header + table_content + stats

    def _render_description(self):
        """Replace the Description tab's text with a fresh description of ``self.data``."""
        description = self._build_description()
        self.desc_text.delete("1.0", tk.END)
        self.desc_text.insert(tk.END, description)

    def describe_data(self):
        """Show summary information about the loaded data and switch to the Description tab.

        Reports a status message instead when no data is loaded or when the
        description cannot be computed.
        """
        if self.data is None:
            self.error_message.set("Please load data first.")
            return

        try:
            self._render_description()
        except Exception as e:  # GUI boundary: report instead of losing the error in Tk's callback handler
            self.error_message.set(f"Error describing data: {str(e)}")
            return

        self.data_notebook.select(1)

    # ------------------------------------------------------------------
    # Loading and displaying data
    # ------------------------------------------------------------------
    def load_data(self):
        """Ask the user for a CSV or XLSX file and load it into the dashboard.

        On success the table, dropdowns, description and plot tabs are reset for
        the new data. Any failure is reported on the status line and leaves the
        previously loaded data in place.
        """
        file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv"), ("Excel Files", "*.xlsx")])
        if not file_path:
            return

        try:
            # Compare case-insensitively so "DATA.CSV" is accepted too
            lowered_path = file_path.lower()
            if lowered_path.endswith(".csv"):
                loaded = pd.read_csv(file_path)
            elif lowered_path.endswith(".xlsx"):
                loaded = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file type; expected .csv or .xlsx")

            # Comboboxes hand back strings, so labels must be strings for a
            # selection to round-trip into self.data[column] (e.g. numeric
            # Excel headers).
            loaded.columns = loaded.columns.map(str)
            self.data = loaded

            self._refresh_column_choices()
            self.display_dataframe()
            # The old description and plots belong to the previous dataset
            self.desc_text.delete("1.0", tk.END)
            self._clear_plot_tabs()

            self.error_message.set("Data loaded successfully.")
        except Exception as e:  # GUI boundary: surface any reader/parse error to the user
            self.error_message.set(f"Error loading data: {str(e)}")

    def display_dataframe(self):
        """Rebuild the Raw Data table from ``self.data`` (empty table when no data is loaded)."""
        self.data_frame.delete(*self.data_frame.get_children())

        if self.data is None:
            self.data_frame["columns"] = ()
            return

        headings = [str(label) for label in self.data.columns]
        self.data_frame["columns"] = headings
        self.data_frame["show"] = "headings"
        for heading in headings:
            self.data_frame.heading(heading, text=heading)

        # itertuples keeps each column's own dtype; iterrows would upcast an
        # integer column to float whenever the row also holds floats.
        for row in self.data.itertuples(index=False, name=None):
            self.data_frame.insert("", "end", values=row)

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------
    def confirm_selection(self):
        """Validate the plot selection and render the requested plot in a fresh tab.

        Validation problems and plotting errors are reported on the status line;
        a tab is only left behind when the plot was actually produced.
        """
        if self.data is None:
            self.error_message.set("Please load data first.")
            return

        graph = self.selected_graph.get()
        if not graph:
            self.error_message.set("Please select a graph type.")
            return

        if graph in self.MATRIX_GRAPHS:
            # These plots always use every numeric column
            self.selected_column.set("")
            column = ""
        else:
            column = self.selected_column.get()
            if not column:
                self.error_message.set("Please select a column.")
                return

        self._clear_plot_tabs()

        plot_frame = tk.Frame(self.plot_notebook)
        try:
            self.plot_notebook.add(plot_frame, text=f"{graph} - {column if column else 'All Columns'}")

            if graph == "Histogram":
                self.plot_histogram(plot_frame, column)
            elif graph == "Pie Chart":
                self.plot_pie_chart(plot_frame, column)
            elif graph == "Correlation Matrix":
                self.plot_correlation_matrix(plot_frame)
            elif graph == "Cluster Map":
                self.plot_clustermap(plot_frame)

            self.plot_notebook.select(plot_frame)
            # Drop any stale validation/error text now that the plot succeeded
            self.error_message.set("")
        except Exception as e:  # GUI boundary: report and remove the half-built tab
            plot_frame.destroy()
            self.error_message.set(f"Error generating plot: {str(e)}")

    def plot_histogram(self, parent, column):
        """Draw a density histogram of ``column`` with a KDE curve when one can be estimated.

        Args:
            parent: Widget that receives the plot canvas.
            column: Name of a numeric column in ``self.data``.

        Raises:
            ValueError: If the column is not numeric or has no non-missing values.
        """
        values = self.data[column].dropna()
        if not pd.api.types.is_numeric_dtype(values):
            raise ValueError(f"Column '{column}' is not numeric.")
        if values.empty:
            raise ValueError(f"Column '{column}' has no non-missing values.")
        values = values.astype(float)

        fig = Figure(figsize=(6, 4))
        ax = fig.add_subplot(111)
        ax.hist(values, bins=20, color="blue", alpha=0.7, density=True, edgecolor="black")

        # A kernel density estimate needs some spread in the data; a constant
        # column has a singular covariance and would make gaussian_kde fail.
        if values.nunique() > 1:
            from scipy.stats import gaussian_kde
            kde = gaussian_kde(values)
            x_range = np.linspace(values.min(), values.max(), 100)
            ax.plot(x_range, kde(x_range), color='red', linewidth=2)
            ax.set_title(f"Histogram of {column} with Density Curve")
        else:
            ax.set_title(f"Histogram of {column}")

        ax.set_xlabel(column)
        ax.set_ylabel("Density")

        self.display_plot(parent, fig)

    def plot_pie_chart(self, parent, column):
        """Draw a pie chart of the value counts of ``column``.

        Args:
            parent: Widget that receives the plot canvas.
            column: Name of a column in ``self.data``.

        Raises:
            ValueError: If the column has no non-missing values.
        """
        counts = self.data[column].value_counts()
        if counts.empty:
            raise ValueError(f"Column '{column}' has no non-missing values.")

        fig = Figure(figsize=(6, 4))
        ax = fig.add_subplot(111)
        counts.plot.pie(autopct="%1.1f%%", ax=ax)
        ax.set_title(f"Pie Chart of {column}")
        ax.set_ylabel("")  # Remove the default ylabel pandas adds
        self.display_plot(parent, fig)

    def _numeric_correlation(self):
        """Return the correlation matrix of the numeric columns of ``self.data``.

        Non-numeric columns are excluded explicitly because ``DataFrame.corr``
        no longer drops them silently in recent pandas versions.

        Raises:
            ValueError: If fewer than two numeric columns are available.
        """
        numeric = self.data.select_dtypes(include="number")
        if numeric.shape[1] < 2:
            raise ValueError("At least two numeric columns are required to compute correlations.")
        return numeric.corr()

    def plot_correlation_matrix(self, parent):
        """Draw an annotated heatmap of the numeric columns' correlation matrix.

        Args:
            parent: Widget that receives the plot canvas.
        """
        fig = Figure(figsize=(6, 4))
        ax = fig.add_subplot(111)
        sns.heatmap(self._numeric_correlation(), annot=True, cmap="magma", ax=ax)
        ax.set_title("Correlation Matrix")
        self.display_plot(parent, fig)

    def plot_clustermap(self, parent):
        """Draw a hierarchically clustered heatmap of the numeric columns' correlations.

        Args:
            parent: Widget that receives the plot canvas.

        Raises:
            ValueError: If fewer than two columns have defined correlations.
        """
        corr = self._numeric_correlation()
        # Constant columns produce all-NaN rows/columns, which clustering cannot handle
        corr = corr.dropna(axis=0, how="all").dropna(axis=1, how="all")
        if corr.shape[0] < 2:
            raise ValueError("At least two non-constant numeric columns are required for a cluster map.")

        g = sns.clustermap(
            corr,
            annot=True,
            cmap="magma",
            standard_scale=1,
            figsize=(10, 8),
            cbar_kws={'label': 'Correlation'}
        )

        # clustermap registers its figure with pyplot; close exactly that figure
        # so it is not shown a second time by Jupyter and does not accumulate.
        plt.close(g.fig)

        g.ax_heatmap.set_title("Cluster Map")

        self.display_plot(parent, g.fig)

    def display_plot(self, parent, fig):
        """Displays a Matplotlib figure in the Tkinter window.

        Args:
            parent: Widget that receives the canvas.
            fig: The Matplotlib figure to embed.
        """
        canvas = FigureCanvasTkAgg(fig, master=parent)
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    # ------------------------------------------------------------------
    # Data manipulation
    # ------------------------------------------------------------------
    def _coerce_fill_value(self, raw_value, column):
        """Convert the fill value typed by the user to a type suited to its target.

        Args:
            raw_value: The non-empty string from the fill value entry.
            column: Target column name, or ``None`` to fill the whole frame.

        Returns:
            An ``int`` or ``float`` for integer/float target columns, a ``float``
            when filling the whole frame with a numeric-looking value, otherwise
            the unchanged string.

        Raises:
            ValueError: If a numeric target column is given a non-numeric value.
        """
        if column:
            target_dtype = self.data[column].dtype
            if pd.api.types.is_integer_dtype(target_dtype):
                return int(raw_value)
            if pd.api.types.is_float_dtype(target_dtype):
                return float(raw_value)
            return raw_value

        try:
            return float(raw_value)
        except ValueError:
            return raw_value

    def execute_data_manipulation(self):
        """Apply the selected ``dropna`` / ``fillna`` operation and refresh every view.

        The operation targets the selected column, or the whole frame when no
        column is selected. The result replaces ``self.data``; problems are
        reported on the status line and leave the data unchanged.
        """
        if self.data is None:
            self.error_message.set("Please load data first.")
            return

        manipulation = self.manipulation_type.get()
        column = self.manipulation_column.get() or None

        if manipulation not in self.MANIPULATION_OPTIONS:
            self.error_message.set("Please select a manipulation type.")
            return

        try:
            if manipulation == "dropna":
                updated = self.data.dropna(subset=[column]) if column else self.data.dropna()
                status = f"Missing values dropped{' in ' + column if column else ''}."
            else:
                raw_value = self.fill_value.get()
                if not raw_value:
                    self.error_message.set("Please enter a fill value.")
                    return

                try:
                    fill_value = self._coerce_fill_value(raw_value, column)
                except ValueError:
                    self.error_message.set("Invalid fill value type.")
                    return

                if column:
                    # Work on a copy so a failure cannot leave self.data half-updated
                    updated = self.data.copy()
                    updated[column] = updated[column].fillna(fill_value)
                else:
                    updated = self.data.fillna(fill_value)
                status = f"Missing values filled{' in ' + column if column else ''}."

            self.data = updated

            # Refresh every view of the data; the description is re-rendered
            # in place so the currently selected tab stays selected.
            self.display_dataframe()
            self._refresh_column_choices()
            self._render_description()

            self.error_message.set(status)
        except Exception as e:  # GUI boundary: report pandas/Tk errors on the status line
            self.error_message.set(f"Error during manipulation: {str(e)}")

    # ------------------------------------------------------------------
    # Reset
    # ------------------------------------------------------------------
    def reset_screen(self):
        """Discard the loaded data and return every widget to its initial state."""
        self.data = None

        # With no data loaded this clears both the rows and the column headings
        self.display_dataframe()
        self.desc_text.delete("1.0", tk.END)
        self._clear_plot_tabs()

        # Clearing the variables also clears the comboboxes bound to them
        self.selected_graph.set('')
        self.manipulation_type.set('')
        self.fill_value.set('')
        # Empties both column dropdowns and re-enables the plot column dropdown
        self._refresh_column_choices()

        self.error_message.set("Screen reset.")


if __name__ == "__main__":
    # Entry point: create the Tk root window, build the dashboard on it and
    # hand control to Tk's event loop.
    root = tk.Tk()
    app = DataDashboard(root)
    root.mainloop()

In [None]:
    # NOTE(review): this unexecuted cell re-defines `create_control_panel`, which
    # the DataDashboard class earlier in this notebook already defines. The code
    # is indented at method level but no `class` statement is present in this
    # cell -- presumably a leftover paste. Confirm, then delete this cell or move
    # any intended changes into the class definition.
    def create_control_panel(self, parent):
        """Build the control panel widgets inside ``parent``.

        Creates the Load/Describe/Reset buttons, the "Data Visualization" group
        (graph type, column, "Generate Plot") and the "Data Actions" group
        (manipulation type, column, fill value, "Execute Manipulation"), and
        stores the Tk variables and comboboxes on ``self``.
        """
        # Title for control panel
        tk.Label(parent, text="Control Panel", font=("Helvetica", 12, "bold")).pack(side=tk.TOP, pady=10)
        
        # Load Data Button
        tk.Button(parent, text="Load Data", command=self.load_data).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Describe Data Button
        tk.Button(parent, text="Describe Data", command=self.describe_data).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Reset Button
        tk.Button(parent, text="Reset", command=self.reset_screen).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
    
        # Create a frame with a visual separator for data selection and visualization
        selection_frame = tk.LabelFrame(parent, text="Data Visualization", labelanchor="n", 
                                        font=("Helvetica", 10, "bold"), 
                                        borderwidth=2, 
                                        relief=tk.RIDGE)
        selection_frame.pack(side=tk.TOP, padx=10, pady=10, fill=tk.X)
    
        # Graph Selection
        tk.Label(selection_frame, text="Select Visualization:").pack(side=tk.TOP, padx=10, pady=(10,0))
        self.selected_graph = tk.StringVar(parent)
        graph_options = ["Histogram", "Pie Chart", "Correlation Matrix", "Cluster Map"]
        self.graph_dropdown = ttk.Combobox(selection_frame, textvariable=self.selected_graph, 
                                           values=graph_options, state="readonly")
        self.graph_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Column Selection
        tk.Label(selection_frame, text="Select Column (Optional):").pack(side=tk.TOP, padx=10, pady=(10,0))
        self.selected_column = tk.StringVar(parent)
        self.column_dropdown = ttk.Combobox(selection_frame, textvariable=self.selected_column, state="readonly")
        self.column_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Add trace to update column dropdown and clear selection
        def on_graph_select(*args):
            """Trace callback: adapt the column dropdown to the selected graph type."""
            graph = self.selected_graph.get()
            
            # Clear column options if no data loaded
            if self.data is None:
                self.column_dropdown['values'] = []
                return
            
            # Update column dropdown based on graph selection
            if graph in ["Correlation Matrix", "Cluster Map"]:
                # Clear column selection for these graph types
                self.selected_column.set("")
                self.column_dropdown['state'] = 'disabled'
            else:
                # Enable column dropdown and populate with numeric columns
                self.column_dropdown['state'] = 'readonly'
                # Only int64/float64 columns are offered; other numeric dtypes
                # (e.g. int32, float32) are not matched by this filter
                numeric_columns = self.data.select_dtypes(include=['int64', 'float64']).columns.tolist()
                self.column_dropdown['values'] = numeric_columns
        
        # Trace the graph selection so on_graph_select runs on every write to the variable
        # NOTE(review): trace('w', ...) is the legacy spelling; trace_add('write', ...)
        # is the documented replacement -- confirm against the targeted Python version
        self.selected_graph.trace('w', on_graph_select)
        
        # Generate Plot Button
        tk.Button(selection_frame, text="Generate Plot", command=self.confirm_selection).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Data Actions Frame
        action_frame = tk.LabelFrame(parent, text="Data Actions", labelanchor="n", 
                                     font=("Helvetica", 10, "bold"), 
                                     borderwidth=2, 
                                     relief=tk.RIDGE)
        action_frame.pack(side=tk.TOP, padx=10, pady=10, fill=tk.X)
        
        # Data Manipulation Dropdown
        tk.Label(action_frame, text="Data Manipulation:").pack(side=tk.TOP, padx=10, pady=(10,0))
        self.manipulation_type = tk.StringVar(parent)
        manipulation_options = ["dropna", "fillna"]
        self.manipulation_dropdown = ttk.Combobox(action_frame, 
                                                  textvariable=self.manipulation_type, 
                                                  values=manipulation_options, 
                                                  state="readonly")
        self.manipulation_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Column Selection for Manipulation
        tk.Label(action_frame, text="Select Column (Optional):").pack(side=tk.TOP, padx=10, pady=(10,0))
        self.manipulation_column = tk.StringVar(parent)
        self.manipulation_column_dropdown = ttk.Combobox(action_frame, 
                                                         textvariable=self.manipulation_column, 
                                                         state="readonly")
        self.manipulation_column_dropdown.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
        
        # Fill NA value entry; nothing in this method shows or hides it, it is always packed
        self.fill_value_frame = tk.Frame(action_frame)  # Keep the frame for fill value entry fixed in place
        self.fill_value_frame.pack(side=tk.TOP, padx=10, pady=(10,0), fill=tk.X)
        
        tk.Label(self.fill_value_frame, text="Fill Value:").pack(side=tk.TOP, padx=10, pady=(10,0))
        self.fill_value = tk.StringVar(parent)
        self.fill_value_entry = tk.Entry(self.fill_value_frame, textvariable=self.fill_value)
        self.fill_value_entry.pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)

        
        # Manipulate Data Button
        tk.Button(action_frame, text="Execute Manipulation", command=self.execute_data_manipulation).pack(side=tk.TOP, padx=10, pady=5, fill=tk.X)
