In [None]:
from tqdm import tqdm
import pandas as pd
import logging
import os

# Set up logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class DataProcessor:
    """Load the raw CSV exports and merge the user and activity logs.

    After construction, ``dataframes`` holds the merged, cleaned log as one
    DataFrame. It is an empty DataFrame when nothing could be loaded or when
    either of the two required logs is missing or empty.
    """

    # Class-level fallback so `.dataframes` is always a DataFrame, even when
    # no file could be loaded.
    dataframes = pd.DataFrame()

    def __init__(self, filenames):
        """Initialize and process the data files.

        Args:
            filenames: Iterable of CSV paths. `preprocess` reads the entries
                keyed 'USER_LOG.csv' and 'ACTIVITY_LOG.csv'.
        """
        if loaded := self.load_CSV(filenames):
            self.dataframes = self.preprocess(loaded)

    @staticmethod
    def load_CSV(filenames):
        """Load CSV files and return a dict mapping filename -> DataFrame.

        Files that do not exist are logged and skipped.
        """
        dataframes = {}
        for filename in filenames:
            try:
                dataframes[filename] = pd.read_csv(filename)
            except FileNotFoundError:
                logger.error(f"File '{filename}' returned 'FileNotFoundError'")
        return dataframes

    def preprocess(self, dataframes):
        """Preprocesses the data (e.g., cleaning, formatting).

        Combines the 'Date' and 'Time' columns of USER_LOG into a single
        'DateTime' column (formatted "%d/%m/%Y %H:%M"), then merges the result
        row by row with ACTIVITY_LOG. The USER_LOG frame is modified in place.

        Args:
            dataframes: Dict of DataFrames keyed by filename.

        Returns:
            The merged DataFrame, or an empty DataFrame when either log is
            missing or empty (so callers can always rely on a DataFrame).
        """

        def is_valid_dataframe(df):
            return df is not None and not df.empty

        tqdm.pandas()  # Enable tqdm for progress bars with pandas

        # Preprocess USER_LOG
        user_log = dataframes.get('USER_LOG.csv')
        if not is_valid_dataframe(user_log):
            logger.info("There is no USER_LOG.csv")
            return pd.DataFrame()

        # Keep only the first whitespace-separated token of 'Date'.
        user_log['Date'] = user_log['Date'].apply(lambda x: x.split()[0])

        user_log['DateTime'] = pd.to_datetime(
            user_log['Date'] + " " + user_log['Time'],
            format="%d/%m/%Y %H:%M:%S"
        ).dt.strftime("%d/%m/%Y %H:%M")

        user_log.drop(columns=['Date', 'Time'], inplace=True)

        activity_log = dataframes.get('ACTIVITY_LOG.csv')
        if not is_valid_dataframe(activity_log):
            logger.info("There is no ACTIVITY_LOG.csv")
            return pd.DataFrame()

        merged_data = self.merge_on_user_id(user_log, activity_log)
        logger.info("User activity has been merged.")
        return merged_data

    def merge_on_user_id(self, user_log, activity_log):
        """Merge user_log and activity_log assuming row-wise alignment on User_ID.

        Row N of one log is combined with row N of the other; a differing
        User_ID is logged but the row is still merged. 'Component', 'Action'
        and 'Target' are stripped and title-cased, and rows where any of them
        is empty or missing are logged and dropped. Both input frames get the
        'User Full Name *Anonymized' column renamed to 'User_ID' in place.

        Args:
            user_log: Preprocessed USER_LOG DataFrame.
            activity_log: ACTIVITY_LOG DataFrame.

        Returns:
            DataFrame with one row per kept pair (empty if none were kept).
        """
        # Rename columns for consistency
        for df in (user_log, activity_log):
            df.rename(columns={'User Full Name *Anonymized': 'User_ID'}, inplace=True)

        # Only rows present in both logs can be paired up.
        row_count = min(len(user_log), len(activity_log))
        if len(user_log) != len(activity_log):
            logger.error(
                f"Row count mismatch: user_log has {len(user_log)} rows, "
                f"activity_log has {len(activity_log)} rows; "
                f"only the first {row_count} rows are merged."
            )

        # Rows are collected in a list and turned into a DataFrame once;
        # concatenating inside the loop copies the whole frame on every row.
        merged_rows = []

        with tqdm(total=row_count, desc="Merging on User_ID", unit="row") as pbar:

            paired = enumerate(activity_log.head(row_count).iterrows())
            for position, (label, activity_row) in paired:

                # Component filtering (System / Folder) is done by the
                # QueryGUI application, not here.

                # `position` (not the index label) is what iloc expects.
                user_row = user_log.iloc[position].to_dict()

                if user_row['User_ID'] != activity_row['User_ID']:  # Log any mismatches
                    logger.error(f"Mismatch in User_ID for row {label}: "
                                 f"user_log['User_ID']={user_row['User_ID']} "
                                 f"activity_log['User_ID']={activity_row['User_ID']}")

                merged_row = {**user_row, **activity_row.to_dict()}

                # Preliminary cleaning - ensure naming consistency
                for column in ['Component', 'Action', 'Target']:
                    value = merged_row[column]
                    # Missing cells arrive as NaN (a float); treat them like
                    # an empty string instead of failing on .strip().
                    merged_row[column] = value.strip().title() if isinstance(value, str) else ""
                    if merged_row[column] == "":
                        logger.info(f"Row {merged_row} of 'ACTIVITY_LOG.csv' has no valid {column} value")
                        break
                else:
                    merged_rows.append(merged_row)

                # Update the progress bar
                pbar.update(1)

        return pd.DataFrame(merged_rows)


# Build the merged activity table from the raw CSV exports and save it as
# 'dataframes.csv', which the database-insertion cell reads back in.
# NOTE: 'COMPONENT_CODES.csv' is loaded, but DataProcessor.preprocess only
# reads the USER_LOG and ACTIVITY_LOG entries.
FileNames = ['ACTIVITY_LOG.csv', 'USER_LOG.csv', 'COMPONENT_CODES.csv']
dataframes = DataProcessor(FileNames).dataframes
print(f"Saving DataFrame with {len(dataframes)} rows to CSV.")
dataframes.to_csv('dataframes.csv', index=False)  # index=False: the row index is not written as a column


In [None]:
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import declarative_base, relationship

# Declarative base for SQLAlchemy: every ORM model below subclasses it, and
# Base.metadata.create_all(engine) is what creates their tables.
Base = declarative_base()

class User(Base):
    """ORM model for the 'users' table: one row per distinct user ID."""
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)  # surrogate key, referenced by user_activities.user_id
    user_id = Column(Integer, unique=True, nullable=False)  # ID taken from the 'User_ID' data column
    activities = relationship('UserActivity', back_populates='user')
    # user.activities  # Returns all UserActivity records for a given User

class Action(Base):
    """ORM model for the 'actions' lookup table: one row per distinct action name."""
    __tablename__ = 'actions'
    id = Column(Integer, primary_key=True)  # surrogate key, referenced by user_activities.action_id
    name = Column(String, unique=True, nullable=False)  # action name, stored once
    # action.activities: all UserActivity records that reference this action
    activities = relationship('UserActivity', back_populates='action')

class Target(Base):
    """ORM model for the 'targets' lookup table: one row per distinct target name."""
    __tablename__ = 'targets'
    id = Column(Integer, primary_key=True)  # surrogate key, referenced by user_activities.target_id
    name = Column(String, unique=True, nullable=False)  # target name, stored once
    # target.activities: all UserActivity records that reference this target
    activities = relationship('UserActivity', back_populates='target')

class Component(Base):
    """ORM model for the 'components' lookup table: one row per distinct component name."""
    __tablename__ = 'components'
    id = Column(Integer, primary_key=True)  # surrogate key, referenced by user_activities.component_id
    name = Column(String, unique=True, nullable=False)  # component name, stored once
    # component.activities: all UserActivity records that reference this component
    activities = relationship('UserActivity', back_populates='component')

class UserActivity(Base):
    """ORM model for 'user_activities': one logged interaction.

    Each row stores a timestamp plus foreign keys into the users, components,
    actions and targets tables; the names themselves live in those tables.
    """
    __tablename__ = 'user_activities'
    id = Column(Integer, primary_key=True)
    datetime = Column(DateTime, nullable=False)
    user_id = Column(Integer, ForeignKey('users.id', ondelete='CASCADE'), nullable=False)
    component_id = Column(Integer, ForeignKey('components.id', ondelete='CASCADE'), nullable=False)
    action_id = Column(Integer, ForeignKey('actions.id', ondelete='CASCADE'), nullable=False)
    # Nullable: the insertion code stores no target when a row's Target value
    # is missing, and the GUI query outer-joins targets for the same reason.
    # With nullable=False such a row aborted the whole insert.
    target_id = Column(Integer, ForeignKey('targets.id', ondelete='CASCADE'), nullable=True)

    user = relationship('User', back_populates='activities')
    # user_activity.user will return the User record for a given UserActivity record
    component = relationship('Component', back_populates='activities')
    action = relationship('Action', back_populates='activities')
    target = relationship('Target', back_populates='activities')

########################################################################################################################################################


In [None]:

from tqdm import tqdm
import pandas as pd
import os

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

class InsertionTool:
    """Create the SQLite database and load a merged activity DataFrame into it.

    Constructing the tool does all the work: it creates the tables, inserts
    every row in one transaction and closes the connection again.
    """

    def __init__(self, dataframe: pd.DataFrame, db_path='user_activities.db'):
        """Initialize the database and insert normalized data.

        Args:
            dataframe: Rows with 'User_ID', 'Component', 'Action', 'Target'
                and 'DateTime' columns.
            db_path: Path of the SQLite database file.
        """
        try:
            if Base:
                self.engine = create_engine(f'sqlite:///{db_path}')
                Base.metadata.create_all(self.engine)
                self.session = sessionmaker(bind=self.engine)()
                try:
                    self.populate(dataframe)
                finally:
                    # Close exactly once, whether or not the insert succeeded.
                    self.close()
        except Exception as e:
            print("There was an error connecting to the database: ", e)
            if "Base" in str(e):
                print("You need to run the cell which defines and establishes the schema")

    def close(self):
        """Close the session and dispose of the engine."""
        self.session.close()  # Close the session
        self.engine.dispose()  # Dispose of the engine to release the connection
        print("Session and engine closed.")

    def _add_or_return_row(self, model, field, value):
        """Return the id of the `model` row whose `field` equals `value`, creating it if needed.

        A new row is flushed, not committed: the flush assigns its primary key
        while keeping it inside the transaction that `populate` commits or
        rolls back as a whole.
        """
        entry = self.session.query(model).filter_by(**{field: value}).first()
        if not entry:
            entry = model(**{field: value})
            self.session.add(entry)
            self.session.flush()
        return entry.id

    def populate(self, dataframe):
        """Insert data into normalized tables.

        All rows are written in a single transaction. On any error the
        transaction is rolled back, so no partial data is left behind, and the
        error is printed. Closing the session is left to the caller.
        """
        try:
            for _, row in tqdm(dataframe.iterrows(), total=len(dataframe), desc="Inserting Rows", unit="row"):
                user_id = self._add_or_return_row(User, 'user_id', row['User_ID'])
                component_id = self._add_or_return_row(Component, 'name', row['Component'])
                action_id = self._add_or_return_row(Action, 'name', row['Action'])
                # A missing Target is stored as NULL rather than as a lookup row.
                target_id = self._add_or_return_row(Target, 'name', row['Target']) if pd.notna(row['Target']) else None

                activity = UserActivity(
                    user_id=user_id,
                    datetime=pd.to_datetime(row['DateTime'], format="%d/%m/%Y %H:%M"),
                    component_id=component_id,
                    action_id=action_id,
                    target_id=target_id
                )
                self.session.add(activity)

            self.session.commit()
            print("Data inserted successfully!")

        except Exception as e:
            # Roll back any uncommitted changes in the session
            self.session.rollback()
            print(f"An error occurred: {e}")


def remove(path):
    """Delete the file at *path* if it exists and report the removal."""
    if not os.path.exists(path):
        return
    os.remove(path)
    print(f"{path} removed.")


# Rebuild the SQLite database from the merged CSV.
db_path = 'user_activities.db'
dataframe = pd.read_csv('dataframes.csv')  # merged log written by the DataProcessor cell
remove(db_path)  # delete any existing database file first
# sequential #
InsertionTool(dataframe, db_path)  # creates the tables, inserts the rows, then closes the connection




This implementation stores only IDs in the `user_activities` table and keeps the names in separate reference tables (`components`, `actions`, `targets`), which prevents **data inconsistency**. If a name changes (e.g., a Component is renamed), it only needs to be updated once in its reference table, and every related row in `user_activities` reflects the change through its foreign key. This removes the need to update many activity rows, avoids mismatched names, and keeps the data consistent across the database. It also simplifies maintenance.

In [None]:
# Standard library 
import itertools
import colorsys
import tkinter as tk
from functools import partial
from tkinter import messagebox, ttk
from typing import List, Optional, Dict, Any
# Third-party
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.colors import CSS4_COLORS
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, func, extract
from sqlalchemy.orm import sessionmaker

class QueryGUI:
    def __init__(self, db_path='user_activities.db'):
        """Initialize the QueryGUI class with the given database.

        Opens the SQLite database, removes the unwanted components, caches the
        activity data, builds the main window and then blocks in the Tk main
        loop until the window is closed. The database session and engine are
        released when the loop ends or when start-up fails.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.root = None
        try:
            self.engine = create_engine(f'sqlite:///{db_path}')
            Base.metadata.create_all(self.engine)
            self.session = sessionmaker(bind=self.engine)()

            # Create the root window before anything can raise a messagebox;
            # otherwise tkinter creates its own default root and a second,
            # empty window appears.
            self.root = tk.Tk()
            self.root.title("QueryGUI")

            self.delete_components(("Folder","System"))
            self.df = self.fetch_data()

            # Add main buttons
            self.assign_plotting_cache()
            self.init_gui()

            # Run main GUI loop
            self.root.mainloop()

        except Exception as e:
            print("There was an error connecting to the database: ", e)
            if "Base" in str(e):
                print("You need to run the cell which defines and establishes the schema")
            # Do not leave a half-built window behind.
            if self.root is not None:
                try:
                    self.root.destroy()
                except tk.TclError:
                    pass  # the window is already gone
        finally:
            # Release the database file once the GUI is finished.
            session = getattr(self, "session", None)
            if session is not None:
                session.close()
            engine = getattr(self, "engine", None)
            if engine is not None:
                engine.dispose()

    def delete_components(self, names=("Folder", "System")):
        """Delete the named components together with their UserActivity records.

        The activity rows are deleted explicitly instead of relying on the
        foreign-key cascade, which SQLite only enforces when foreign-key
        support is switched on for the connection.

        Args:
            names: Component names to remove.
        """
        # Imported here because the file-level imports do not provide it.
        from sqlalchemy.exc import SQLAlchemyError

        try:
            component_ids = [
                row.id
                for row in self.session.query(Component.id).filter(Component.name.in_(names))
            ]
            if not component_ids:
                return

            # Bulk deletes run on the Query itself; a list from .all() has no .delete().
            self.session.query(UserActivity).filter(
                UserActivity.component_id.in_(component_ids)
            ).delete(synchronize_session=False)
            self.session.query(Component).filter(
                Component.id.in_(component_ids)
            ).delete(synchronize_session=False)
            self.session.commit()
            messagebox.showwarning("Components Removed", f"Components {names} have been removed from the database.")
        except SQLAlchemyError as e:
            self.session.rollback()  # Rollback the transaction
            print(f"Error deleting components: {e}")

    def fetch_data(self):
        """Run the base activity query and load its rows into a DataFrame."""
        column_names = ["datetime", "user_id", "component", "action", "target"]
        rows = self.query_and_return()
        return pd.DataFrame(rows, columns=column_names)

    def data(self):
        '''Creates a copy of a cached dataframe'''
        return self.df.copy()
    
    def query_and_return(self):
        """Build the base query for user activity data without executing it.

        Selects the activity timestamp plus the user ID and the component,
        action and target names, joined through the activity's foreign keys.
        """
        selected = (
            UserActivity.datetime,
            User.user_id.label("user_id"),
            Component.name.label("component"),
            Action.name.label("action"),
            Target.name.label("target"),
        )
        base = self.session.query(*selected)
        base = base.join(User, User.id == UserActivity.user_id)
        base = base.join(Component, Component.id == UserActivity.component_id)
        base = base.join(Action, Action.id == UserActivity.action_id)
        # Outer join: activities without a target are still returned.
        base = base.join(Target, Target.id == UserActivity.target_id, isouter=True)
        return base

    def fetch_pivotted_data(self, granularity='month'):
        """Count interactions per user and component and print the result.

        Args:
            granularity: 'day' or 'month' to additionally group by that part
                of the activity timestamp; any other value groups by user and
                component only.

        Returns:
            DataFrame with the columns user_id, component, interactions and
            datetime. The table is also printed, as before.
        """
        query = self.query_and_return() # queries user activity rows with joins on dimensions / returns the query
        query = query.add_columns(func.count(UserActivity.id).label("interactions"))

        # Add datetime granularity-based grouping
        group_columns = [User.user_id, Component.name]
        if granularity in ['day', 'month']:
            period = extract(granularity, func.datetime(UserActivity.datetime))
            query = query.add_columns(period.label(granularity))
            group_columns.append(period)
        query = query.group_by(*group_columns)

        # Execute and fetch results
        results = query.all()
        reordered_results = [
            (row[1], row[2], row[5], row[0])  # Reorder as user_id, component, interactions, datetime
            for row in results
        ]

        # Define and reorder the columns
        columns = ["user_id", "component", "interactions", "datetime"]

        # Convert results into a Pandas DataFrame
        data = pd.DataFrame(reordered_results, columns=columns)
        if granularity == 'month':
            # Convert first: an empty result has an object-dtype column, on
            # which the .dt accessor raises.
            data['datetime'] = pd.to_datetime(data['datetime']).dt.strftime('%b') # Month names truncated

        print(data)
        return data

    def create_button(self, container, text, command, padx=5, pady=5, side=None):
        """Build a ttk button inside *container*, pack it and return it."""
        widget = ttk.Button(container, text=text, command=command)
        pack_options = {"side": side, "padx": padx, "pady": pady}
        widget.pack(**pack_options)
        return widget

    def init_gui(self):
        """Fill the root window with one button per available action."""
        # Insertion order of the dict is the top-to-bottom order of the buttons.
        actions = {
            "Fetch Pivotted Query": self.fetch_pivotted_data,
            "View Pivot Table": self.draw_pivotted,
            "View Statistics": self.view_statistics,
            "View Stacked Bar Graph": self.stacked_bar_graph,
            "View Correlation": self.view_correlation,
            "Exit": self.root.destroy,
        }

        for label, callback in actions.items():
            self.create_button(self.root, label, callback)
    
    @staticmethod
    def create_frame(container, height=300, width=400, scrollable=False, vertical=True, title=None):
        """Create a packed frame, optionally scrollable and optionally titled.

        Returns the frame itself, or for a scrollable frame the tuple
        (contents frame, function that refreshes the scroll region).
        `vertical=False` gives the scrollable frame a horizontal scrollbar.
        """
        outer = ttk.Frame(container)
        outer.pack(fill=tk.BOTH, expand=True)

        if not scrollable:
            if title:
                QueryGUI.create_label(outer, title.upper(), size=11, padding=(10, 10))
            return outer

        contents, refresh_region = QueryGUI.scrolling_window(outer, height, width, vertical)
        if title:
            QueryGUI.create_label(contents, title.upper(), size=11, padding=(10, 10))
        return contents, refresh_region

    @staticmethod
    def scrolling_window(frame, height, width, vertical=True):
        """Add a scrollable canvas with one scrollbar to *frame*.

        Args:
            frame: Parent widget receiving the canvas and the scrollbar.
            height: Canvas height.
            width: Canvas width.
            vertical: True for a vertical scrollbar, False for a horizontal one.

        Returns:
            Tuple of (contents frame to put widgets into, function that
            recomputes the scroll region from the current contents).
        """
        canvas = tk.Canvas(frame, height=height, width=width)

        contents = ttk.Frame(canvas)
        canvas.create_window((0, 0), window=contents, anchor="nw")

        def update_scrollable_region():
            contents.update_idletasks()
            canvas.configure(scrollregion=canvas.bbox("all"))

        # Keep the scroll region in step with the contents when they are
        # resized after the caller's one-off refresh (e.g. a chart redraw).
        contents.bind(
            "<Configure>",
            lambda _event: canvas.configure(scrollregion=canvas.bbox("all")),
        )

        # Both orientations use the themed ttk scrollbar.
        if vertical:
            scrollbar = ttk.Scrollbar(frame, orient=tk.VERTICAL, command=canvas.yview)
            canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
            canvas.configure(yscrollcommand=scrollbar.set)

        else:
            scrollbar = ttk.Scrollbar(frame, orient=tk.HORIZONTAL, command=canvas.xview)
            canvas.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
            scrollbar.pack(side=tk.BOTTOM, fill=tk.X)
            canvas.configure(xscrollcommand=scrollbar.set)

        return contents, update_scrollable_region

    @staticmethod
    def create_label(container, text, size=11, anchor="w", padding=(10, 0), fill=tk.X):
        """Build a bold Arial label, pack it into *container* and return it.

        `padding` is an (x, y) pair used as the pack padding.
        """
        pad_x, pad_y = padding[0], padding[1]
        bold_font = ("Arial", size, "bold")
        widget = ttk.Label(container, text=text, font=bold_font, anchor=anchor)
        widget.pack(fill=fill, padx=pad_x, pady=pad_y)
        return widget
    
    def draw_pivotted(self):
        """Show, per component, a table of monthly interaction counts for every user.

        Opens a scrollable window with one titled section per component; each
        section holds a colour-striped table and a User ID search box. Any
        error is reported in a message box.
        """
        try:
            activity = self.data()
            # Monthly period of every activity; the bucket used for counting
            activity['YearMonth'] = pd.to_datetime(activity['datetime'], format="%d/%m/%Y %H:%M").dt.to_period('M')

            group_keys = ['component', 'user_id', 'YearMonth']
            counts = activity.groupby(group_keys).size()  # rows per (component, user, month)
            counts = counts.reset_index(name='interactions')
            counts = counts.sort_values(by=group_keys)
            counts['YearMonth'] = counts['YearMonth'].dt.strftime("%b-%y")  # e.g. "Jan-24"

            window = tk.Toplevel(self.root)
            window.title("Cascading User Interactions")

            # One scrollable frame holds every component section
            frame, update_scrollable_region = self.create_frame(
                window, height=600, width=500, scrollable=True
            )

            palette = [CSS4_COLORS[color] for color in ['steelblue', 'seagreen', 'saddlebrown']]

            # Components take the palette colours in turn
            for name, colorhex in zip(counts['component'].unique(), itertools.cycle(palette)):

                records = counts[counts['component'] == name]
                subframe = self.create_frame(frame, 200, 200, title=name)  # section inside the scrolling contents

                treeview = self.create_treeview(
                    frame=subframe,
                    columns=["User_ID", "Year-Month", "Interactions"],
                    height=200
                )
                self.populate_treeview(treeview, records, colorhex)
                self.create_searchbox(subframe, treeview)
                subframe.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

            # Refresh the scroll region once all sections exist
            update_scrollable_region()

        except Exception as e:
            messagebox.showerror("Error", f"An error occurred: {str(e)}")

    @staticmethod
    def brightness(hexcode, factor):
        """ A helper method for changing the brightness of a color by hex code"""
        rgb = tuple(int(hexcode.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
        hls = colorsys.rgb_to_hls(*(x/255.0 for x in rgb))
        l = max(0, min(1, hls[1] * factor))
        rgb_new = colorsys.hls_to_rgb(hls[0], l, hls[2])
        return '#{:02x}{:02x}{:02x}'.format(*(int(x*255) for x in rgb_new))

    def create_treeview(self, frame, columns, height):
        """Create a headings-only Treeview with one 100px column per entry in *columns*.

        The Treeview is placed in a new frame inside *frame*; *height* is
        passed on to `create_frame`.
        """
        holder = self.create_frame(frame, height)

        tree = ttk.Treeview(holder, columns=columns, show="headings")
        tree.pack(fill=tk.BOTH, expand=True)

        for name in columns:
            tree.heading(name, text=name)
            tree.column(name, width=100, anchor="w")

        return tree

    def populate_treeview(self, tree : ttk.Treeview, data, colorhex="#d3d3d3"):
        """Insert one row per record, striping the background per user.

        Users alternate between *colorhex* and a brighter shade of it; only the
        first row of each user shows the "User X" label.
        """
        tree.tag_configure("evenrow", background=colorhex)
        tree.tag_configure("oddrow", background=self.brightness(colorhex, 1.2))

        for user_position, user_id in enumerate(data["user_id"].unique()):
            # The stripe depends on the user's position, not on the row
            stripe = "oddrow" if user_position % 2 else "evenrow"
            user_rows = data[data["user_id"] == user_id].reset_index(drop=True)

            for row_position, (_, row) in enumerate(user_rows.iterrows()):
                heading = "" if row_position else f"User {user_id}"
                tree.insert(
                    "",     # parent: top-level item
                    "end",  # append after the existing rows
                    values=(heading, str(row["YearMonth"]), row["interactions"]),
                    tags=(stripe,),
                )

    def create_searchbox(self, frame: ttk.Frame, treeview: ttk.Treeview):
        """Add a labelled entry to *frame* that searches *treeview* on every key release."""
        QueryGUI.create_label(frame, "User ID: ", size=10, padding=(10, 10))

        entry = tk.Entry(frame)
        entry.pack(side=tk.LEFT, padx=10, pady=10)

        def handle_key(event):
            return self.search_treeview(event, treeview)

        entry.bind("<KeyRelease>", handle_key)
        return entry
    
    def search_treeview(self, search: tk.Event, treeview: ttk.Treeview):
        """Select all rows of the searched User ID; clear the selection on mouse movement.

        Args:
            search: Key event of the search entry; its widget holds the text.
            treeview: Table whose first column shows "User X" on the first row
                of each user and an empty string on that user's other rows.

        Returns:
            The message box result when the input is not a number, else None.
        """
        user_search = search.widget.get()
        if not user_search:
            return None

        # isdecimal, not isdigit: isdigit also accepts characters such as "²"
        # that int() cannot convert.
        if not user_search.isdecimal():
            return messagebox.showwarning("Invalid ID", "User ID needs to be an integer type.")

        # Compare the whole label: a substring test would also select
        # "User 10" or "User 21" when searching for 1.
        wanted_label = f"User {int(user_search)}"
        matching_rows = []

        def clear_selection_on_mouse(event=None):
            """Clear selection and unbind the mouse."""
            treeview.selection_remove(treeview.selection())
            treeview.unbind("<Motion>")

        selection = False

        for child in treeview.get_children():

            if values := treeview.item(child, "values"):

                if values[0] == wanted_label:
                    selection = True

                elif selection and "User" in values[0]:
                    # First row of the next user: the matching block is complete.
                    treeview.see(child)
                    treeview.focus(child)
                    break

            if selection:
                matching_rows.append(child)
        else:
            # The match was the last user in the table, so no following row
            # triggered the scroll above.
            if matching_rows:
                treeview.see(matching_rows[-1])
                treeview.focus(matching_rows[-1])

        treeview.selection_set(matching_rows) # highlights all matching rows
        treeview.bind("<Motion>", clear_selection_on_mouse) # binds mouse to clear selection on move

    @staticmethod
    def create_textbox(frame: ttk.Frame, content: str) -> None:
        """Show *content* in a word-wrapped text widget sized to its line count (at most 30 lines)."""
        line_count = len(content.split("\n"))
        visible_lines = min(line_count + 2, 30)
        widget = tk.Text(frame, wrap="word", height=visible_lines, width=35)
        widget.insert("1.0", content)
        widget.pack(padx=10, pady=5)

    @staticmethod
    def calculate_stats(data: pd.DataFrame, grouped_by: List[str], separate_by: Optional[str]) -> pd.DataFrame:
        grouped = data.groupby(grouped_by).size().reset_index(name='count')
        agg_funcs = {
            'Mean': ('count', lambda x: round(x.mean())),
            'Median': ('count', lambda x: round(x.median())),
            'Mode': ('count', lambda x: round(x.mode().iloc[0]) if not x.mode().empty else None),
        }
        group_columns = ['component'] + ([separate_by] if separate_by else [])
        return grouped.groupby(group_columns).agg(**agg_funcs).reset_index()

    def view_statistics(self):
        """Open a window showing per-component statistics, monthly and over the whole period.

        Any error is reported in a message box.
        """
        def render(window: tk.Toplevel, heading: str, stats: pd.DataFrame, period: Optional[str]) -> None:
            """Add one titled, scrollable section with the given statistics."""
            frame, update_scroll_region = QueryGUI.create_frame(window, 600, 500, scrollable=True, title=heading)

            if not period:
                # Statistics across the whole timeframe in a single table
                QueryGUI.create_textbox(frame, stats.to_string(index=False, header=True))
            else:
                # One labelled table per period value
                for month, group_data in stats.groupby(period):
                    QueryGUI.create_label(frame, month.strftime("%B"))
                    table = group_data.drop(columns=[period])
                    QueryGUI.create_textbox(frame, table.to_string(index=False, header=True))

            update_scroll_region()

        try:
            data = self.data()
            window = tk.Toplevel(self.root)
            # Convert 'datetime' to pandas datetimes and truncate to monthly periods
            data['YearMonth'] = pd.to_datetime(data['datetime'], format="%d/%m/%Y %H:%M").dt.to_period('M')

            # (heading, columns to count by, column to split the output by)
            views = (
                ("Monthly Statistics", ['component', 'YearMonth', 'user_id'], "YearMonth"),
                ("Semester Statistics", ['component', 'user_id'], None),
            )

            for heading, grouped_by, period in views:
                stats = QueryGUI.calculate_stats(data, grouped_by, period)
                render(window, heading, stats, period)

        except Exception as e:
            messagebox.showerror("Error", f"An error occurred: {str(e)}")

    def assign_plotting_cache(self):
        self.graph_by = "Total" # starting value for sorting plots
        self.graph_width = None

    def stacked_bar_graph(self):
        """Open a window with a stacked bar chart of interactions per user and component.

        The chart sits in a horizontally scrollable frame whose width grows
        with the number of users; a dropdown and buttons re-sort the bars.
        """
        data = self.data()

        components = ['Assignment', 'Quiz', 'Lecture', 'Book', 'Project', 'Course']
        color_names = ['steelblue', 'darkorange', 'forestgreen', 'crimson', 'mediumpurple', 'saddlebrown']
        hexcolors = {name: CSS4_COLORS[color] for name, color in zip(components, color_names)}

        # Interactions per (user, component), restricted to the charted components
        interaction_counts = data.groupby(['user_id', 'component']).size().reset_index(name='interaction_counts')
        interaction_counts = interaction_counts[interaction_counts['component'].isin(components)]

        # One row per user, one column per component; absent combinations become 0
        pivot_table = interaction_counts.pivot(index='user_id', columns='component', values='interaction_counts')
        pivot_table = pivot_table.fillna(0)

        # Half a unit of figure width per user, never narrower than 10
        self.graph_width = max(10, len(pivot_table.index) * 0.5)

        window = tk.Toplevel(self.root)
        window.title("(Stacked Bar Graph)")

        frame, update_scroll_region = QueryGUI.create_frame(window, width=1200, height=600, scrollable=True, vertical=False)

        button_frame = ttk.Frame(window)
        button_frame.pack(side="top", fill="x", padx=10, pady=5)

        # Holds the selected sort direction
        order_tag = tk.StringVar(value="Descending")

        # Redraw with the current settings; graph_by is supplied per control
        redraw = partial(self.update_bargraph, pivot_table, frame, components, hexcolors, order_tag)

        order_dropdown = self.create_dropdown(button_frame, "Order by:", order_tag, options=["Descending", "Ascending"])
        order_dropdown.bind("<<ComboboxSelected>>", lambda click: redraw())
        order_dropdown.pack(side="left", padx=5)

        # By total interactions
        self.create_button(button_frame, "Order by Total Interactions", partial(redraw, graph_by="Total"))

        # By individual components
        for name in components:
            self.create_button(button_frame, f"Order by {name}", partial(redraw, graph_by=name), side="left")

        redraw()

        update_scroll_region()
    
    def update_bargraph(self, data: pd.DataFrame, frame, components, hexcolors, order_tag, graph_by=None):
        """Redraw the stacked bar chart inside *frame*, sorted by the selected column.

        Args:
            data: One row per user and one column per component holding counts.
            frame: Container whose widgets are replaced by the new chart.
            components: Component names in stacking order.
            hexcolors: Mapping of component name to bar colour.
            order_tag: Tk variable holding "Ascending" or "Descending".
            graph_by: Column to sort by ("Total" or a component name). When
                omitted, the previously chosen column is reused.
        """
        if graph_by:
            self.graph_by = graph_by  # remember the choice for later redraws
        else:
            graph_by = self.graph_by

        is_ascending = (order_tag.get() == "Ascending")

        if self.graph_by == "Total":
            if "Total" not in data.columns:
                data = data.assign(Total=data.sum(axis=1))
            data = data.sort_values(by="Total", ascending=is_ascending)
        elif self.graph_by in data.columns:
            data = data.sort_values(by=self.graph_by, ascending=is_ascending)
            # Draw the sort component first so it forms the base of each bar.
            components = [self.graph_by] + [comp for comp in components if comp != self.graph_by]
        # else: no user interacted with that component, so the table has no
        # such column and there is nothing to sort by.

        # pyplot keeps every figure it creates alive until it is closed, so
        # close the figure of the previous draw before replacing it.
        stale_figure = getattr(frame, "_bargraph_figure", None)
        if stale_figure is not None:
            plt.close(stale_figure)

        for widget in frame.winfo_children():
            widget.destroy()

        fig, ax = plt.subplots(figsize=(self.graph_width, 6))
        frame._bargraph_figure = fig
        bottom = np.zeros(len(data))

        # Create x-axis positions for bars
        x_positions = np.arange(len(data))

        # Charting each component's interaction counts
        bar_groups = []
        for column in components:
            if column in data.columns:
                counts = data[column].values
                # ax.bar returns the rectangles of exactly this component.
                bar_groups.append(
                    ax.bar(
                        x_positions,
                        counts,
                        bottom=bottom,
                        label=column,
                        color=hexcolors[column]
                    )
                )
                bottom += counts

        # Customize the chart
        ax.set_title("User Interactions", loc="left")
        ax.set_xlabel("User IDs")
        ax.set_ylabel("Interactions")
        ax.legend(title="Components", loc="center left")
        ax.set_xticks(x_positions)
        ax.set_xticklabels(data.index, ha="right", rotation=0)

        fig.tight_layout()

        # Embed the chart in the tkinter canvas
        canvas = FigureCanvasTkAgg(fig, master=frame)
        canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

        # Add hover functionality once the Tk canvas is attached to the figure,
        # so the callback is registered on the canvas that receives the events.
        self.add_hover_annotations(fig, ax, bar_groups)

        canvas.draw()
    def add_hover_annotations(self, fig, ax, bars):
        """Show a bar's height in a small box while the mouse hovers over it.

        *bars* is an iterable of bar groups. Annotations are created on first
        hover and reused afterwards; only one is visible at a time.
        """
        cache = {}

        def hide_all():
            for note in cache.values():
                note.set_visible(False)

        def annotation_for(rect):
            # Build the annotation the first time this bar is hovered
            if rect not in cache:
                cache[rect] = ax.annotate(
                    f"{rect.get_height()}",
                    xy=(rect.get_x() + rect.get_width() / 2, rect.get_y() + rect.get_height()),
                    xytext=(15, -15),
                    textcoords="offset points",
                    ha="center",
                    va="bottom",
                    fontsize=8,
                    bbox=dict(boxstyle="round,pad=0.3", edgecolor="black", facecolor="white")
                )
            return cache[rect]

        def on_hover(event):
            # Mouse outside the chart: hide everything
            if event.inaxes != ax:
                hide_all()
                fig.canvas.draw_idle()
                return

            for rect in (bar for group in bars for bar in group):
                if not rect.contains(event)[0]:
                    continue
                note = annotation_for(rect)
                hide_all()
                note.set_visible(True)
                fig.canvas.draw_idle()
                return

        fig.canvas.mpl_connect("motion_notify_event", on_hover)

    def create_correlation_window(self):
        """Build the correlation-analysis window and return every piece the caller wires up.

        Returns:
            tuple: (window, control frame, statistics label, user-1 entry,
            user-2 entry, figure, axes, embedded canvas, ENTER button).
        """
        # Top-level window with a horizontal control strip along its top edge
        top = tk.Toplevel(self.root)
        top.title("Correlation Analysis")
        top.geometry("800x900")
        controls = ttk.Frame(top)
        controls.pack(side=tk.TOP, fill=tk.X, padx=10, pady=10)

        # Statistics read-out, then the controls in their left-to-right order.
        # The button is created without a command; the caller assigns one later.
        readout = self.create_stats_frame(top, "Correlation Statistics")
        enter_button = self.create_button(controls, "ENTER", None, side="left")
        first_entry, second_entry = [
            self.create_labeled_entry(controls, caption)
            for caption in ("User ID 1:", "User ID 2:")
        ]

        # Matplotlib figure embedded in its own expanding frame
        plot_area = ttk.Frame(top)
        plot_area.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=10, pady=10)
        figure, axes = plt.subplots(figsize=(8, 6))
        embedded = FigureCanvasTkAgg(figure, master=plot_area)
        embedded.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        embedded.draw()

        return top, controls, readout, first_entry, second_entry, figure, axes, embedded, enter_button
    
    def view_correlation(self):
        """Open the correlation-analysis window and keep its graph in sync with the inputs.

        The graph is refreshed when the ENTER button is pressed, when a key is
        released in either user-ID entry, when the component selection changes
        and when the granularity slider moves.
        """
        data = self.data()

        # Create the correlation analysis window and UI components
        window, frame, stats_label, user1_box, user2_box, fig, ax, canvas, button = self.create_correlation_window()

        # Create dropdown menu for components
        components = [row[0] for row in self.session.query(Component.name).distinct().all()]
        # An empty component list must not raise IndexError; start with a blank selection instead.
        component_var = tk.StringVar(value=components[0] if components else "")
        self.create_dropdown(frame, "Component:", component_var, components)

        # Create granularity slider and label
        granularity_slider, granularity = self.create_granularity_slider(frame)

        # Parse the timestamp once; despite its name, "YearMonth" holds the full datetime
        data["YearMonth"] = pd.to_datetime(data["datetime"], format="%d/%m/%Y %H:%M")

        # Bind inputs to update graph
        update_graph_partial = partial(
            self.update_graph, data, user1_box, user2_box, component_var,
            granularity_slider, stats_label, ax, canvas
        )

        button.config(command=update_graph_partial)

        # Use <KeyRelease>, not <Key>: a widget-level <Key> binding runs before the
        # Entry has inserted the typed character, so the callback would read the
        # previous content and the graph would lag one keystroke behind.
        for box in (user1_box, user2_box):
            box.bind("<KeyRelease>", lambda e: update_graph_partial())

        # trace_add("write", ...) is the supported replacement for the deprecated trace("w", ...)
        component_var.trace_add("write", lambda *args: update_graph_partial())

        granularity_slider.config(
            command=lambda value: self.update_granularity_label(value, granularity, update_graph_partial)
        )

        # Initialize defaults and update graph
        granularity_slider.set(1)
        update_graph_partial()

    def create_stats_frame(self, parent, title):
        """Add a titled box to *parent* and return the (initially empty) label inside it."""
        container = ttk.LabelFrame(parent, text=title)
        container.pack(side=tk.TOP, fill=tk.X, padx=10, pady=5)

        # Left-justified text label that callers fill in later
        readout = tk.Label(container, text="", font=("Arial", 10), justify=tk.LEFT)
        readout.pack(side=tk.LEFT, padx=5, pady=5)
        return readout
    
    def create_dropdown(self, parent, label_text, variable, options):
        """Pack a caption followed by a read-only combobox bound to *variable*; return the combobox."""
        placement = {"side": tk.LEFT, "padx": 5, "pady": 5}

        tk.Label(parent, text=label_text, font=("Arial", 10)).pack(**placement)

        combo = ttk.Combobox(parent, textvariable=variable, values=options, state="readonly")
        combo.pack(**placement)
        return combo
    
    def create_granularity_slider(self, parent):
        """Pack a text label and a 1-4 horizontal slider into *parent*.

        Returns:
            tuple: (slider widget, StringVar shown by the label, initially "Month").
        """
        placement = {"side": tk.LEFT, "padx": 5, "pady": 5}

        # The label displays whatever the StringVar currently holds
        level_name = tk.StringVar(value="Month")
        tk.Label(parent, textvariable=level_name, font=("Arial", 10)).pack(**placement)

        slider = tk.Scale(
            parent,
            from_=1,
            to=4,
            orient=tk.HORIZONTAL,
            length=200,
            label="Granularity",
        )
        slider.pack(**placement)
        return slider, level_name

    def create_labeled_entry(self, parent, label_text):
        """Pack a caption and a text entry side by side in *parent*; return the entry."""
        placement = {"side": tk.LEFT, "padx": 5, "pady": 5}

        tk.Label(parent, text=label_text, font=("Arial", 10)).pack(**placement)

        field = tk.Entry(parent)
        field.pack(**placement)
        return field
    
    def update_granularity_label(self, value, label_text_var, update_graph_func):
        """Show the name of the selected granularity level, then refresh the graph.

        Args:
            value: Slider position; anything ``float()`` accepts, truncated to an int.
            label_text_var: Object whose ``set`` method receives the level name.
            update_graph_func: Zero-argument callable invoked after the label is set.

        Raises:
            KeyError: If the truncated value is not one of the levels 1-4.
        """
        level = int(float(value))
        names_by_level = dict(enumerate(("Month", "Half-Month", "Week", "Day"), start=1))
        label_text_var.set(names_by_level[level])
        update_graph_func()

    def build_scattergraph(self, ax, data, user_id, component, color, label_prefix, granularity_level):
        """Plot one user's interaction counts per period and return trend statistics.

        Rows of *data* matching *user_id* and *component* are bucketed into
        periods, counted, and plotted against the chronological index of each
        non-empty period together with a least-squares trend line.

        Args:
            ax: Axes to draw on.
            data: DataFrame with "user_id", "component" and a datetime "YearMonth" column.
            user_id: User whose rows are plotted.
            component: Component name to filter on.
            color: Colour used for both the points and the trend line.
            label_prefix: Prefix for the legend labels ("<prefix> Points" / "<prefix> Trend").
            granularity_level: 1 = month, 2 = half-month, 3 = week, 4 = day;
                any other value falls back to month.

        Returns:
            tuple: (correlation coefficient, p-value), or (None, None) when there
            are no matching rows, fewer than two periods, or constant counts.
        """
        # Filter and preprocess data
        filtered_data = data[(data["user_id"] == user_id) & (data["component"] == component)].copy()

        if filtered_data.empty:
            return None, None  # Return None for insufficient data

        # Order rows by time so that grouping with sort=False yields periods in
        # chronological order. Sorting the label strings instead would put
        # half-month labels such as "Feb 2024 1-15" before "Jan 2024 1-15".
        filtered_data = filtered_data.sort_values("YearMonth", kind="stable")

        # Map granularity level to period
        if granularity_level == 2:  # Half-Month
            filtered_data['Period'] = filtered_data['YearMonth'].apply(
                lambda x: f"{x.strftime('%b %Y')} {'1-15' if x.day <= 15 else '16-31'}"
            )
        else:
            period = {1: 'M', 3: 'W', 4: 'D'}.get(granularity_level, 'M')
            filtered_data['Period'] = filtered_data['YearMonth'].dt.to_period(period).astype(str)

        grouped_data = filtered_data.groupby("Period", sort=False).size().reset_index(name="Count")
        grouped_data["PeriodNumeric"] = range(len(grouped_data))
        x = grouped_data["PeriodNumeric"].to_numpy()
        y = grouped_data["Count"].to_numpy()

        # A trend needs at least two periods, and Pearson's r is undefined for constant counts
        if len(x) < 2 or len(np.unique(y)) < 2:
            return None, None  # Return None for insufficient data

        # Calculate trend line
        slope, intercept = np.polyfit(x, y, 1)
        trend_line = slope * x + intercept

        # Calculate correlation
        corr_coefficient = np.corrcoef(x, y)[0, 1]
        p_value = self.calculate_p_value(corr_coefficient, len(x))

        # Plot data
        ax.scatter(x, y, color=color, s=100, label=f"{label_prefix} Points")
        ax.plot(x, trend_line, color=color, linestyle="--", label=f"{label_prefix} Trend")

        # Update x-axis with categorical labels
        ax.set_xticks(x)
        ax.set_xticklabels(grouped_data["Period"], rotation=45, ha="right")

        return corr_coefficient, p_value

    @staticmethod
    def calculate_p_value(corr_coefficient, n):
        """Calculate p-value using t-distribution approximation using numpy."""
        t_value = corr_coefficient * np.sqrt((n - 2) / (1 - corr_coefficient ** 2))
        p_value = 2 * (1 - np.abs(np.arctan(t_value / np.sqrt(n - 2))))
        return p_value

    def correlation_between_users(self, data, user1_id, user2_id, component, granularity_level):
        """Correlate two users' per-period interaction counts for one component.

        Each user's rows are bucketed into periods and counted; the two series
        are aligned on the periods in which both users have at least one row,
        and Pearson's r is computed over those aligned counts.

        Args:
            data: DataFrame with "user_id", "component" and a datetime "YearMonth" column.
            user1_id: First user to compare.
            user2_id: Second user to compare.
            component: Component name to filter on.
            granularity_level: 1 = month, 2 = half-month, 3 = week, 4 = day;
                any other value falls back to month.

        Returns:
            tuple: (correlation coefficient, p-value), or (None, None) when either
            user has no matching rows, fewer than two periods overlap, or either
            aligned series is constant.
        """
        def group_user_data(user_id, count_column):
            """Return per-period row counts for one user, or None when there are no rows."""
            # Work on a copy so the caller's DataFrame is not modified
            filtered_data = data[(data["user_id"] == user_id) & (data["component"] == component)].copy()
            if filtered_data.empty:
                return None
            if granularity_level == 2:
                # Half-month: label by month plus the half the day falls into
                filtered_data['Period'] = filtered_data['YearMonth'].apply(
                    lambda x: f"{x.strftime('%b %Y')} {'1-15' if x.day <= 15 else '16-31'}"
                )
            else:
                # Month, week or day, via pandas period conversion
                period = {1: 'M', 3: 'W', 4: 'D'}.get(granularity_level, 'M')
                filtered_data['Period'] = filtered_data['YearMonth'].dt.to_period(period).astype(str)

            return filtered_data.groupby("Period").size().reset_index(name=count_column)

        # Fixed column names (not derived from the IDs) cannot collide in the merge
        user1_data = group_user_data(user1_id, "User1_Count")
        user2_data = group_user_data(user2_id, "User2_Count")

        if user1_data is None or user2_data is None:
            return None, None

        # Keep only the periods present for both users
        merged_data = pd.merge(user1_data, user2_data, on="Period", how="inner")

        # Too few shared periods to estimate a correlation
        if len(merged_data) < 2:
            return None, None

        x = merged_data["User1_Count"].to_numpy()
        y = merged_data["User2_Count"].to_numpy()

        # Pearson's r is undefined (NaN) when either series is constant
        if len(np.unique(x)) < 2 or len(np.unique(y)) < 2:
            return None, None

        # Calculate the Pearson correlation between period counts and its significance
        corr_coefficient = np.corrcoef(x, y)[0, 1]
        p_value = self.calculate_p_value(corr_coefficient, len(x))

        return corr_coefficient, p_value
    
    def update_graph(self, data, user1_box, user2_box, component_var, granularity_slider, stats_label, ax, canvas):
        """Redraw the correlation chart and statistics from the current inputs.

        Each entry holding a valid integer user ID gets its own scatter plot and
        trend statistics. When both entries are valid, the correlation between
        the two users is reported as well. Entries that do not parse as integers
        are ignored.

        Args:
            data: Interaction data passed through to the plotting helpers.
            user1_box: Entry-like object whose ``get()`` returns the first user ID text.
            user2_box: Entry-like object whose ``get()`` returns the second user ID text.
            component_var: Variable whose ``get()`` returns the selected component.
            granularity_slider: Slider whose ``get()`` returns the granularity level.
            stats_label: Label whose text is replaced with the statistics summary.
            ax: Axes to clear and draw on.
            canvas: Canvas redrawn after the axes are updated.
        """
        def parse_user_id(entry):
            """Return the entry's content as an int, or None if it is not a valid integer."""
            try:
                return int(entry.get())
            except ValueError:
                return None

        user1_id = parse_user_id(user1_box)
        user2_id = parse_user_id(user2_box)

        # Reject identical IDs before touching the axes, so the chart is never
        # left cleared or partially plotted without a redraw.
        if user1_id is not None and user1_id == user2_id:
            messagebox.showerror("Error", "Please select two different User IDs for comparison.")
            return

        ax.clear()  # Clear the axes
        granularity_level = int(granularity_slider.get())
        component = component_var.get()
        correlations = []
        plots_exist = False  # Set once at least one user has been plotted

        # Plot data for individual users
        for user_id, color, label in ((user1_id, "blue", "User 1"), (user2_id, "green", "User 2")):
            if user_id is None:
                continue
            corr, p_value = self.build_scattergraph(ax, data, user_id, component, color, label, granularity_level)
            if corr is not None:
                plots_exist = True
                significance = "Significant" if p_value < 0.05 else "Not Significant"
                correlations.append(f"{label}: r={corr:.2f}, p={p_value:.2f} ({significance})")

        # Calculate correlation between users if both are provided. This only adds a
        # statistics line, not an artist, so it does not affect whether a legend is drawn.
        if user1_id is not None and user2_id is not None:
            inter_corr, inter_p_value = self.correlation_between_users(
                data, user1_id, user2_id, component, granularity_level
            )
            if inter_corr is not None:
                inter_significance = "Significant" if inter_p_value < 0.05 else "Not Significant"
                correlations.append(
                    f"Between Users: r={inter_corr:.2f}, p={inter_p_value:.2f} ({inter_significance})"
                )

        # Update graph
        ax.set_xlabel("Period")
        ax.set_ylabel("Interaction Count")
        ax.set_title(f"Interactions with {component}")
        ax.tick_params(axis="x", rotation=45)

        # A legend without labelled artists would be empty, so add it only when something was plotted
        if plots_exist:
            ax.legend()

        stats_label.config(text="\n".join(correlations))

        # Redraw the canvas
        canvas.draw()

# Launch the Tkinter GUI only when run as a script or notebook cell, so that
# importing this module does not open a window as a side effect.
if __name__ == "__main__":
    QueryGUI()  # Run Tkinter GUI

