In [2]:
import pandas as pd 
from pymongo import MongoClient
from tkinter import *
from tkinter import filedialog, messagebox
from tkinter import Tk
from tkinter import ttk
from tkinter import PhotoImage, Label
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
def load_csv(text_widget):
    """Ask the user for a CSV file, load it and preview it in the GUI.

    The loaded frame is also published as the module-level ``data`` variable,
    which the other GUI callbacks read.

    Args:
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        The loaded ``pandas.DataFrame`` on success, or ``False`` when the
        dialog was cancelled or the file could not be read.
    """
    global data  # to store the loaded dataset

    try:
        # File Dialog for a user to select a csv file
        file_path = filedialog.askopenfilename(
            title="Select a File",
            filetypes=[("CSV files", "*.csv")]
        )

        # Cancelling the dialog is not fatal: report it and leave the
        # application (and any previously loaded data) untouched.
        if not file_path:
            messagebox.showerror("File Selection Error", "No file was selected. Please upload the correct CSV file" )
            return False

        # Read the CSV into a DataFrame; only publish it once parsing succeeded
        loaded = pd.read_csv(file_path)
        data = loaded

        # Clear previous output before presenting the new preview
        text_widget.delete('1.0', 'end')
        text_widget.insert("end", f"File loaded successfully!\n\n{data.head(6)}")
        messagebox.showinfo("Success", "CSV file loaded sucessfully!")
        return data

    except FileNotFoundError as e:
        messagebox.showerror("Error", str(e))
        return False
    # Parsing problems (empty file, bad encoding, ...) end up here
    except Exception as e:
        messagebox.showerror("Error", f"An error occured: {str(e)}")
        return False

In [4]:
def clean_data(data, text_widget):
    """Fill missing values and standardise text columns of ``data`` in place.

    Text columns get ``"Unknown"`` for missing entries and are stripped and
    lower-cased; every other column with gaps is filled with its mean.
    The frame is modified in place because callers keep using the same object.

    Args:
        data: ``pandas.DataFrame`` to clean, or ``None`` if nothing was loaded.
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        None. Errors are reported through a message box and the text widget.
    """
    try:
        # Clear previous output
        text_widget.delete("1.0", "end")

        # Validation if the data is loaded
        if data is None:
            messagebox.showerror("Data Cleaning Error", "No file was found!")  # When no data is loaded
            text_widget.insert("end", "No file was loaded. Please load the correct file first.\n")
            return

        # Opening message (labels are stringified so numeric headers don't break join)
        text_widget.insert("end", "Starting data cleaning process...\n\n")
        text_widget.insert("end", "Current columns: " + ", ".join(map(str, data.columns)) + "\n\n")

        # Count the gaps before filling so the summary reflects what was fixed
        total_missing_values = data.isnull().sum().sum()

        # Text columns: classic object dtype or pandas' dedicated string dtype
        text_columns = [
            col for col in data.columns
            if pd.api.types.is_object_dtype(data[col]) or isinstance(data[col].dtype, pd.StringDtype)
        ]

        # Handling missing values. fillna returns a new Series, so the result
        # has to be assigned back for the fill to take effect.
        for col in data.columns:
            if not data[col].isnull().any():
                continue
            fill_value = "Unknown" if col in text_columns else data[col].mean()
            data[col] = data[col].fillna(fill_value)

        # Text standardisation
        for col in text_columns:
            data[col] = data[col].str.strip().str.lower()

        # User gui feedback
        text_widget.delete("1.0", "end")
        text_widget.insert("end", f"""

          ━━━━━━━━━━━ DATA CLEANING COMPLETE ━━━━━━━━━━━
          
          ✓ Missing values handled: {total_missing_values}
          ✓ Text columns standardized: {len(text_columns):,}

          Cleaning Stage is completed

          ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

          Snippet of cleaned data:
          """)
        data_string = data.head(6).to_string(index=True, justify='left')
        indented_data = "\t" + data_string.replace("\n", "\n\t")
        text_widget.insert("end", f"\n{indented_data}")

    # Handle any unexpected errors
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during data cleaning: {str(e)}")
        text_widget.insert("end", f"An error occurred during data cleaning: {str(e)}\n")

In [5]:
def convert_to_json(data, text_widget):
    """Save ``data`` as a records-oriented JSON file chosen by the user.

    The file role (``component_code``, ``activity_log`` or ``user_log``) is
    derived from the chosen file name; any other name is rejected before
    anything is written.

    Args:
        data: ``pandas.DataFrame`` to export, or ``None`` if nothing was loaded.
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        None. Success and failure are reported through message boxes and the
        text widget.
    """
    import os  # local import: only needed to split the file name off the path

    try:
        # Clear previous output
        text_widget.delete("1.0", "end")

        # Checking if data exists
        if data is None:
            messagebox.showerror("Error", "Unvailable data for conversion.")
            text_widget.insert("end", "Error: Unvailable data for conversion.\n")
            return

        # Using FileDialog to select the saved file
        save_path = filedialog.asksaveasfilename(
            title="Save file as",
            defaultextension=".json",
            filetypes=[("JSON files", "*.json")]
        )

        # user cancels this operation
        if not save_path:
            messagebox.showinfo("Save Cancelled", "unable to save file")
            text_widget.insert("end", "The Save operation has been cancelled by the user.\n")
            return

        # Determine file role from the file name (case-insensitive)
        file_name = os.path.basename(save_path).lower()
        for candidate in ("component_code", "activity_log", "user_log"):
            if candidate in file_name:
                file_role = candidate
                break
        else:
            raise ValueError("Unknown file role for JSON conversion.")

        # orient="records" never writes the index; passing index=False as well
        # is rejected by older pandas releases, so it is left out.
        data.to_json(save_path, orient="records")
        messagebox.showinfo("Success", f"File saved as {file_role.capitalize()} JSON at {save_path}\n\n")
        text_widget.insert("end", f"{file_role.capitalize()} saved as JSON at {save_path}\n\n")
        text_widget.insert("end", "Data successfully stored.\n")

    # Handle any unexpected errors
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred while saving to JSON: {str(e)}")
        text_widget.insert("end", f"Error: {str(e)}\n")

In [6]:
# Registry of loaded and derived DataFrames, keyed by role / stage name
datasets = {}

def load_json(text_widget):
    """Load a JSON file chosen by the user and register it in ``datasets``.

    The file role (``component_code``, ``activity_log`` or ``user_log``) is
    derived from the file name and used as the key in ``datasets``.

    Args:
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        The loaded ``pandas.DataFrame`` on success, or ``False`` when the
        dialog was cancelled, the role is unknown or the file cannot be read.
    """
    import os  # local import: only needed to split the file name off the path

    try:
        # Ask the user to select a JSON file
        file_path = filedialog.askopenfilename(
            title="Select JSON file for",
            filetypes=[("JSON Files", "*.json")]
        )

        # The role is not known yet at this point, so the message cannot name it
        if not file_path:
            text_widget.insert("end", "No file selected.\n")
            return False

        # Determine file role from the file name (case-insensitive)
        file_name = os.path.basename(file_path).lower()
        for candidate in ("component_code", "activity_log", "user_log"):
            if candidate in file_name:
                file_role = candidate
                break
        else:
            raise ValueError("Unknown file role for JSON conversion.")

        # Load JSON file into a DataFrame
        data = pd.read_json(file_path)

        # Store the frame under its role
        datasets[file_role] = data

        # Presented in the GUI
        text_widget.insert("end", "Loaded file role: {:<15}\n".format(file_role))

        return data

    except ValueError as e:
        text_widget.insert("end", f"Error: {str(e)}\n")
        messagebox.showerror("Load JSON Error", f"Error: {str(e)}")
        return False
    except Exception as e:
        text_widget.insert("end", f"Unexpected error while loading JSON: {str(e)}\n")
        messagebox.showerror("Unexpected Error", f"Unexpected error: {str(e)}")
        return False

In [7]:
def remove_data(text_widget):
    """Drop the 'system' and 'folder' rows from the component-code dataset.

    The filtered frame replaces ``datasets["component_code"]`` so that later
    stages work on the reduced data. A lower-cased ``component`` helper column
    is added to the result; the originally loaded frame is left untouched.

    Args:
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        The filtered ``pandas.DataFrame`` on success, ``False`` on failure.
    """
    try:
        # Clear previous output
        text_widget.delete('1.0','end')

        # Check component_data
        if "component_code" not in datasets:
            text_widget.insert("end", "Error: 'Component_Codes' data not loaded. Please load the data first.\n")
            print("Error: 'Component_Codes' data not loaded.")
            return False

        # Work on a copy so the loaded frame is not modified behind the caller's back
        data = datasets["component_code"].copy()

        # Locate the component column irrespective of header capitalisation
        matching_columns = [col for col in data.columns if str(col).lower() == 'component']
        if not matching_columns:
            text_widget.insert("end", "Error: 'Component' column not found in dataset.\n")
            print("Available columns:", data.columns.tolist())
            return False

        # Normalise the component values for a case-insensitive comparison
        data['component'] = data[matching_columns[0]].str.lower()

        # Record initial row count
        before_rows = len(data)

        # Removal of 'System' and 'Folder'
        removal_data = data[~data["component"].isin(["system", "folder"])]

        # Record final row count
        after_rows = len(removal_data)

        # Store under the same key the other stages read from
        datasets["component_code"] = removal_data

        # Display results in GUI
        text_widget.insert("end", f"""
           ━━━━━━━━━━━ Remove STAGE ━━━━━━━━━━━ 
           
            REMOVE Step completed. 
            Removed 'System' and 'Folder' components
            Rows before removal: {before_rows}
            Rows after removal: {after_rows}
            Total removed: {before_rows - after_rows} 
              
           Please upload the following files:
           
            - activity_log
            
            - user_log
            
             Current progress:
            ✓ Step 1: Remove - Completed
            ► Step 2: Rename - Waiting for files
            
             ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
             
            """)

        return removal_data

    # Handling errors that can occur (specific handlers before the generic one)
    except ValueError as e:
         messagebox.showerror("Error", f"Incorrect File Role: {str(e)}")
         text_widget.insert("end", f"Incorrect File Role{str(e)}\n")
         return False
    except KeyError as e:
         messagebox.showerror("Error", f"Remove failed, unable to Remove 'System' and 'Folder': {str(e)}")
         text_widget.insert("end", f"Remove failed, unable to Remove 'System' and 'Folder':{str(e)}\n")
         return False
    except Exception as e:
         messagebox.showerror("Error", f"An error occurred during Remove stage: {str(e)}")
         text_widget.insert("end", f"Error: {str(e)}\n")
         return False


def rename_columns(text_widget):
    """Rename 'User Full Name *Anonymized' to 'User_ID' in both log datasets.

    Works on copies of ``datasets["activity_log"]`` and ``datasets["user_log"]``
    and writes the (possibly renamed) copies back to ``datasets``.

    Args:
        text_widget: Tk ``Text``-like widget used for user feedback.

    Returns:
        ``True`` if at least one dataset was renamed, ``False`` if required
        files are missing, nothing could be renamed, or an error occurred.
    """
    try:
        # Clear previous output
        text_widget.delete('1.0','end')

        # Both logs must be loaded before this step can run
        missing_files = [file for file in ["activity_log", "user_log"] if file not in datasets]
        if missing_files:
            text_widget.insert("end", "Warning: Missing required files for rename stage\n")
            return False

        activity_data = datasets["activity_log"].copy()
        user_data = datasets["user_log"].copy()
        rename_count = 0 # counter checker

        # Applying rename changes to activity_log
        if "User Full Name *Anonymized" in activity_data.columns:
            activity_data = activity_data.rename(columns={"User Full Name *Anonymized": "User_ID"})
            if "User_ID" in activity_data.columns: # Verifying rename
                text_widget.insert("end", f"""
                   ━━━━━━━━━━━ RENAME STAGE  ━━━━━━━━━━━
                   
                ✓ Activity Log Changes:
                - Renamed: 'User Full Name *Anonymized' → 'User_ID'
                """)
                rename_count += 1
            else:
                text_widget.insert("end", "Activity Log: Rename failed\n")
        else:
            text_widget.insert("end", "Warning: Column user full name *anonymized not found in activity_log\n")

        # Applying  rename changes to user_log
        if "User Full Name *Anonymized" in user_data.columns:
            user_data = user_data.rename( columns={"User Full Name *Anonymized": "User_ID"})
            if "User_ID" in user_data.columns: # Verifying rename
                text_widget.insert("end", f"""
              
                 
                ✓ User Log Changes:
                 - Renamed: 'User Full Name *Anonymized' → 'User_ID '
                """)
                rename_count += 1
            else:
                text_widget.insert("end", "User Full Name *Anonymized: Rename failed\n")
        else:
            text_widget.insert("end", "Warning: Column user full name *anonymized not found in user_log\n")

        # Updating the datasets with the modified data
        datasets["activity_log"] = activity_data
        datasets["user_log"] = user_data

        # Message for gui
        if rename_count > 0:
            text_widget.insert("end", f"""
           
              ━━━━━━━━━━━ Current Progress  ━━━━━━━━━━━
              
                ✓ Step 1: Remove - Completed
                ✓ Step 2: Renamed - Completed:
                ► Step 3: Merge - Waiting -> click on Data Manipulation button <-
               
                """)
            return True
        else:
            text_widget.insert("end", """
           ━━━━━━━━━━━ RENAME STAGE - SUMMARY  ━━━━━━━━━━━
            ⚠ No changes made: Required columns not found
             ━━━━━━━━━━━ ━━━━━━━━━━━ ━━━━━━━━━━━ ━━━━━━━━━━━
            """)
            return False

    # Exception handling
    except ValueError as e:
        messagebox.showerror("Error", f"Invalid file role: {str(e)}")
        text_widget.insert("end", f"Invalid file role: {str(e)}\n")
        return False
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred during Rename: {str(e)}")
        text_widget.insert("end", f"Unexpected error during Rename: {str(e)}\n")
        return False

In [8]:
def merge_datasets(text_widget):
    """Combine the activity log, user log and component codes into one frame.

    The three frames are concatenated column-wise (aligned on their row
    index), duplicate column names are dropped keeping the first occurrence,
    and an ``Interactions`` column of ones is added. The result is stored in
    ``datasets['merged_data']``.

    Args:
        text_widget: Tk ``Text``-like widget used for progress and feedback.

    Returns:
        ``True`` on success, ``False`` if a prerequisite is missing or an
        error occurred.
    """
    try:
        # Clear previous output
        text_widget.delete('1.0','end')

        # Opener Message
        text_widget.insert("end", """
        ━━━━━━━━━━━ MERGE STAGE - STARTING  ━━━━━━━━━━━
        
        Please wait while datasets are being merged...
        Please wait it may few seconds due to the large amount of data.
        
        Current Status: Initialising...
        """)

        # Update text widget to show progress
        text_widget.update()

        # All three inputs are read below, so all three must be present
        required = ('activity_log', 'user_log', 'component_code')
        if any(name not in datasets for name in required):
            text_widget.insert("end", "Error: Missing datasets for merging.\n")
            return False

        text_widget.update()

        # Get the data (copies, so the stored inputs are not modified)
        activity_data = datasets["activity_log"].copy()
        user_data = datasets["user_log"].copy()
        component_data = datasets["component_code"].copy()

        text_widget.update()

        # Validation check: both logs need the renamed key column
        if 'User_ID' not in activity_data.columns or 'User_ID' not in user_data.columns:
            text_widget.insert("end", "Error: User_ID column not found.  Make sure the rename step was completed")
            return False

        # Format date
        try:
            user_data["Date"] = pd.to_datetime(user_data["Date"], format='%d/%m/%Y %H:%M')
            user_data["Month"] = user_data["Date"].dt.strftime('%b-%Y')
        except Exception as e:
            text_widget.insert("end", f"Error formatting dates: {str(e)}\n")
            return False

        text_widget.insert("end", """
          Status: Merging datasets...
          This is the longest step - please be patient 
          """)
        text_widget.update()

        merged_data = pd.concat([
            activity_data[['User_ID', 'Component', 'Action', 'Target']],
            user_data[['User_ID', 'Date', 'Month']],
            component_data[['Component', 'Code']]
        ], axis=1)

        text_widget.insert("end", "Status: Adding interaction column...\n")
        text_widget.update()

        # Drop repeated column names (second 'User_ID' / 'Component'), keeping the first
        merged_data = merged_data.loc[:, ~merged_data.columns.duplicated()]

        # Every row counts as one interaction
        merged_data['Interactions'] = 1

        # Storing results
        datasets['merged_data'] = merged_data

        # Display gui  message
        text_widget.insert("end", f"""
              ━━━━━━━━━━━ MERGE STAGE - SUMMARY  ━━━━━━━━━━━
              
                ✓ Total records merged: {len(merged_data):,}

                ━━━━━━━━━━━ Current Progress  ━━━━━━━━━━━
                
                ✓ Step 1: Remove - Completed
                ✓ Step 2: Renamed - Completed:
                ✓ Step 3: Merge - Completed
                ► Step 4: Reshaped - Waiting -> click on Data Manipulation button <-
                """)
        return True

    except ValueError as e:
         messagebox.showerror("Error", f"Incorrect file role: {str(e)}")
         text_widget.insert("end", f"Incorrect file role:{str(e)}\n")
         return False
    except Exception as e:
         messagebox.showerror("Error", f"An error occurred during Merge stage: {str(e)}")
         text_widget.insert("end", f"Error: {str(e)}\n")
         return False

def reshape_data(text_widget):
    """Pivot the merged data into one column per month.

    Reads ``datasets['merged_data']``, stores the four counting columns as
    ``datasets['count_data']``, adds a running ``Unique_ID`` column to the
    merged frame and pivots ``Interactions`` by ``Month``. The pivot is stored
    as ``datasets['reshaped_data']`` and a two-row sample is shown in the GUI.

    Args:
        text_widget: Tk ``Text``-like widget used for progress and feedback.

    Returns:
        ``True`` on success, ``False`` if the merge stage has not run or an
        error occurred.
    """
    try:
        # clear previous output
        text_widget.delete('1.0','end')
        # Opener Message
        text_widget.insert("end", """
        ━━━━━━━━━━━ RESHAPING STAGE - STARTING  ━━━━━━━━━━━
        
        Please wait while datasets are being reshaped...
        Please wait it may take couple seconds due to the large amount of data.
        
        Current Status: Initialising...
        """)
        # Update text widget to show progress
        text_widget.update()

        if 'merged_data' not in datasets:
            text_widget.insert("end", "Error: Reshape can't be applied. Please complete the merge stage\n")
            return False

        text_widget.update()

        # Get the merged data and store it for the next stage
        merged_data = datasets['merged_data']
        datasets['count_data'] = merged_data[['User_ID', 'Component', 'Month', 'Interactions']]

        text_widget.insert("end", "Status: Creating unique identifiers...\n")
        text_widget.update()

        # pivot() needs unique index/column pairs, so every row gets its own id
        merged_data['Unique_ID'] = range(len(merged_data))

        # Reshape the data using pivot
        reshaped_data = merged_data.pivot(
            index="Unique_ID",
            columns="Month",
            values="Interactions"
        )

        text_widget.insert("end", "Status: Finalising and storing results...\n")
        text_widget.update()

        # Store reshaped data and build sample
        datasets['reshaped_data'] = reshaped_data
        reshape_sample = reshaped_data.reset_index().head(2).fillna(0)    # snippet to show the users change NAN to 0

        # Render the sample as JSON and strip the brackets/braces for display
        format_table = reshape_sample.to_json(orient='records', indent=4)
        cleaned_format = format_table.replace('[', '').replace(']', '').replace('{', '    ').replace('}', '')

        # Display in GUI
        text_widget.delete('1.0', 'end')
        text_widget.insert("end", rf"""
              ━━━━━━━━━━━ RESHAPE STAGE - SUMMARY  ━━━━━━━━━━━
              
                ✓ Data Successfully reshaped:
                ✓ Reshaped to the correct format
                
                A small sample:
                
                {cleaned_format}

             
                ━━━━━━━━━━━ Current Progress  ━━━━━━━━━━━
                
                ✓ Step 1: Remove - Completed
                ✓ Step 2: Renamed - Completed
                ✓ Step 3: Merge - Completed
                ✓ Step 4: Reshaped - Completed
                ► Step 5: Count - Waiting -> click on Data Manipulation button <- 
                """)
        return True

    except ValueError as e:
         messagebox.showerror("Error", f"Reshape failed: {str(e)}")
         text_widget.insert("end", f"Reshape failed: Error reshape stage {str(e)}\n")
         return False
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during the reshape stage: {str(e)}")
        text_widget.insert("end", f"Error: {str(e)}\n")
        return False

In [9]:
def count_data(text_widget):
    """Total the interactions per user, component and month.

    Reads ``datasets['merged_data']``, sums ``Interactions`` for every
    (User_ID, Component, Month) combination and stores the result as
    ``datasets['counted_data']``.

    Args:
        text_widget: Tk ``Text``-like widget used for progress and feedback.

    Returns:
        ``True`` on success, ``False`` when the merged data is missing or an
        error occurred.
    """
    try:
        # Start from an empty widget and announce the stage
        text_widget.delete('1.0','end')
        text_widget.insert("end", """
        ━━━━━━━━━━━ COUNT STAGE - STARTING  ━━━━━━━━━━━
        
        Please wait while counting...
     
        
        Current Status: Initialising...
        """)

        # Flush the banner to the screen before the work starts
        text_widget.update()

        # Guard: nothing to count until the merge stage has produced data
        if 'merged_data' not in datasets:
            text_widget.insert("end", "Error: No reshaped data to count the interactions.\n")
            return False

        # Restrict to the grouping keys plus the value being summed
        group_keys = ['User_ID', 'Component', 'Month']
        subset = datasets['merged_data'][group_keys + ['Interactions']].copy()

        # One row per key combination holding the summed interactions
        totals = subset.groupby(group_keys, as_index=False)['Interactions'].sum()

        # Publish for the statistics / correlation stages
        datasets['counted_data'] = totals

        # Summary for the user
        text_widget.insert("end", f"""
              ━━━━━━━━━━━ COUNT STAGE - SUMMARY  ━━━━━━━━━━━
              
                ✓ Successfully Counted Interactions 
                ✓ Total records processed: {len(totals):,}
                
                ━━━━━━━━━━━ Current Progress  ━━━━━━━━━━━
                ✓ Step 1: Remove - Completed
                ✓ Step 2: Renamed - Completed
                ✓ Step 3: Merge - Completed
                ✓ Step 4: Reshaped - Completed
                ✓ Step 5: Count - Completed 
              
                ━━━━━━━━━━━ Next Stage Statistics  ━━━━━━━━━━━
                
                ► Step 6: Output Statistic - Waiting 

                Click 'Output Statistics' to proceed 
                
                  """)
        return True

    except ValueError as err:
        messagebox.showerror("Value Error", f"Validation Error: {str(err)}")
        text_widget.insert("end", f"Value Error: {str(err)}\n")
        return False
    except Exception as err:
        messagebox.showerror("Error", f"An error occurred during the Count stage: {str(err)}")
        text_widget.insert("end", f"Error: {str(err)}\n")
        return False

In [10]:

main_steps = 1  # Intialising steps

#  Wrap it with a function that can call each sub function in sequence
def data_manipulation(text_widget):
    """Run the next data-manipulation stage; one stage per call.

    The module-level ``main_steps`` counter selects the stage
    (1 remove, 2 rename, 3 merge, 4 reshape, 5 count). It only advances when
    the stage did not return ``False``; after the count stage it wraps back
    to 1.

    Args:
        text_widget: Tk ``Text``-like widget used for progress and feedback.

    Returns:
        None.
    """
    global main_steps
    try:
        # step number -> (stage function, message shown when the stage fails)
        stages = {
            1: (remove_data, "Step 1: Failed to remove the required data in 'Component_Codes'.\n\n"),
            2: (rename_columns, "Please upload required files before proceeding.\n"),
            3: (merge_datasets, "Step 3: Failed to merge datasets. Exiting...\n"),
            4: (reshape_data, "Step 4: Failed to reshape data. Exiting...\n"),
            5: (count_data, "Step 5: Failed to count interactions. Exiting...\n"),
        }
        if main_steps not in stages:
            return

        run_stage, failure_message = stages[main_steps]
        outcome = run_stage(text_widget)

        # Every stage signals failure with False. An identity check is used
        # because the remove stage returns a DataFrame on success, which has
        # no unambiguous truth value.
        if outcome is False:
            text_widget.insert("end", failure_message)
            text_widget.update()
            text_widget.see("end")
            return  # stay on the same step so the user can retry

        if main_steps == 5:
            text_widget.insert("end", "Data manipulation process completed successfully!\n") # Final message
            main_steps = 1 # Reseting back to the first stage
        else:
            main_steps += 1

    # Handle any unexpected errors
    except RuntimeError as e:
        text_widget.insert("end", f"Runtime Error occured: {str(e)}\n")
        print(f"Runtime Error ocurred: {str(e)}")
    except Exception as e:
         text_widget.insert("end", f"Error during data manipulation stage: {str(e)}\n")
         print(f"Error during data manipulation stage: {str(e)}")

In [11]:
def output_statistics(text_widget):
    """Compute and display mean, median and mode of the interaction counts.

    Reads ``datasets['counted_data']`` and produces two tables: one grouped by
    component and month, one grouped by component only. Both are stored in
    ``datasets`` (``'monthly_statistics'`` / ``'semester_statistics'``) and
    printed to the text widget.

    Args:
        text_widget: Tk ``Text``-like widget used for progress and feedback.

    Returns:
        ``True`` on success, ``False`` when the count data is missing or an
        error occurred.
    """
    def first_mode(values):
        # The smallest mode is used when several values tie; 0 if there is none
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else 0

    # Same three statistics for both groupings
    aggregations = [
        ('Mean', 'mean'),
        ('Median', 'median'),
        ('Mode', first_mode),
    ]

    try:
        # Clear previous output
        text_widget.delete('1.0','end')

          # Opener Message
        text_widget.insert("end", """
        ━━━━━━━━━━━ STATISTICS STAGE - STARTING  ━━━━━━━━━━━
        
        Please wait while calculating Statistics...
        Please wait it may take few moments due to the large amount of data.
        
        Current Status: Initialising...
        """)

        # Update text widget to show progress
        text_widget.update()

        if 'counted_data' not in datasets:
             text_widget.insert("end", "Error: No Count data  for statistical output.\n")
             return False

        # counted data
        counted_data = datasets['counted_data']

        # The monthly statistics
        monthly_statistics = counted_data.groupby(['Component', 'Month'])['Interactions'].agg(aggregations).round(2)

        # 13 week academic semester
        semester_statistics = counted_data.groupby('Component')['Interactions'].agg(aggregations).round(2)

        # Stored results
        datasets['monthly_statistics'] = monthly_statistics
        datasets['semester_statistics'] = semester_statistics

        # Display in GUI
        text_widget.insert("end", """
        ━━━━━━━━━━━ Statistic STAGE - SUMMARY  ━━━━━━━━━━━
        ✓ Successfully calculated statistics
                        
        ━━━━━━━━━━━ Monthly Statistics ━━━━━━━━━━━
        Component       Month       Mean     Median     Mode
        ----------------------------------------------------""")

        # The monthly index is a (component, month) pair
        for fms in monthly_statistics.index:
            component, month = fms
            row = monthly_statistics.loc[fms]
            text_widget.insert("end", f"\n\t{component:<14} {month:<10} {row['Mean']:>9.2f} {row['Median']:>8.0f} {row['Mode']:>6.0f}")

        text_widget.insert("end", """ 
        
        ━━━━━━━━━━━ Semester Statistics (13-week period) ━━━━━━━━━━━
        Component       Mean     Median     Mode
        -----------------------------------------""")

        # Formatting Semester stats
        for component in semester_statistics.index:
            row = semester_statistics.loc[component]
            text_widget.insert("end", f"\n\t{component:<14} {row['Mean']:>9.2f} {row['Median']:>8.0f} {row['Mode']:>6.0f}")

        text_widget.insert("end", """ 
        
        ━━━━━━━━━━━ Current Progress  ━━━━━━━━━━━
                        
        ✓ Step 1: Remove - Completed
        ✓ Step 2: Renamed - Completed
        ✓ Step 3: Merge - Completed
        ✓ Step 4: Reshaped - Completed
        ✓ Step 5: Count - Completed 
        ✓ Step 6: Statistics - Completed 
                            
        ━━━━━━━━━━━ Next Stage Correlation  ━━━━━━━━━━━
                        
        ► Step 7 : Output Correlation 
        
        Click 'Output Correlation' to proceed
                        
        """)
        return True

    # Specific handlers first; the generic one would otherwise shadow them
    except ValueError as e:
        messagebox.showerror("Value Error", f"Validation Error: {str(e)}")
        text_widget.insert("end", f"Value Error: {str(e)}\n")
        return False
    except KeyError as e:
        messagebox.showerror("Key Error", f"Missing Data: {str(e)}")
        text_widget.insert("end", f"Key Error: {str(e)}\n")
        return False
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during the Statistics stage: {str(e)}")
        text_widget.insert("end", f"Error: {str(e)}\n")
        return False

In [12]:
def output_correlation(text_widget):
    """Plot interaction patterns and component correlations and show them in the GUI.

    Reads ``datasets['counted_data']``, drops negative counts and duplicate
    rows, then writes two images to the working directory:
    ``correlation_analysis.png`` (scatter of interactions per user and
    component) and ``correlation_heatmap.png`` (correlation matrix of the
    per-user component totals). Both images are embedded in the text widget.

    Args:
        text_widget: Tk ``Text`` widget used for feedback and image display.

    Returns:
        ``True`` when the plots were generated (even if embedding the images
        failed), ``False`` when the count data is missing or an error occurred.
    """
    try:
        # Clear previous output
        text_widget.delete('1.0','end')

        if 'counted_data' not in datasets:
             text_widget.insert("end", "Error: No Count data  for correlation output.\n")
             return False

        # Get the data
        counted_data = datasets['counted_data']

        # Data cleaning: keep non-negative counts. The explicit copy makes the
        # column assignment below act on an independent frame, not on a slice
        # of the stored data.
        counted_data = counted_data[counted_data['Interactions'] >= 0].copy()
        counted_data['Interactions'] = counted_data['Interactions'].astype(int)

        # Removing duplicates
        counted_data = counted_data.drop_duplicates()

        # Components to analyse from the previous data
        required_components = ['assignment', 'quiz', 'lecture', 'book', 'project', 'course']

        # Scatter plot showing interaction patterns by component
        plt.figure(figsize=(12, 5))
        for component in required_components:
            component_data = counted_data[counted_data['Component'] == component]
            plt.scatter(component_data['User_ID'],
                       component_data['Interactions'],
                       label=component,
                       alpha=0.5, s=20)

        plt.title('User Interactions by Component', pad=10)
        plt.xlabel('User ID')
        plt.ylabel('Number of Interactions')

        # Axis layout. NOTE(review): assumes User_ID is numeric - confirm against the data.
        plt.xticks(rotation=0)  # Keep ID numbers horizontal
        max_id = counted_data['User_ID'].max()
        plt.xlim(right=max_id + 5)  # Add some padding on the right

        # Tick marks every 10th user; range() needs a true int, the column may be float
        tick_range = pd.Series(range(0, int(max_id) + 10, 10))
        plt.xticks(tick_range)
        plt.grid(True, alpha=0.3)  # Add light grid lines to help track points to axes

        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout(rect=[0, 0, 0.9, 1])  # Leave space for legend

        # Save the visualisation
        plt.savefig('correlation_analysis.png', dpi=100, bbox_inches='tight', pad_inches=0.5)
        plt.close()

        # Heat map: per-user totals per component, users without a component count as 0
        the_correlation = counted_data[counted_data['Component'].isin(required_components)].pivot_table(
            index='User_ID',
            columns='Component',
            values='Interactions',
            aggfunc='sum'
        ).fillna(0)

        correlation_matrix = the_correlation.corr()
        plt.figure(figsize=(8, 6))
        sns.heatmap(correlation_matrix,
                   annot=True,
                   cmap='coolwarm',
                   vmin=-1, vmax=1,
                   fmt='.2f')
        plt.title('Component Correlation')

        # Save the visualisation
        plt.savefig('correlation_heatmap.png', dpi=100, bbox_inches='tight', pad_inches=0.5)
        plt.close()

        # Embed the saved PNG files in the GUI
        try:

            # Scatter Plot
            frame = Frame(text_widget)
            photo = PhotoImage(file='correlation_analysis.png')
            image_label = Label(frame, image=photo)
            image_label.image = photo  # keep a reference so Tk does not drop the image
            image_label.grid(row=0, column=0, padx=40, pady=20, sticky='ew')

            # Heatmap
            heatmap_photo = PhotoImage(file='correlation_heatmap.png')
            heatmap_label = Label(frame, image=heatmap_photo)
            heatmap_label.image = heatmap_photo  # keep a reference, as above
            heatmap_label.grid(row=1, column=0, padx=40, pady=20)

            text_widget.insert("end", f"""
              ━━━━━━━━━━━ Correlation STAGE - SUMMARY  ━━━━━━━━━━━
                ✓ Analysis Patterns 
                ✓ Visualisation Generated:
                    - User Interactions by Component
                    - Component Correlation

                A high quality PNG version is saved follow these steps:
                    1. Go to your project folder
                    2. Look for 'correlation_analysis.png' and 'correlation_heatmap.png'
                    3. Double-click to open and view the full scatter plot
                """)

            text_widget.insert("end", """
            ━━━━━━━━━━━ Visualisation Presented below ━━━━━━━━━━━
            """)
            text_widget.insert("end", "\n\n")
            text_widget.window_create("end", window=frame)
            text_widget.insert("end", "\n\n")

            text_widget.insert("end", """
            """)
        except Exception as img_error: # Handle any unexpected errors for images
            messagebox.showerror("Value Error", f"Image unable to be displayed {str(img_error)}")
            text_widget.insert("end", """
             
            To remember: 
            The  visualisation has been saved as: correlation_analysis.png in your project directory.
            Please open the file to view the full visualisation
            """)

        # The plots exist on disk at this point whether or not they could be embedded
        return True

    # Specific handler first; the generic one would otherwise shadow it
    except KeyError as e:
        messagebox.showerror("Key Error", f"Missing Data: {str(e)}")
        text_widget.insert("end", f"Key Error: {str(e)}\n")
        return False
    except Exception as e:
        messagebox.showerror("Value Error", f"Error in the Correlation Stage {str(e)}")
        text_widget.insert("end", f"Value Error: {str(e)}\n")
        return False

In [14]:
def gui_app():
    """Build the main window of the data management tool and run its event loop.

    The window is laid out on a single-column grid with four rows:

    0. a header banner,
    1. the "Inital Operations" buttons (load CSV, clean data, convert to JSON),
    2. the "Data Manipulation" buttons (stored JSON, manipulation, statistics,
       correlation),
    3. a scrollable text widget that every stage writes its log/output into.

    The function blocks in ``root.mainloop()`` until the window is closed and
    returns ``None``.
    """
    # Initialise the window
    root = Tk()
    root.title("Prototype GUI")
    root.geometry("800x600")  # Adjusted for a better initial layout
    root.configure(bg="#f7f7f7")  # Background color
    # NOTE(review): load_csv() calls root.update()/root.destroy() on a global
    # `root`, but this `root` is local to gui_app - confirm which is intended.

    def current_data():
        # load_csv() publishes the loaded DataFrame through the module-level
        # name `data`, which does not exist until a CSV has been loaded.
        # Looking it up lazily and falling back to None lets clean_data() reach
        # its own "data is None" check instead of the button callback failing
        # with a NameError before the stage function is even entered.
        return globals().get("data")

    # Header banner
    header_frame = Frame(root, bg="#333333")
    header_frame.grid(row=0, column=0, sticky="ew")
    Label(
        header_frame,
        text="Data Management Tool",
        bg="#333333",
        fg="#ffffff",
        font=("Helvetica", 18, "bold"),
        pady=10
    ).grid(row=0, column=0, sticky="ew")
    header_frame.grid_columnconfigure(0, weight=1)

    # Shared look for every action button
    btn_style = {
        "bg": "#007acc",
        "fg": "#000000",
        "font": ("Helvetica", 12, "bold"),
        "relief": "raised",
        "width": 15
    }

    # File handling operations for the main screen
    file_frame = LabelFrame(root, text="Inital Operations", bg="#f7f7f7", fg="#000000", pady=10, padx=5)
    file_frame.grid(row=1, column=0, sticky="ew", padx=10, pady=5)

    # Even-numbered columns are stretchable spacers so the buttons (placed in
    # the odd-numbered columns) stay evenly distributed when the window resizes.
    for spacer_column in (0, 2, 4, 6):
        file_frame.grid_columnconfigure(spacer_column, weight=1)

    # The lambdas reference `text_widget`, which is created further down; that
    # is fine because the name is only resolved when a button is clicked.
    initial_actions = [
        ("Load CSV", lambda: load_csv(text_widget)),
        ("Clean Data", lambda: clean_data(current_data(), text_widget)),
        ("Convert to JSON", lambda: convert_to_json(current_data(), text_widget)),
    ]
    for position, (caption, action) in enumerate(initial_actions):
        Button(file_frame, text=caption, command=action, **btn_style).grid(
            row=0, column=2 * position + 1, padx=10, pady=5
        )

    # Data manipulation section
    manipulation_frame = LabelFrame(root, text="Data Manipulation", bg="#f7f7f7", fg="#000000", pady=10, padx=10)
    manipulation_frame.grid(row=2, column=0, sticky="ew", padx=10, pady=10)

    # Same spacer/button column scheme as the initial operations frame
    for spacer_column in (0, 2, 4, 6, 8):
        manipulation_frame.grid_columnconfigure(spacer_column, weight=1)

    manipulation_actions = [
        ("Stored JSON Files", lambda: load_json(text_widget)),
        ("Data Manipulation", lambda: data_manipulation(text_widget)),
        ("Output Statistics", lambda: output_statistics(text_widget)),
        ("Output Correlation", lambda: output_correlation(text_widget)),
    ]
    for position, (caption, action) in enumerate(manipulation_actions):
        Button(manipulation_frame, text=caption, command=action, **btn_style).grid(
            row=0, column=2 * position + 1, padx=10, pady=5
        )

    # Logs and output section
    output_frame = LabelFrame(root, text="Logs and Output", bg="#f7f7f7", fg="#000000", pady=5, padx=5)
    output_frame.grid(row=3, column=0, sticky="nsew", padx=10, pady=10)

    # Text widget that displays the output of every stage
    text_widget = Text(
        output_frame,
        height=15,
        wrap=NONE,  # no wrapping: wide tables are reached with the horizontal scrollbar
        bg="#ffffff",
        fg="#000000",
        font=("Courier", 11)
    )
    text_widget.grid(row=0, column=0, padx=5, pady=5, sticky="nsew")

    # Side and bottom scrollbars, wired to the text widget in both directions
    y_scrollbar = ttk.Scrollbar(output_frame, orient="vertical", command=text_widget.yview)
    y_scrollbar.grid(row=0, column=1, sticky="ns")
    x_scrollbar = ttk.Scrollbar(output_frame, orient="horizontal", command=text_widget.xview)
    x_scrollbar.grid(row=1, column=0, sticky="ew")
    text_widget.configure(yscrollcommand=y_scrollbar.set, xscrollcommand=x_scrollbar.set)

    # Display welcome message and instructions
    open_message = """
        
        To get started, you have two options:
        
    1. Load a new CSV file containing your data:
        - Follow the process of data cleaning and Converting file .
        - Once the data is stored, you can then upload the first file to begin the analysis.
        
    2. Use an existing stored JSON file:
        - We have previously uploaded and processed a JSON files, you can use the stored JSON file button.
        - Simply select it during the file upload step
        
        """
    text_widget.insert("end", open_message)

    # Let the output area absorb all extra space when the window is resized
    output_frame.grid_rowconfigure(0, weight=1)
    output_frame.grid_columnconfigure(0, weight=1)
    root.grid_rowconfigure(3, weight=1)
    root.grid_columnconfigure(0, weight=1)

    root.mainloop()

# Launch the GUI; gui_app() ends in root.mainloop(), so this call blocks
# until the window is closed.
gui_app()