## Data Analysis Tool

In [None]:
# Install the required modules (this only needs to be done once)

pip install pandas matplotlib seaborn numpy fpdf reportlab

In [2]:
import os
import pandas as pd
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, Text, Scrollbar, simpledialog
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from shutil import copyfile
from pathlib import Path
import numpy as np
import shutil
from fpdf import FPDF
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader

# Function to find failed files
def find_failed_files(directory):
    """Return the names of entries in *directory* that end with ``FAILED.csv``.

    Only the bare names are returned (no directory prefix), in the order
    ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir(directory) if entry.endswith("FAILED.csv")]

# Function to check empty files
def check_empty_files(directory):
    """Return the names of ``.csv`` files in *directory* that hold no data.

    A file is reported as empty when any of the following is true:

    * it is smaller than 1024 bytes;
    * pandas parses it but finds no data rows (e.g. a header-only file);
    * pandas cannot find any columns in it at all (``EmptyDataError``),
      e.g. a file made only of blank lines.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list of bare file names, in ``os.listdir`` order.
    """
    empty_files = []
    for filename in os.listdir(directory):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(directory, filename)

        # Cheap size check first so tiny files are never parsed.
        if os.path.getsize(file_path) < 1024:
            empty_files.append(filename)
            continue

        try:
            # One parsed row is enough to know whether the file has data,
            # so avoid loading the whole file just to test emptiness.
            has_no_data = pd.read_csv(file_path, nrows=1).empty
        except pd.errors.EmptyDataError:
            # Nothing parseable at all: that is an empty file, not a crash.
            has_no_data = True

        if has_no_data:
            empty_files.append(filename)
    return empty_files

# Function to generate statistics table
def generate_statistics_table(directory):
    """Summarise the files in *directory* as an ``Attribute``/``Value`` table.

    Every entry of the directory is passed to ``parse_file_name``; entries
    whose name does not parse contribute only to the failed-file count (when
    they end with ``FAILED.csv``) and are otherwise ignored.

    Args:
        directory: Path of the folder to summarise (not recursive).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Attribute`` and ``Value``
        and ten rows: distinct main-element groups, the dominant-leg entry,
        per-sub-element file counts, distinct time points, total / failed /
        passed file counts, number of participants, the first test name seen
        and the number of distinct rep numbers.
    """
    # Protocol definitions: numeric codes used in file names -> display labels
    protocol = {
        'main_elements': {0: 'Dominant Leg'},
        'sub_elements': {0: 'Anterior', 1: 'Posteromedial', 2: 'Posterolateral'},
        'time_points': {0: 'Baseline'}
    }

    def label_for(kind, code):
        """Translate a raw code into its protocol label when one is defined."""
        labels = protocol[kind]
        if code in labels:
            return labels[code]
        # parse_file_name keeps the time point as text while the protocol keys
        # are ints, so retry the lookup with the numeric form of the code.
        try:
            return labels.get(int(code), code)
        except (TypeError, ValueError):
            return code

    def bump(counter, key):
        """Increase the count stored under *key* by one."""
        counter[key] = counter.get(key, 0) + 1

    subject_codes = set()
    time_points = {}
    main_elements = {}
    test_names = {}
    rep_numbers = set()
    sub_elements = {}

    failed_file_count = 0
    passed_file_count = 0

    for file_name in os.listdir(directory):
        is_failed = file_name.endswith("FAILED.csv")
        if is_failed:
            failed_file_count += 1

        attributes, error = parse_file_name(file_name)
        if error:
            continue
        if not is_failed:
            passed_file_count += 1

        # NOTE(review): positions 3, 4 and 5 are read here as test name, rep
        # number and sub element, while parse_file_name names the same
        # positions experiment, test name and rep number. Confirm which layout
        # the data files actually follow.
        subject_codes.add(attributes[0])
        bump(time_points, label_for('time_points', attributes[1]))
        bump(main_elements, label_for('main_elements', attributes[2]))
        bump(test_names, attributes[3])
        rep_numbers.add(attributes[4])
        bump(sub_elements, label_for('sub_elements', attributes[5]))

    # NOTE(review): this is 1 when a 'Dominant Leg' group exists and 0
    # otherwise; it is not a file count. Confirm that is the intended value.
    dominant_leg_count = sum(1 for group in main_elements if group == 'Dominant Leg')

    # FAILED files usually do not parse, so they must be added to the total
    # rather than subtracted from the parsed files to obtain the passed count.
    total_files_count = passed_file_count + failed_file_count

    protocol_statistics = pd.DataFrame({
        'Attribute': ['Distinct Groups in Main Element',
                      'Dominant Leg (Main Element)',
                      'Distinct Sub Elements',
                      'Distinct Time Points',
                      'File Counts',
                      'Failed File Count',
                      'Passed File Count',
                      'Number of Participants',
                      'Test Name',
                      'Distinct Rep Numbers'],
        'Value': [len(main_elements),
                  dominant_leg_count,
                  ', '.join([f"{sub}: {count}" for sub, count in sub_elements.items()]),
                  len(time_points),
                  total_files_count,
                  failed_file_count,
                  passed_file_count,
                  len(subject_codes),
                  next(iter(test_names), 'N/A'),
                  len(rep_numbers)]
    })

    return protocol_statistics

# Function to parse the file name and extract attributes
def parse_file_name(file_name):
    """Split an underscore-delimited file name into its seven attributes.

    Returns a pair ``(attributes, error)``. On success ``attributes`` is the
    tuple ``(subject_code, time_point, main_element, experiment, test_name,
    rep_number, sub_element)`` with the third, sixth and seventh items
    converted to ``int`` and ``error`` is ``False``. When the name does not
    have exactly seven parts, or one of those three items is not an integer,
    the result is ``(None, True)``.
    """
    fields = file_name.split('_')
    if len(fields) != 7:
        return None, True

    subject_code, time_point, main_raw, experiment, test_name, rep_raw, tail = fields
    try:
        attributes = (
            subject_code,
            time_point,
            int(main_raw),
            experiment,
            test_name,
            int(rep_raw),
            # The last field carries the file extension; keep what precedes the first dot.
            int(tail.split('.')[0]),
        )
    except ValueError:
        return None, True
    return attributes, False
    
# Function to reorder file attributes based on predefined formats
def reorder_file_attributes(folder_path, format_choice):
    """Copy the files of *folder_path* into ``reordered_files`` under a chosen naming format.

    The ``reordered_files`` sub-folder is created when missing and emptied
    when it already exists. Only regular files whose name splits into exactly
    seven underscore-separated parts are copied; other entries are skipped
    with a printed message. The original files are never modified.

    Args:
        folder_path: Folder holding the files to copy.
        format_choice: ``'2'`` moves the last name part (called the timestamp
            here) to the front of the name; ``'3'`` keeps the original name.

    Raises:
        ValueError: If *format_choice* is neither ``'2'`` nor ``'3'``. The
            check runs before the output folder is touched, so an invalid
            choice never deletes previously reordered files.
    """
    if format_choice not in ('2', '3'):
        raise ValueError(f"Unknown reordering format: {format_choice!r}")

    file_list = os.listdir(folder_path)

    # Start from an empty output folder so stale copies from a previous run
    # (possibly in another format) do not mix with the new ones.
    reordered_folder_path = os.path.join(folder_path, "reordered_files")
    if not os.path.exists(reordered_folder_path):
        os.makedirs(reordered_folder_path)
    else:
        for filename in os.listdir(reordered_folder_path):
            file_path = os.path.join(reordered_folder_path, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print(f'Failed to delete {file_path}. Reason: {e}')

    for file_name in file_list:
        full_file_path = os.path.join(folder_path, file_name)

        # Sub-folders (including reordered_files itself) are not data files.
        if os.path.isdir(full_file_path):
            continue

        parts = file_name.split('_')
        if len(parts) != 7:
            print(f"Skipping file {file_name}: Invalid format")
            continue

        if format_choice == '2':
            # Split the extension off the last part only, so a name without
            # an extension stays intact and dots inside the part are kept.
            timestamp, extension = os.path.splitext(parts[-1])
            reordered_file_name = '_'.join([timestamp] + parts[:-1]) + extension
        else:
            # Format '3': return to the original naming.
            reordered_file_name = file_name

        new_file_path = os.path.join(reordered_folder_path, reordered_file_name)
        try:
            shutil.copy(full_file_path, new_file_path)
        except OSError as e:
            # One unreadable file should not abort the rest of the batch.
            print(f"Skipping file {file_name}: {e}")
            continue

        print(f"Reordered file: {file_name} -> {reordered_file_name}")


class Application(tk.Tk):
    """Main window of the tool: a notebook with one tab per feature."""

    def __init__(self):
        super().__init__()
        self.title("Data Analysis Tool")

        notebook = ttk.Notebook(self)
        self.notebook = notebook

        # Build every tab first; the report tab needs three of the others.
        self.file_check_view_tab = FileCheckViewTab(notebook)
        self.folder_summary_tab = FolderSummaryTab(notebook)
        self.file_reordering_tab = FileReorderingTab(notebook)
        self.data_analysis_tab = DataAnalysisTab(notebook)
        self.visualisation_tool_tab = VisualisationToolTab(notebook)
        self.generate_report_tab = GenerateReportTab(
            notebook,
            self.file_check_view_tab,
            self.folder_summary_tab,
            self.data_analysis_tab,
        )

        # Register the tabs in display order.
        captioned_tabs = (
            (self.file_check_view_tab, "File Check Tab"),
            (self.folder_summary_tab, "Folder Summary Tab"),
            (self.file_reordering_tab, "File Reordering Tab"),
            (self.data_analysis_tab, "Data Analysis Tab"),
            (self.visualisation_tool_tab, "Visualisation Tool Tab"),
            (self.generate_report_tab, "Generate Report Tab"),
        )
        for tab, caption in captioned_tabs:
            notebook.add(tab, text=caption)

        notebook.pack(expand=True, fill='both')


# File Check & View Tab
class FileCheckViewTab(ttk.Frame):
    """Tab that lists the failed and empty CSV files of a chosen directory."""

    def __init__(self, parent):
        super().__init__(parent)
        # Directory picked by the user; empty until one has been chosen.
        self.directory = ""

        self.browse_button = tk.Button(self, text="Browse Directory", command=self.browse_directory)
        self.browse_button.pack(pady=10)

        self.check_button = tk.Button(self, text="Check for Failed/Empty Files", command=self.view_failed_empty_files)
        self.check_button.pack(pady=10)

        self.result_frame = tk.Frame(self)
        self.result_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    def browse_directory(self):
        """Ask for a directory; a cancelled dialog keeps the previous choice."""
        chosen = filedialog.askdirectory()
        if chosen:
            self.directory = chosen

    def view_failed_empty_files(self):
        """Scan the selected directory and show the failed and empty files."""
        if not self.directory:
            messagebox.showwarning("Warning", "Please select a directory first.")
            return
        failed_files = find_failed_files(self.directory)
        empty_files = check_empty_files(self.directory)
        self.display_results(failed_files, empty_files)

    def display_results(self, failed_files, empty_files):
        """Replace the content of the result frame with a textual report.

        Args:
            failed_files: Names of the files flagged as failed.
            empty_files: Names of the CSV files considered empty.
        """
        result_text = ""
        if failed_files:
            result_text += f"Failed files found: {len(failed_files)}\n" + "\n".join(failed_files) + "\n\n"
        else:
            result_text += "No failed files found.\n\n"

        if empty_files:
            result_text += f"Empty CSV files found: {len(empty_files)}\n" + "\n".join(empty_files)
        else:
            result_text += "No empty CSV files found."

        for widget in self.result_frame.winfo_children():
            widget.destroy()

        # Pack the scrollbar before the expanding text widget: pack hands out
        # space in packing order, so the other order leaves the scrollbar
        # stranded below the text instead of alongside it.
        scroll_y = Scrollbar(self.result_frame, orient='vertical')
        scroll_y.pack(side='right', fill='y')

        text = Text(self.result_frame, wrap='word', yscrollcommand=scroll_y.set)
        text.insert('1.0', result_text)
        text.pack(side='left', expand=True, fill='both')
        scroll_y.configure(command=text.yview)

# Folder Summary Tab
class FolderSummaryTab(ttk.Frame):
    """Tab that shows the statistics table of a chosen directory."""

    def __init__(self, parent):
        super().__init__(parent)
        # Directory picked by the user; empty until one has been chosen.
        self.directory = ""

        self.browse_button = tk.Button(self, text="Browse Directory", command=self.browse_directory)
        self.browse_button.pack(pady=10)

        self.summary_button = tk.Button(self, text="Generate Summary", command=self.view_folder_summary)
        self.summary_button.pack(pady=10)

        self.stats_frame = tk.Frame(self)
        self.stats_frame.pack(pady=10, fill=tk.BOTH, expand=True)

    def browse_directory(self):
        """Ask for a directory; a cancelled dialog keeps the previous choice."""
        chosen = filedialog.askdirectory()
        if chosen:
            self.directory = chosen

    def view_folder_summary(self):
        """Build the statistics table for the selected directory and show it."""
        if not self.directory:
            messagebox.showwarning("Warning", "Please select a directory first.")
            return
        statistics_table = generate_statistics_table(self.directory)
        self.display_statistics_table(statistics_table)

    def display_statistics_table(self, statistics_table):
        """Replace the content of the stats frame with *statistics_table* as text.

        Args:
            statistics_table: DataFrame to render with ``to_string(index=False)``.
        """
        for widget in self.stats_frame.winfo_children():
            widget.destroy()

        # Pack the scrollbar before the expanding text widget: pack hands out
        # space in packing order, so the other order leaves the scrollbar
        # stranded below the text instead of alongside it.
        scroll_y = Scrollbar(self.stats_frame, orient='vertical')
        scroll_y.pack(side='right', fill='y')

        text = Text(self.stats_frame, wrap='none', yscrollcommand=scroll_y.set)
        text.insert('1.0', statistics_table.to_string(index=False))
        text.pack(side='left', expand=True, fill='both')
        scroll_y.configure(command=text.yview)

# File Reordering Tab
class FileReorderingTab(ttk.Frame):
    """Tab that copies the files of a directory under a chosen naming format."""

    def __init__(self, parent):
        super().__init__(parent)
        self.directory = ""

        self.browse_button = tk.Button(self, text="Browse Directory", command=self.browse_directory)
        self.browse_button.pack(pady=10)

        self.reorder_button = tk.Button(self, text="Reorder Files", command=self.sort_files)
        self.reorder_button.pack(pady=10)

    def browse_directory(self):
        """Store whatever the directory dialog returns as the working directory."""
        self.directory = filedialog.askdirectory()

    def sort_files(self):
        """Open the format prompt, or warn when no directory has been selected."""
        if not self.directory:
            messagebox.showwarning("Warning", "Please select a directory first.")
            return
        self.prompt_reordering_format()

    def prompt_reordering_format(self):
        """Show a small window with one button per reordering format."""
        dialog = tk.Toplevel(self)
        dialog.title("Choose Reordering Format")

        ttk.Label(dialog, text="Choose reordering format:").pack(pady=10)

        def apply_choice(choice):
            # Run the reordering, then dismiss the prompt.
            reorder_file_attributes(self.directory, choice)
            dialog.destroy()

        options = (("Order by Timestamp", '2'), ("Order by Subject Code", '3'))
        for caption, choice in options:
            # Bind the choice as a default so each button keeps its own value.
            button = ttk.Button(dialog, text=caption, command=lambda c=choice: apply_choice(c))
            button.pack(pady=5)

# Data Analysis Tab
class DataAnalysisTab(ttk.Frame):
    """Tab that plots the distribution of one CSV column and lists its outliers.

    For a numeric column a histogram and a boxplot are drawn and the values
    outside 1.5 x IQR from the quartiles are listed; for any other column only
    the histogram is drawn.
    """

    def __init__(self, parent):
        super().__init__(parent)
        self.parent = parent

        self.browse_button = tk.Button(self, text="Browse CSV", command=self.view_metric_distribution)
        self.browse_button.pack(pady=10)

        # Matplotlib canvas of the latest plot (None until a column is plotted).
        self.canvas = None

        self.outliers_label = tk.Label(self, text="Number of outliers: 0")
        self.outliers_label.pack(pady=10)

        self.outliers_listbox = tk.Listbox(self)
        self.outliers_listbox.pack(pady=10)

    def view_metric_distribution(self):
        """Ask for a CSV file, load it and open the column chooser."""
        csv_file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if not csv_file_path:
            return
        try:
            df = pd.read_csv(csv_file_path)
        except (OSError, ValueError) as e:
            # pandas parser/empty-data errors and decoding errors derive from
            # ValueError; report them instead of failing silently in the GUI.
            messagebox.showerror("Error", f"Error reading file: {csv_file_path}\n{e}")
            return
        self.select_column_for_analysis(df)

    def select_column_for_analysis(self, df):
        """Open a window listing the columns of *df* and plot the chosen one."""
        column_select_window = tk.Toplevel(self.parent)
        column_select_window.title("Select Column for Analysis")

        label = ttk.Label(column_select_window, text="Columns in the CSV file:")
        label.pack(pady=10)

        listbox = tk.Listbox(column_select_window, selectmode='single')
        for col in df.columns:
            listbox.insert(tk.END, col)
        listbox.pack(pady=10)

        def on_select():
            selected_column = listbox.get(tk.ACTIVE)
            if not selected_column:
                # Nothing to plot (the list is empty); keep the window open.
                return
            self.display_metric_distribution(df, selected_column)
            column_select_window.destroy()

        select_button = ttk.Button(column_select_window, text="Select", command=on_select)
        select_button.pack(pady=5)

    def display_metric_distribution(self, df, column):
        """Plot *column* of *df* and refresh the outlier label and list.

        Args:
            df: DataFrame holding the data; *column* is converted to numeric
                in place when possible.
            column: Name of the column to analyse.
        """
        if self.canvas:
            # Drop the previous plot completely: destroy its widget and close
            # its figure, otherwise pyplot keeps every figure alive.
            self.canvas.get_tk_widget().destroy()
            plt.close(self.canvas.figure)
            self.canvas = None

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), gridspec_kw={'hspace': 0.5})

        # Check if column is numeric
        try:
            df[column] = pd.to_numeric(df[column])
            numeric_column = True
        except (ValueError, TypeError):
            numeric_column = False

        if numeric_column:
            # Histogram plot
            sns.histplot(df[column], kde=True, ax=ax1)
            ax1.set_title(f'Metric Distribution: {column}')
            ax1.set_xlabel(column)
            ax1.set_ylabel('Frequency')

            # Boxplot plot
            sns.boxplot(x=df[column], ax=ax2)
            ax2.set_title(f'Boxplot: {column}')
            ax2.set_xlabel(column)
            ax2.set_ylabel('Value')

            # Outliers: values more than 1.5 x IQR outside the quartiles
            q1 = df[column].quantile(0.25)
            q3 = df[column].quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
            num_outliers = len(outliers)

            self.outliers_label.config(text=f"Number of outliers: {num_outliers}")

            self.outliers_listbox.delete(0, tk.END)  # Clear previous entries
            for index, value in outliers[column].items():
                # index is the row label of the outlier in the original DataFrame
                self.outliers_listbox.insert(tk.END, f"Index {index}: {value}")

        else:
            # Non-numeric column: plot histogram only
            sns.histplot(df[column], ax=ax1)
            ax1.set_title(f'Metric Distribution: {column}')
            ax1.set_xlabel(column)
            ax1.set_ylabel('Frequency')
            ax2.axis('off')
            self.outliers_label.config(text="Column is not numeric")
            self.outliers_listbox.delete(0, tk.END)  # Clear previous entries

        # Draw canvas
        self.canvas = FigureCanvasTkAgg(fig, master=self)
        self.canvas.draw()
        self.canvas.get_tk_widget().pack(expand=True, fill='both')

# Visualisation Tool Tab
class VisualisationToolTab(ttk.Frame):
    """Tab for browsing trial CSV files of one sub-element, plotting and flagging them.

    Flagged file names are collected per sub-element and flag type and are
    copied to ``Desktop/flagged_files`` when the user saves; files flagged as
    ``Discard`` are copied there immediately and removed from the data
    directory.
    """

    def __init__(self, parent):
        super().__init__(parent)
        self.parent = parent

        self.directory = ""
        self.files = []
        # Files of the selected sub-element; defined up front so the
        # navigation buttons are safe before any selection has been made.
        self.filtered_files = []
        self.flagged_files = {'Anterior': {'Noisy': [], 'Inverted': [], 'Discard': []},
                              'Posteromedial': {'Noisy': [], 'Inverted': [], 'Discard': []},
                              'Posterolateral': {'Noisy': [], 'Inverted': [], 'Discard': []}}
        self.current_index = 0
        self.selected_sub_element = ""
        # Figure currently embedded in plot_frame (None when nothing is shown).
        self._figure = None

        self.create_widgets()

    @staticmethod
    def _flagged_root():
        """Return the path of the ``flagged_files`` folder on the user's Desktop."""
        return os.path.join(str(Path.home() / "Desktop"), "flagged_files")

    def create_widgets(self):
        """Create and lay out all widgets of the tab."""
        self.directory_label = tk.Label(self, text="Select Data Directory:")
        self.directory_label.grid(row=0, column=0, padx=10, pady=10)

        self.directory_button = tk.Button(self, text="Browse", command=self.select_directory)
        self.directory_button.grid(row=0, column=1, padx=10, pady=10)

        self.sub_element_label = tk.Label(self, text="Select Sub-Element:")
        self.sub_element_label.grid(row=1, column=0, padx=10, pady=10)

        self.sub_element_combo = ttk.Combobox(self, values=["Anterior", "Posteromedial", "Posterolateral"])
        self.sub_element_combo.grid(row=1, column=1, padx=10, pady=10)
        self.sub_element_combo.bind("<<ComboboxSelected>>", self.load_files_for_sub_element)

        self.plot_frame = tk.Frame(self)
        self.plot_frame.grid(row=2, column=0, columnspan=2, padx=10, pady=10)

        self.control_frame = tk.Frame(self)
        self.control_frame.grid(row=3, column=0, columnspan=2, padx=10, pady=10)

        self.previous_button = tk.Button(self.control_frame, text="Previous", command=self.show_previous_trial)
        self.previous_button.grid(row=0, column=0, padx=10, pady=10)

        self.next_button = tk.Button(self.control_frame, text="Next", command=self.show_next_trial)
        self.next_button.grid(row=0, column=1, padx=10, pady=10)

        self.flag_button = tk.Button(self.control_frame, text="Flag", command=self.flag_current_trial)
        self.flag_button.grid(row=0, column=2, padx=10, pady=10)

        self.save_button = tk.Button(self.control_frame, text="Save", command=self.save_flagged_files_and_notify)
        self.save_button.grid(row=0, column=3, padx=10, pady=10)

        self.trial_info_label = tk.Label(self, text="")
        self.trial_info_label.grid(row=4, column=0, columnspan=2, padx=10, pady=10)

    def select_directory(self):
        """Ask for the data directory and index its CSV files."""
        chosen = filedialog.askdirectory()
        if not chosen:
            # Dialog cancelled: keep the current directory and file list.
            return
        self.directory = chosen
        self.files = [f for f in os.listdir(self.directory) if f.endswith('.csv')]
        # The previous selection referred to the old directory.
        self.filtered_files = []
        self.current_index = 0
        messagebox.showinfo("Directory Selected", f"Selected directory: {self.directory}\nFound {len(self.files)} CSV files.")

    def load_files_for_sub_element(self, event):
        """Filter the indexed files by the chosen sub-element and show the first one."""
        self.selected_sub_element = self.sub_element_combo.get()
        self.filtered_files = [f for f in self.files if self.get_sub_element_from_file_name(f) == self.selected_sub_element]
        self.current_index = 0
        if self.filtered_files:
            self.show_trial_data()
        else:
            messagebox.showinfo("No Files", f"No files found for sub-element: {self.selected_sub_element}")

    def get_sub_element_from_file_name(self, file_name):
        """Return the sub-element label encoded in *file_name*.

        The sixth underscore-separated part of the name is read as an integer
        code (0, 1 or 2). Names without such a part, or with a non-integer
        part or an unknown code, yield ``"Unknown"``.
        """
        try:
            sub_element_code = int(file_name.split('_')[5])
        except (IndexError, ValueError):
            # Any CSV may sit in the folder; a foreign name must not abort
            # the filtering of the remaining files.
            return "Unknown"
        sub_elements = {0: 'Anterior', 1: 'Posteromedial', 2: 'Posterolateral'}
        print(f"File name: {file_name}")
        print(f"Extracted sub_element_code: {sub_element_code}")
        return sub_elements.get(sub_element_code, "Unknown")

    def calculate_gyro_magnitude(self, data):
        """Return the Euclidean norm of the ``Gyro_X/Y/Z`` columns of *data*."""
        gyro_magnitude = np.sqrt(data['Gyro_X']**2 + data['Gyro_Y']**2 + data['Gyro_Z']**2)
        return gyro_magnitude

    def show_trial_data(self):
        """Plot the current trial, moving on to the next file after each failure."""
        while self.current_index < len(self.filtered_files):
            file_name = self.filtered_files[self.current_index]
            file_path = os.path.join(self.directory, file_name)
            try:
                data = pd.read_csv(file_path)
                self.visualise_trial_data(data)
                self.update_trial_info(file_name)
                break
            except Exception as e:
                messagebox.showerror("Error", f"Error processing file: {file_name}\n{str(e)}")
                self.current_index += 1

    def _clear_plot(self):
        """Remove the embedded plot and release its pyplot figure."""
        for widget in self.plot_frame.winfo_children():
            widget.destroy()
        if self._figure is not None:
            # pyplot keeps every figure it created until it is closed.
            plt.close(self._figure)
            self._figure = None

    def visualise_trial_data(self, data):
        """Plot ``Accel_LN_X`` and the gyro magnitude of *data* against elapsed seconds.

        *data* is modified in place: ``System_Timestamp`` (read as
        milliseconds) is replaced by the seconds elapsed since the first row
        and a ``Gyro_Magnitude`` column is added.
        """
        data['System_Timestamp'] = (pd.to_datetime(data['System_Timestamp'], unit='ms') - pd.to_datetime(data['System_Timestamp'].iloc[0], unit='ms')).dt.total_seconds()
        data['Gyro_Magnitude'] = self.calculate_gyro_magnitude(data)
        color = 'red' if self.selected_sub_element == 'Anterior' else 'green' if self.selected_sub_element == 'Posteromedial' else 'blue'

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        try:
            axes[0].plot(data['System_Timestamp'], data['Accel_LN_X'], color=color)
            axes[0].set_title(f'Accel_LN_X vs. System_Timestamp ({self.selected_sub_element})')
            axes[0].set_xlabel('Time (seconds)')
            axes[0].set_ylabel('Accel_LN_X')
            axes[0].grid(True)

            axes[1].plot(data['System_Timestamp'], data['Gyro_Magnitude'], color='purple')
            axes[1].set_title(f'Gyro_Magnitude vs. System_Timestamp ({self.selected_sub_element})')
            axes[1].set_xlabel('Time (seconds)')
            axes[1].set_ylabel('Gyro_Magnitude')
            axes[1].grid(True)
        except Exception:
            # Do not leave a half-built figure registered with pyplot.
            plt.close(fig)
            raise

        self._clear_plot()
        self._figure = fig

        figure_canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
        figure_canvas.draw()
        figure_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

    def show_previous_trial(self):
        """Step back one trial when there is one."""
        if self.current_index > 0:
            self.current_index -= 1
            self.show_trial_data()

    def show_next_trial(self):
        """Step forward one trial when there is one."""
        if self.current_index < len(self.filtered_files) - 1:
            self.current_index += 1
            self.show_trial_data()

    def flag_current_trial(self):
        """Ask for a flag type and record it for the trial currently shown.

        A ``Discard`` flag additionally copies the file to the discard folder
        and deletes it from the data directory.
        """
        if not self.filtered_files:
            messagebox.showwarning("Warning", "There is no trial to flag.")
            return

        flag_types = {'n': 'Noisy', 'i': 'Inverted', 'd': 'Discard'}
        flag_input = simpledialog.askstring("Flag Trial", "Enter flag type (n for Noisy, i for Inverted, d for Discard):")
        if flag_input is None:
            # Dialog cancelled.
            return

        flag_type = flag_types.get(flag_input.strip().lower())
        if not flag_type:
            messagebox.showerror("Invalid Flag Type", "Invalid flag type entered. Please enter n for Noisy, i for Inverted, or d for Discard.")
            return

        file_name = self.filtered_files[self.current_index]
        # The combobox is editable, so the selection may be a label that has
        # no bucket yet.
        buckets = self.flagged_files.setdefault(
            self.selected_sub_element, {'Noisy': [], 'Inverted': [], 'Discard': []})
        if file_name not in buckets[flag_type]:
            buckets[flag_type].append(file_name)

        if flag_type == 'Discard':
            self.save_file_to_discard_folder(file_name)
            os.remove(os.path.join(self.directory, file_name))
            self.files.remove(file_name)
            self.filtered_files.remove(file_name)
            # Stay within bounds; never fall to -1 when the list empties.
            self.current_index = max(0, min(self.current_index, len(self.filtered_files) - 1))

        messagebox.showinfo("File Flagged", f"File {file_name} flagged as {flag_type}.")

        if self.filtered_files:
            self.show_trial_data()
        else:
            # Last file discarded: do not keep showing a deleted trial.
            self._clear_plot()
            self.trial_info_label.config(text="")

    def save_file_to_discard_folder(self, file_name):
        """Copy *file_name* from the data directory to its Desktop discard folder."""
        src = os.path.join(self.directory, file_name)
        sub_element = self.get_sub_element_from_file_name(file_name).lower()
        discard_folder = os.path.join(self._flagged_root(), f"{sub_element}_flagged", "discard")
        os.makedirs(discard_folder, exist_ok=True)
        dst = os.path.join(discard_folder, file_name)
        copyfile(src, dst)
        messagebox.showinfo("File Moved", f"File {file_name} moved to discard folder:\n{discard_folder}")

    def update_trial_info(self, file_name):
        """Show the position and name of the current trial below the plot."""
        total_trials = len(self.filtered_files)
        current_trial = self.current_index + 1
        self.trial_info_label.config(text=f"Trial {current_trial}/{total_trials}: {file_name}")

    def save_flagged_files_and_notify(self):
        """Copy the flagged files to the Desktop and tell the user where they are."""
        self.save_flagged_files()
        flagged_folder = self._flagged_root()
        messagebox.showinfo("Flagged Files Saved", f"Flagged files saved in sub-folders of:\n{flagged_folder}")

    def save_flagged_files(self):
        """Copy every flagged file into ``flagged_files/<sub element>_flagged/<flag type>``.

        Folders are created for every sub-element and flag type even when
        nothing was flagged; files that no longer exist in the data directory
        (for instance discarded ones) are skipped.
        """
        output_folder = self._flagged_root()

        for sub_element, flag_types in self.flagged_files.items():
            sub_element_output_path = os.path.join(output_folder, f"{sub_element.lower()}_flagged")

            for flag_type, files in flag_types.items():
                flag_type_output_path = os.path.join(sub_element_output_path, flag_type.lower())
                os.makedirs(flag_type_output_path, exist_ok=True)

                for file_name in files:
                    src = os.path.join(self.directory, file_name)
                    dst = os.path.join(flag_type_output_path, file_name)
                    if os.path.exists(src):
                        copyfile(src, dst)
# Generate Report Tab
class GenerateReportTab(ttk.Frame):
    """Tab that writes a PDF report from the state of the other tabs.

    The report lists the failed and empty files of the File Check tab's
    directory, the statistics table of the Folder Summary tab's directory and,
    on a second page, the current plot and outlier list of the Data Analysis
    tab.
    """

    def __init__(self, parent, file_check_view_tab, folder_summary_tab, data_analysis_tab):
        super().__init__(parent)
        self.file_check_view_tab = file_check_view_tab
        self.folder_summary_tab = folder_summary_tab
        self.data_analysis_tab = data_analysis_tab

        self.generate_button = tk.Button(self, text="Generate PDF Report", command=self.generate_report)
        self.generate_button.pack(pady=10)

    def generate_report(self):
        """Collect the results, ask for a destination and write the PDF."""
        check_directory = self.file_check_view_tab.directory
        summary_directory = self.folder_summary_tab.directory
        if not check_directory or not summary_directory:
            # Scanning an empty path would raise; tell the user what is missing.
            messagebox.showwarning(
                "Warning",
                "Please select a directory in the File Check Tab and the Folder Summary Tab first.")
            return

        failed_files = find_failed_files(check_directory)
        empty_files = check_empty_files(check_directory)
        statistics_table = generate_statistics_table(summary_directory)

        report_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF files", "*.pdf")])
        if not report_path:
            messagebox.showwarning("Save Cancelled", "Report generation was cancelled")
            return

        c = canvas.Canvas(report_path, pagesize=letter)
        _, height = letter
        left = 30
        top = height - 40
        bottom = 40  # lowest baseline allowed before a page break

        def start_page():
            """Finish the current page and return the first baseline of the next."""
            c.showPage()
            # Set the font again explicitly for the new page.
            c.setFont("Helvetica", 12)
            return top

        def write_line(y, text, step=15):
            """Draw *text* at *y* (breaking the page if needed); return the next baseline."""
            if y < bottom:
                y = start_page()
            c.drawString(left, y, text)
            return y - step

        c.setFont("Helvetica", 12)

        # Failed Files Section
        y = write_line(top, "Failed Files", step=20)
        for failed_name in failed_files:
            y = write_line(y, failed_name)

        # Empty Files Section
        y = write_line(y - 20, "Empty Files", step=20)
        for empty_name in empty_files:
            y = write_line(y, empty_name)

        # Statistics Section
        y = write_line(y - 20, "Statistics", step=20)
        for _, row in statistics_table.iterrows():
            y = write_line(y, f"{row['Attribute']}: {row['Value']}")

        # Data Analysis Section starts on its own page
        y = start_page()
        y = write_line(y, "Data Analysis", step=20)

        # Include the plot of the Data Analysis tab, when one exists
        if self.data_analysis_tab.canvas:
            buf = BytesIO()
            self.data_analysis_tab.canvas.figure.savefig(buf, format='png')
            buf.seek(0)
            img_reader = ImageReader(buf)
            c.drawImage(img_reader, left, y - 400, width=540, height=400)
            buf.close()
            # Reserve the image height only when an image was actually drawn.
            y -= 420

        # Include Outliers Information
        y = write_line(y, "Outliers Information", step=20)
        y = write_line(y, self.data_analysis_tab.outliers_label.cget("text"), step=20)
        for outlier in self.data_analysis_tab.outliers_listbox.get(0, tk.END):
            y = write_line(y, outlier)

        # Save the PDF
        c.save()
        messagebox.showinfo("Report Generated", f"Report has been saved to {report_path}")

# Start the GUI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    
    # Build the main window with all its tabs, then hand control to Tk's event loop.
    app = Application()
    app.mainloop()