# Notebook cell In [1]:
import tkinter as tk
from tkinter import ttk, filedialog, messagebox

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

class MetaboliteAnalysisApp(tk.Tk):
    """Main window for checking metabolite-standard peak areas.

    The user picks an Excel workbook; its ``PoolAfterDF`` sheet is read with
    the ``Compound`` column as index, every row is divided element-wise by the
    ``trifluoromethanesulfonate`` row, and per-compound statistics of both the
    raw and the normalised values are listed in a table.  Double-clicking a
    statistics cell opens a popup plotting the values behind that cell.
    """

    # Row every other compound is divided by to obtain the normalised table.
    REFERENCE_COMPOUND = 'trifluoromethanesulfonate'
    # Worksheet and index column expected in the uploaded workbook.
    DATA_SHEET = 'PoolAfterDF'
    INDEX_COLUMN = 'Compound'

    # (column identifier, heading text, pixel width) for the statistics table.
    TABLE_COLUMNS = (
        ('Compound', 'Compound', 150),
        ('Mean', 'Mean', 70),
        ('min-max', 'min-max', 120),
        ('RSD', 'RSD (%)', 30),
        ('IQR', 'IQR', 70),
        ('N Mean', 'N Mean', 70),
        ('N min-max', 'N min-max', 120),
        ('N RSD', 'N RSD (%)', 30),
        ('N IQR', 'N IQR', 70),
        ('Score', 'Score', 80),
    )

    # Treeview display column -> (use normalised data?, plot kind).
    # '#1' (Compound) and '#10' (Score) are deliberately absent: no plot.
    PLOT_ACTIONS = {
        "#2": (False, "scatter"),   # Mean
        "#3": (False, "scatter"),   # min-max
        "#4": (False, "box"),       # RSD
        "#5": (False, "box"),       # IQR
        "#6": (True, "scatter"),    # N Mean
        "#7": (True, "scatter"),    # N min-max
        "#8": (True, "box"),        # N RSD
        "#9": (True, "box"),        # N IQR
    }

    def __init__(self):
        """Create the window, its notebook and the peak-area tab."""
        super().__init__()
        self.title("Metabolite Standard Peak Area Analysis")
        self.geometry("1200x1400")

        # Populated by upload_std_peak_area_file(); None until a file is loaded.
        self.df = None
        self.df_normalized = None
        # Treeview item id -> original index label, so a double-click can look
        # the row up without round-tripping the label through Tcl strings.
        self._row_compounds = {}

        self.notebook = ttk.Notebook(self)
        self.notebook.pack(fill='both', expand=True)
        self.setup_met_std_peak_area_check_ui()

    def setup_met_std_peak_area_check_ui(self):
        """Build the 'Std Peak Area Check' tab: file picker row and statistics table."""
        self.tab_peak_area_check = ttk.Frame(self.notebook)
        self.notebook.add(self.tab_peak_area_check, text='Std Peak Area Check')

        # File path entry with the upload button to its right.
        self.file_path_frame_peak_area = tk.Frame(self.tab_peak_area_check)
        self.file_path_frame_peak_area.pack(fill='x', pady=15)

        self.file_path_entry_peak_area = tk.Entry(self.file_path_frame_peak_area)
        self.file_path_entry_peak_area.pack(side=tk.LEFT, fill=tk.X, expand=True)
        self.upload_button_peak_area = tk.Button(
            self.file_path_frame_peak_area,
            text="Upload Met Std Peak Area Data",
            command=self.upload_std_peak_area_file,
        )
        self.upload_button_peak_area.pack(side=tk.RIGHT, padx=5)

        # Statistics table.
        self.std_met_peak_area_stats = tk.Frame(self.tab_peak_area_check)
        self.std_met_peak_area_stats.pack(fill='x', pady=15)

        self.met_stats_table = ttk.Treeview(
            self.std_met_peak_area_stats,
            columns=tuple(column_id for column_id, _, _ in self.TABLE_COLUMNS),
            show="headings",
        )
        for column_id, heading, width in self.TABLE_COLUMNS:
            self.met_stats_table.heading(column_id, text=heading, anchor=tk.CENTER)
            self.met_stats_table.column(column_id, width=width, anchor=tk.CENTER)
        self.met_stats_table.pack(fill='x', pady=5, padx=15)

        self.met_stats_table.bind("<Double-1>", self.on_double_click)

    def upload_std_peak_area_file(self):
        """Ask for a workbook, load and normalise it, then refresh the table.

        Every failure (wrong extension, unreadable file, missing sheet,
        missing or duplicated compound rows) is reported in an error dialog
        and leaves the previously loaded data untouched.
        """
        analysis_fpath = filedialog.askopenfilename()
        if not analysis_fpath:
            return  # dialog cancelled

        self.file_path_entry_peak_area.delete(0, tk.END)
        self.file_path_entry_peak_area.insert(0, analysis_fpath)

        if not self.is_excel_file(analysis_fpath):
            messagebox.showerror("Error", "Selected file is not a valid Excel file.")
            return

        try:
            # Open the workbook once and make sure the handle is released.
            with pd.ExcelFile(analysis_fpath) as workbook:
                if self.DATA_SHEET in workbook.sheet_names:
                    df = workbook.parse(self.DATA_SHEET, index_col=self.INDEX_COLUMN)
                else:
                    df = None
        except Exception as exc:
            # UI boundary: pandas and its Excel engines raise many unrelated
            # exception types (OSError, ValueError, ImportError, BadZipFile...);
            # surface all of them to the user instead of losing them in the
            # Tk callback handler.
            messagebox.showerror("Error", f"Could not read the Excel file:\n{exc}")
            return

        if df is None:
            messagebox.showerror("Error", f"'{self.DATA_SHEET}' sheet not present in the Excel file.")
            return
        if self.REFERENCE_COMPOUND not in df.index:
            messagebox.showerror(
                "Error",
                f"Reference compound '{self.REFERENCE_COMPOUND}' not found in the "
                f"'{self.INDEX_COLUMN}' column.",
            )
            return
        if not df.index.is_unique:
            # Duplicate labels make .loc[...] return a DataFrame instead of a
            # row, which the per-compound statistics cannot handle.
            messagebox.showerror("Error", f"Duplicate names found in the '{self.INDEX_COLUMN}' column.")
            return

        self.df = df
        self.df_normalized = df.div(df.loc[self.REFERENCE_COMPOUND])
        self.update_treeview()

    def is_excel_file(self, fpath):
        """Return True when *fpath* ends in ``.xls`` or ``.xlsx`` (any letter case)."""
        return fpath.lower().endswith(('.xls', '.xlsx'))

    def calculate_rsd(self, data):
        """Return the relative standard deviation of *data* in percent.

        Returns NaN when the mean is zero, where the ratio is undefined.
        """
        mean = data.mean()
        if mean == 0:
            return np.nan
        return (data.std() / mean) * 100

    def calculate_iqr_and_range(self, data):
        """Return ``(iqr, "min-max")`` for *data*, ignoring missing values.

        Missing values are dropped first so the IQR agrees with the mean and
        min/max, which skip them as well.  When nothing is left the result is
        ``(nan, "nan-nan")``.
        """
        values = pd.Series(data).dropna().to_numpy(dtype=float)
        if values.size == 0:
            return np.nan, f"{np.nan:.2e}-{np.nan:.2e}"

        q25, q75 = np.percentile(values, [25, 75])
        min_max_range = f"{values.min():.2e}-{values.max():.2e}"
        return q75 - q25, min_max_range

    @staticmethod
    def _scale_by_max(series):
        """Divide *series* by its maximum; all-NaN when the maximum is 0 or NaN."""
        peak = series.max()
        if pd.isna(peak) or peak == 0:
            return series * np.nan
        return series / peak

    def calculate_scores(self):
        """Return ``{compound: score}`` computed from the normalised table.

        For every compound (row) the mean, variance and std/mean ratio are
        each divided by the largest value of that statistic over all
        compounds, and the three scaled terms are averaged with equal weight.
        A compound whose mean is zero gets a NaN score.
        """
        normalized = self.df_normalized
        # axis=1: statistics per compound (row), so the maxima used for
        # scaling are taken over the same quantity that is being scaled.
        compound_mean = normalized.mean(axis=1)
        compound_var = normalized.var(axis=1)
        # A zero mean would yield +/-inf (or 0/0); treat the ratio as undefined.
        compound_rsd = normalized.std(axis=1) / compound_mean.where(compound_mean != 0)

        # Equal weights for simplicity; adjust as necessary.
        score = (
            self._scale_by_max(compound_mean)
            + self._scale_by_max(compound_var)
            + self._scale_by_max(compound_rsd)
        ) / 3
        return score.to_dict()

    def update_treeview(self):
        """Recompute scores and rebuild the table, reference compound first."""
        scores = self.calculate_scores()

        # Drop all existing rows together with their id -> compound mapping.
        self.met_stats_table.delete(*self.met_stats_table.get_children())
        self._row_compounds = {}

        compounds = list(self.df.index)
        if self.REFERENCE_COMPOUND in compounds:
            self.add_compound_to_treeview(self.REFERENCE_COMPOUND, scores)
        for compound in compounds:
            if compound != self.REFERENCE_COMPOUND:
                self.add_compound_to_treeview(compound, scores)

    def add_compound_to_treeview(self, compound, scores):
        """Append one table row with the raw and normalised statistics of *compound*.

        *scores* maps compound labels to scores; the Score cell shows "N/A"
        when it is empty or lacks *compound*.
        """
        original_data = self.df.loc[compound]
        normalized_data = self.df_normalized.loc[compound]

        original_rsd = self.calculate_rsd(original_data)
        normalized_rsd = self.calculate_rsd(normalized_data)

        original_iqr, original_min_max = self.calculate_iqr_and_range(original_data)
        normalized_iqr, normalized_min_max = self.calculate_iqr_and_range(normalized_data)

        values = (
            compound,
            f"{original_data.mean():.3e}",    # Mean
            original_min_max,                  # min-max
            f"{original_rsd:.2f}",             # RSD
            f"{original_iqr:.3e}",             # IQR
            f"{normalized_data.mean():.3e}",  # N Mean
            normalized_min_max,                # N min-max
            f"{normalized_rsd:.2f}",           # N RSD
            f"{normalized_iqr:.3e}",           # N IQR
            f"{scores[compound]:.3f}" if scores and compound in scores else "N/A",  # Score
        )

        item_id = self.met_stats_table.insert("", "end", values=values)
        self._row_compounds[item_id] = compound

    def on_double_click(self, event):
        """Open the plot that belongs to the double-clicked table cell.

        Mean / min-max cells open a scatter plot, RSD / IQR cells a box plot;
        the "N ..." columns use the normalised data.  Clicks on the heading,
        on empty space, or on the Compound / Score columns do nothing.
        """
        item_id = self.met_stats_table.identify_row(event.y)
        if item_id not in self._row_compounds:
            return  # heading or empty area: identify_row returned ''

        column_id = self.met_stats_table.identify_column(event.x)
        action = self.PLOT_ACTIONS.get(column_id)
        if action is None:
            return

        use_normalized, plot_kind = action
        compound = self._row_compounds[item_id]
        table = self.df_normalized if use_normalized else self.df
        data = table.loc[compound]
        source = "Normalized" if use_normalized else "Original"

        if plot_kind == "scatter":
            self.plot_data_points_scatter(data, compound, f"{source} Replicates")
        else:
            self.plot_data_points(data, compound, source)

    def _new_plot_popup(self, compound_name, title_suffix):
        """Create a popup window plus an empty, titled figure; return (popup, fig, ax).

        The figure is built with ``Figure`` rather than pyplot so it is not
        kept in pyplot's global figure registry after the popup is closed.
        """
        popup = tk.Toplevel(self)
        popup.title(f"{compound_name} - {title_suffix}")
        popup.geometry("1000x1000")

        fig = Figure()
        ax = fig.add_subplot(111)
        ax.set_title(f"{compound_name} - {title_suffix}")
        return popup, fig, ax

    def _embed_figure(self, popup, fig):
        """Render *fig* inside *popup* and add a Close button below it."""
        canvas = FigureCanvasTkAgg(fig, master=popup)
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        tk.Button(popup, text="Close", command=popup.destroy).pack(side=tk.BOTTOM)

    def plot_data_points_scatter(self, data, compound_name, title_suffix):
        """Show *data* as a scatter plot (value against position) in a popup."""
        popup, fig, ax = self._new_plot_popup(compound_name, title_suffix)

        ax.scatter(range(len(data)), data, color='blue', alpha=0.7, label=f'{compound_name} data')
        ax.set_ylabel("Values")
        ax.set_xlabel("Sample Index")
        ax.tick_params(axis='x', labelrotation=45)

        self._embed_figure(popup, fig)

    def plot_data_points(self, data, compound_name, title_suffix):
        """Show *data* as one box plot per group, with the raw points overlaid.

        Values are grouped by the part of their column label before the first
        '-' (presumably the run date -- confirm against the sheet layout).
        Missing values are ignored; when none remain an info dialog is shown
        instead of an empty plot.
        """
        groups = {}
        for column, value in data.dropna().items():
            # str(): column labels read from Excel are not guaranteed to be strings.
            group_key = str(column).split('-')[0]
            groups.setdefault(group_key, []).append(value)

        if not groups:
            messagebox.showinfo("No data", f"No values available to plot for {compound_name}.")
            return

        # One fixed ordering shared by the boxes, their tick labels and the
        # overlaid points, so every point lands on the box of its own group.
        group_keys = sorted(groups)

        popup, fig, ax = self._new_plot_popup(compound_name, title_suffix)

        bp = ax.boxplot(
            [groups[key] for key in group_keys],
            tick_labels=group_keys,
            notch=True,
            patch_artist=True,
            showfliers=True,
        )

        for box in bp['boxes']:
            # Same blue for edge and face, half transparent.
            box.set(color='#1f77b4', linewidth=2)
            box.set(facecolor='#1f77b4', alpha=0.5)
        for whisker in bp['whiskers']:
            whisker.set(color='#1f77b4', linewidth=2)
        for cap in bp['caps']:
            cap.set(color='#1f77b4', linewidth=2)
        for median in bp['medians']:
            median.set(color='yellow', linewidth=2)  # yellow for visibility
        for flier in bp['fliers']:
            flier.set(marker='o', color='#e7298a', alpha=0.9)  # outliers as pink dots

        # Boxes sit at x = 1, 2, ...; jitter the points slightly around them.
        for position, key in enumerate(group_keys, start=1):
            y_data = groups[key]
            x_data = np.random.normal(position, 0.02, size=len(y_data))
            ax.plot(x_data, y_data, 'r.', alpha=0.5)

        ax.set_ylabel("Peak Area")
        ax.set_xlabel("Date Run")
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True)

        self._embed_figure(popup, fig)

if __name__ == "__main__":
    # Script entry point: build the main window (kept bound to `app`) and
    # block in the Tk event loop until the window is closed.
    (app := MetaboliteAnalysisApp()).mainloop()


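# Notebook output residue, not part of the program: the statement below was
# echoed after the cell (presumably by a runtime warning raised in calculate_scores).
#   norm_rsd = (self.df_normalized.loc[compound].std() / self.df_normalized.loc[compound].mean()) / max_rsd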
