<a href="https://colab.research.google.com/github/HanqiLouis/GFET-Characterization/blob/main/DP_Boxplot_GUI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Colab-specific setup: mount Google Drive at /content/drive so that folder
# paths under that mount point can be read and written by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# @title DP Boxplot
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import ipywidgets as widgets
from IPython.display import display, clear_output


### ------- Plot Functions -------
def DP_forward(directory_path, site_number, runs_per_site, dual_sweep=False, remove_outliers=False, print_option=True,
               z_threshold=2):
    """
    Extracts the forward DP (Dirac Point) Id and Vg for a specific site and checks for potential malfunctions.

    The '*.xlsx' files of `directory_path` are sorted by name and split into
    consecutive chunks of `runs_per_site` files; chunk number `site_number`
    (1-based) is the site that gets analysed. In every file the Dirac point is
    taken as the row with the smallest 'ID' value.

    Parameters:
    - directory_path: folder containing one Excel file per run.
    - site_number: 1-based site index.
    - runs_per_site: number of consecutive files belonging to one site.
    - dual_sweep: if True, only the first half of the rows (forward sweep) is used.
    - remove_outliers: if True, runs whose Id or Vg z-score (computed across the
      runs of this site) is not below `z_threshold` are dropped.
    - print_option: if True, the raw (and, when filtering, the filtered) data
      matrices are printed.
    - z_threshold: absolute z-score limit used by the outlier filter.

    Returns:
    - numpy array of shape (num_kept_runs, 2) with columns [Id * 1e6, Vg * 1e3],
      or None when the site has no files, no usable run, or only outliers.
    """

    start_index = (site_number - 1) * runs_per_site
    end_index = start_index + runs_per_site
    excel_files = sorted(glob.glob(os.path.join(directory_path, '*.xlsx')))
    site_files = excel_files[start_index:end_index]

    if not site_files:
        print(f"No files found for Site {site_number}.")
        return None

    I, V = [], []

    for file in site_files:
        df = pd.read_excel(file)

        if dual_sweep:
            df = df.iloc[: df.shape[0] // 2]  # First half of data for forward sweep

        # Find the Dirac Point (minimum Id)
        min_Id = df['ID'].min()
        DP_forward_candidates = df.loc[df['ID'] == min_Id, 'VG'].values

        # An empty or all-NaN 'ID' column yields a NaN minimum that matches no row.
        if len(DP_forward_candidates) == 0:
            continue  # Skip this run if no valid DP found

        # Convert to uA and mV (assumes the sheet stores A and V - TODO confirm);
        # when several rows share the minimum, the first one is used.
        I.append(1000000 * min_Id)
        V.append(1000 * DP_forward_candidates[0])

    if not I or not V:
        print(f"No valid Dirac Point data found for Site {site_number}.")
        return None

    D = np.column_stack((I, V))

    if print_option:
        print("Raw Data:\n", D)

    if not remove_outliers:
        return D

    z_scores = stats.zscore(D, axis=0, nan_policy='omit')

    # A column without any spread (identical values in every run, or a single
    # run) has a standard deviation of 0, so its z-scores are 0/0 = NaN. Such a
    # value cannot be an outlier; without this guard every run of the site would
    # be rejected. NaN z-scores that stem from NaN data still disqualify the run.
    undefined_z = np.isnan(z_scores) & ~np.isnan(D)
    within_limit = undefined_z | (np.abs(z_scores) < z_threshold)
    valid_runs = within_limit.all(axis=1)

    if not valid_runs.any():
        print("All runs were classified as outliers. Site may be malfunctioning.")
        return None

    D_filtered = D[valid_runs]
    deleted_runs = [i + 1 for i, is_valid in enumerate(valid_runs) if not is_valid]
    print(f"Deleted runs due to outliers: {deleted_runs}")
    if print_option:
        # The data dump follows the same switch as the raw-data dump above.
        print("Filtered Data:\n", D_filtered)
    return D_filtered



def compute_full_refiling(folder_path, total_site_nb, runs_per_site, dual_sweep=False, remove_outliers=False):
    """
    Extract the Id and Vg at forward DP for all sites of a refiling.

    DP_forward is called for sites 1..total_site_nb; sites for which it returns
    None are left out of the result.

    Parameters:
    - folder_path: folder holding the Excel files of the refiling.
    - total_site_nb: number of sites to read.
    - runs_per_site: number of runs (files) per site.
    - dual_sweep, remove_outliers: forwarded unchanged to DP_forward.

    Returns:
    - combined_data: numpy array of shape (num_sites, num_runs, 2), where
      num_sites counts only the sites that produced data and num_runs is the
      largest run count among them. Sites with fewer runs (e.g. after outlier
      removal) are padded with NaN along the run axis, so consumers should use
      NaN-aware reductions. None is returned when no site produced data.
    """

    # NOTE(review): because failed sites are dropped, row k is not guaranteed to
    # be site k+1 - confirm whether callers rely on that alignment.
    site_arrays = []
    for site in range(1, total_site_nb + 1):
        site_data = DP_forward(folder_path, site, runs_per_site, dual_sweep, remove_outliers, print_option=False)
        if site_data is not None:  # Ensure valid data
            site_arrays.append(site_data)

    if not site_arrays:
        print("No valid data found across all sites.")
        return None

    # np.stack needs identical shapes and raises as soon as one site kept fewer
    # runs than another; pad the shorter sites with NaN instead.
    max_runs = max(site_data.shape[0] for site_data in site_arrays)
    combined_data = np.full((len(site_arrays), max_runs, 2), np.nan)
    for row, site_data in enumerate(site_arrays):
        combined_data[row, :site_data.shape[0], :] = site_data

    return combined_data



def filter_malfunctioning_sites(all_refilings, malfunctioning_sites):
    """
    Computes the mean Vg and Id across multiple matrices and removes malfunctioning sites.

    Parameters:
    - all_refilings: list of arrays of shape (num_sites, num_runs, 2) whose last
      axis is [Id, Vg]. All arrays must have the same num_sites. Entries that
      are None (a refiling without data) are ignored.
    - malfunctioning_sites: list of 1-based site numbers to drop; numbers
      outside 1..num_sites are ignored.

    Returns:
    - (mean_Vg, mean_Id): two 1-D arrays with one value per remaining site.
      NaN runs are excluded from the averages. Two empty arrays are returned
      when no refiling contains data.
    """

    # A folder without usable data is represented by None; concatenating it
    # would raise, so only real matrices are merged.
    usable_refilings = [refiling for refiling in all_refilings if refiling is not None]
    if not usable_refilings:
        print("No valid refiling data to average.")
        return np.array([]), np.array([])

    # Concatenate all matrices along axis=1 (merge runs)
    all_refilings_conc = np.concatenate(usable_refilings, axis=1)

    # Extract Vg and Id values
    Vg = all_refilings_conc[:, :, 1]  # Index 1 for Vg
    Id = all_refilings_conc[:, :, 0]  # Index 0 for Id

    # Compute mean along runs; nanmean equals mean on NaN-free data and keeps a
    # single missing run from turning the whole site average into NaN.
    mean_Vg = np.nanmean(Vg, axis=1)
    mean_Id = np.nanmean(Id, axis=1)

    if not malfunctioning_sites:
        return mean_Vg, mean_Id  # No need to remove anything

    # Convert 1-based site numbers to 0-based indices, keeping only those in range
    site_count = len(mean_Vg)
    index_to_remove = [site - 1 for site in malfunctioning_sites if 1 <= site <= site_count]

    if not index_to_remove:
        print("No valid malfunctioning sites found in range.")
        return mean_Vg, mean_Id

    return np.delete(mean_Vg, index_to_remove), np.delete(mean_Id, index_to_remove)



def plot_boxplot_mean_functioning_sites(filtered_Vg_matrix, label, ID=False):
    """
    Draws one notched box with the individual values scattered on top of it.

    Parameters:
    - filtered_Vg_matrix: 1-D sequence of per-site values (VG or ID).
    - label: accepted but not used by this function.
    - ID: when truthy the y-axis is labelled "ID [uA]", otherwise "VG [mV]".
    """
    _, axes = plt.subplots(figsize=(6, 4))

    box_style = {"facecolor": "lightblue"}
    axes.boxplot(filtered_Vg_matrix, notch=True, patch_artist=True, boxprops=box_style)

    # Overlay the raw values, jittered horizontally around the box at x = 1
    point_count = len(filtered_Vg_matrix)
    jittered_x = np.random.normal(1, 0.02, size=point_count)
    axes.scatter(jittered_x, filtered_Vg_matrix, color="darkblue", alpha=0.6)

    # Title and axis label
    axes.set_title("Dirac Points of Functioning Sites", fontsize=14)
    if ID:
        y_label = "ID [uA]"
    else:
        y_label = "VG [mV]"
    axes.set_ylabel(y_label)

    axes.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    plt.show()



def plot_distribution(data, labels, title, ylabel, save_output=None, file_name='boxplot'):
    """
    Draws one notched box per dataset with the raw values scattered on top.

    Parameters:
    - data: sequence of 1-D datasets, one per box.
    - labels: x tick labels, one per box.
    - title, ylabel: figure title and y-axis label.
    - save_output: directory to save the figure into; None disables saving.
    - file_name: base name of the saved file ('.png' is appended).
    """
    _, axes = plt.subplots(figsize=(6, 5))
    box_style = {"facecolor": "lightblue"}
    axes.boxplot(data, notch=True, patch_artist=True, boxprops=box_style)

    # Boxes sit at x = 1, 2, ...; jitter each dataset around its own box
    for offset, samples in enumerate(data):
        box_center = offset + 1
        jittered_x = np.random.normal(box_center, 0.05, size=len(samples))
        axes.scatter(jittered_x, samples, alpha=0.6)

    tick_positions = range(1, len(labels) + 1)
    axes.set_xticks(tick_positions)
    axes.set_xticklabels(labels, fontsize=12)
    axes.set_title(title, fontsize=14)
    axes.set_ylabel(ylabel, fontsize=12)
    axes.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()

    # Optional export, done before show()
    wants_saving = save_output is not None
    if wants_saving:
        os.makedirs(save_output, exist_ok=True)  # Target directory may not exist yet
        plot_path = os.path.join(save_output, f'{file_name}.png')
        plt.savefig(plot_path, dpi=300)
        print(f"Plot saved to {plot_path}")

    plt.show()


def plot_vg_distribution(data, labels, title="Dirac Points of Functioning Sites", save_output=None, file_name='boxplot'):
    """ Box plot of the given datasets with the y-axis labelled "VG [mV]". """
    plot_distribution(
        data=data,
        labels=labels,
        title=title,
        ylabel="VG [mV]",
        save_output=save_output,
        file_name=file_name,
    )

def plot_id_distribution(data, labels, title="Current Distribution of Functioning Sites", save_output=None, file_name='boxplot'):
    """ Box plot of the given datasets with the y-axis labelled "ID [uA]". """
    plot_distribution(
        data=data,
        labels=labels,
        title=title,
        ylabel="ID [uA]",
        save_output=save_output,
        file_name=file_name,
    )

### ------- Global Variables -------
# Group instances currently shown in the UI, in display order; add_group and
# remove_group append to and pop from the end of this list.
groups = []  # List of groups containing folders
# Shared output area: validation messages and plots are emitted inside
# `with output:` blocks, and update_ui displays it below the Plot button.
output = widgets.Output()  # Output display for results

### ------- Group Class -------
class Group:
    """
    Input widgets describing one measurement group.

    A group consists of a name, the number of sites, the runs per site, the
    "Dual sweep" / "Remove outliers" switches, a comma-separated list of
    malfunctioning sites and one or more folder path fields.
    """

    def __init__(self, index):
        """ Initialize a measurement group with all required fields. """
        self.index = index

        # Group name
        self.group_name = widgets.Text(
            description=f'Measurement Group {index} Name:',
            placeholder=f'Enter name for Group {index}',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='750px')
        )

        # Number of sites
        self.nb_sites = widgets.BoundedIntText(
            value=0, min=0,
            description="Number of sites:",
            style={'description_width': 'initial'}
        )

        # Runs per site
        self.runs_per_site = widgets.BoundedIntText(
            value=0, min=0,
            description="Runs per site:",
            style={'description_width': 'initial'}
        )

        # Checkboxes
        self.dual_sweep = widgets.Checkbox(value=False, description="Dual sweep")
        self.remove_outliers = widgets.Checkbox(value=False, description="Remove outliers")

        # Malfunctioning sites input
        self.malfunctioning_sites = widgets.Text(
            description='Malfunctioning sites:',
            placeholder='Enter site numbers (comma-separated)',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='500px')
        )

        # Buttons for folder management
        self.add_folder_button = widgets.Button(description="+ Folder", button_style="success")
        self.remove_folder_button = widgets.Button(description="- Folder", button_style="danger")

        self.add_folder_button.on_click(self.add_folder)
        self.remove_folder_button.on_click(self.remove_folder)

        # Folder storage, seeded with one field. The field is created directly
        # rather than through add_folder(), which would re-render the whole UI
        # while this object is still being constructed.
        self.folders = [self._new_folder_widget(1)]

    @staticmethod
    def _new_folder_widget(position):
        """ Builds the text field for folder number `position` (1-based). """
        return widgets.Text(
            description=f'Folder {position}:',
            placeholder='Enter folder path',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='750px')
        )

    def add_folder(self, _=None):
        """ Adds a new folder input field to the group and refreshes the UI. """
        self.folders.append(self._new_folder_widget(len(self.folders) + 1))
        update_ui()

    def remove_folder(self, _=None):
        """ Removes the last folder, ensuring at least one remains. """
        if len(self.folders) > 1:
            self.folders.pop()
            update_ui()

### ------- UI Update Function -------
def update_ui():
    """ Clears the cell output and redraws every group followed by the global controls. """
    clear_output(wait=True)

    for entry in groups:
        # Name first, then one text field per folder
        display(entry.group_name, *entry.folders)

        site_row = widgets.HBox([entry.nb_sites, entry.runs_per_site])
        option_row = widgets.HBox([entry.dual_sweep, entry.remove_outliers])
        folder_button_row = widgets.HBox([entry.add_folder_button, entry.remove_folder_button])
        separator = widgets.HTML('<hr>')

        display(site_row, option_row, entry.malfunctioning_sites, folder_button_row, separator)

    # Global controls below the last group
    group_button_row = widgets.HBox([add_group_button, remove_group_button])
    display(group_button_row, mode, save_checkbox)
    display(save_directory_field)
    display(save_as)
    display(plot_button)
    display(output)

### ------- Group Management Functions -------
def add_group(_=None):
    """ Appends a new group, numbered one past the current count, and redraws the UI. """
    next_index = len(groups) + 1
    new_group = Group(next_index)
    groups.append(new_group)
    update_ui()

def remove_group(_=None):
    """ Drops the most recently added group; the last remaining group is never removed. """
    if len(groups) <= 1:
        return
    del groups[-1]
    update_ui()

### ------- Input Validation -------
def validate_inputs():
    """
    Checks that every group has a site count, a run count and no empty folder path.

    The first problem found is printed into the shared output area and False is
    returned; True is returned when all groups pass.
    """

    def first_problem(group):
        # Returns the message for the first missing field of `group`, or None.
        if group.nb_sites.value == 0:
            return "Please enter the number of sites"
        if group.runs_per_site.value == 0:
            return "Please enter the number of runs per site"
        for position, folder in enumerate(group.folders, start=1):
            if not folder.value.strip():
                return f"Folder {position} path is missing"
        return None

    with output:
        output.clear_output()

        for group in groups:
            problem = first_problem(group)
            if problem is None:
                continue
            display_name = group.group_name.value or f'Group {group.index}'
            print(f"{display_name}: {problem}")
            return False

        return True

### ------- Data Processing -------
def collect_group_data():
    """
    Reads the current widget values of all groups.

    Returns four parallel lists, one entry per group: the stripped folder
    paths, the group name (or "Group <index>" when left empty), the runs per
    site and the dual-sweep flag.
    """
    folder_paths = [[field.value.strip() for field in entry.folders] for entry in groups]
    names = [entry.group_name.value or f"Group {entry.index}" for entry in groups]
    run_counts = [entry.runs_per_site.value for entry in groups]
    dual_sweep_flags = [entry.dual_sweep.value for entry in groups]

    return folder_paths, names, run_counts, dual_sweep_flags

### ------- Plotting Functions -------
def plot_boxplot(_=None):
    """
    Processes data and generates a boxplot based on user input.

    For every group, each folder is read with compute_full_refiling, the
    per-site means are computed with filter_malfunctioning_sites (dropping the
    sites listed as malfunctioning), and one box per group is drawn for either
    VG or ID depending on the Mode dropdown. Folders that yield no data are
    ignored; a group without any data is reported and left out of the plot.
    Messages and the figure go to the shared output area, and the UI is redrawn
    afterwards. Nothing is plotted (and the UI is not redrawn) when validation
    fails or a save directory is required but missing.
    """
    with output:
        output.clear_output()

        if not validate_inputs():
            return

        save_directory = save_directory_field.value.strip()
        if save_checkbox.value and not save_directory:
            print("Please enter the save directory")
            return

        _, groups_names, _, _ = collect_group_data()

        Vgs, Ids, plotted_names = [], [], []
        for group, group_label in zip(groups, groups_names):
            measures = []
            for folder in group.folders:
                R = compute_full_refiling(
                    folder.value.strip(),
                    group.nb_sites.value,
                    group.runs_per_site.value,
                    group.dual_sweep.value,
                    group.remove_outliers.value
                )
                # None means the folder produced no usable data; passing it on
                # would break the averaging step.
                if R is not None:
                    measures.append(R)

            if not measures:
                print(f"{group_label}: no valid data found, group skipped")
                continue

            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            entries = (part.strip() for part in group.malfunctioning_sites.value.split(","))
            malfunctioning_sites = [int(entry) for entry in entries if entry.isdecimal()]

            filtered_Vg_matrix, filtered_Id_matrix = filter_malfunctioning_sites(measures, malfunctioning_sites)
            Vgs.append(filtered_Vg_matrix)
            Ids.append(filtered_Id_matrix)
            # Labels are collected alongside the data so they stay aligned when
            # a group is skipped.
            plotted_names.append(group_label)

        if not plotted_names:
            print("No data available to plot")
        else:
            # `or` also covers a whitespace-only entry, which would otherwise
            # produce a file literally named ".png".
            file_name = save_as.value.strip() or 'boxplot'
            save_output = save_directory if save_checkbox.value else None

            if mode.value == "ID":
                plot_id_distribution(Ids, plotted_names, save_output=save_output, file_name=file_name)
            else:
                plot_vg_distribution(Vgs, plotted_names, save_output=save_output, file_name=file_name)

    update_ui()

### ------- Widgets -------
# Quantity shown in the box plot; plot_boxplot plots ID when this is "ID" and VG otherwise.
mode = widgets.Dropdown(options=["VG", "ID"], value="VG", description="Mode:", style={'description_width': 'initial'})
# When ticked, plot_boxplot requires a non-empty save directory and saves the figure there.
save_checkbox = widgets.Checkbox(value=False, description='Save', style={'description_width': 'initial'})

# Directory the figure is saved into when saving is enabled.
save_directory_field = widgets.Text(description='Save Directory:', placeholder='Enter save directory',
                                    style={'description_width': 'initial'}, layout=widgets.Layout(width='750px'))

# Base name of the saved figure; plot_boxplot falls back to 'boxplot' when this is empty.
save_as = widgets.Text(description='Save as:', placeholder='Enter file name',
                       style={'description_width': 'initial'}, layout=widgets.Layout(width='500px'))

# Buttons that append / remove a measurement group at the end of `groups`.
add_group_button = widgets.Button(description="+ Group", button_style="success")
remove_group_button = widgets.Button(description="- Group", button_style="danger")
add_group_button.on_click(add_group)
remove_group_button.on_click(remove_group)

# Runs the analysis and draws the box plot.
plot_button = widgets.Button(description='Plot', button_style='warning')
plot_button.on_click(plot_boxplot)

### ------- Initialize UI -------
# Start with one empty group. add_group already redraws the UI, so the
# explicit update_ui() call below renders the same state once more.
add_group(None)
update_ui()

Text(value='Bare GFET', description='Measurement Group 1 Name:', layout=Layout(width='750px'), placeholder='En…

Text(value='/content/drive/MyDrive/GFET_Ali_Hanqi/BareGFET_step5mV/bare', description='Folder 1:', layout=Layo…

HBox(children=(BoundedIntText(value=48, description='Number of sites:', style=DescriptionStyle(description_wid…

HBox(children=(Checkbox(value=False, description='Dual sweep'), Checkbox(value=False, description='Remove outl…

Text(value='1,3,5,12,18,23,34,38,43,44,45', description='Malfunctioning sites:', layout=Layout(width='500px'),…

HBox(children=(Button(button_style='success', description='+ Folder', style=ButtonStyle()), Button(button_styl…

HTML(value='<hr>')

Text(value='No wash', description='Measurement Group 2 Name:', layout=Layout(width='750px'), placeholder='Ente…

Text(value='/content/drive/MyDrive/GFET_Ali_Hanqi/BareGFET_step5mV/no_wash', description='Folder 1:', layout=L…

HBox(children=(BoundedIntText(value=48, description='Number of sites:', style=DescriptionStyle(description_wid…

HBox(children=(Checkbox(value=False, description='Dual sweep'), Checkbox(value=False, description='Remove outl…

Text(value='1,3,5,12,18,23,34,38,43,44,45', description='Malfunctioning sites:', layout=Layout(width='500px'),…

HBox(children=(Button(button_style='success', description='+ Folder', style=ButtonStyle()), Button(button_styl…

HTML(value='<hr>')

Text(value='1st wash', description='Measurement Group 3 Name:', layout=Layout(width='750px'), placeholder='Ent…

Text(value='/content/drive/MyDrive/GFET_Ali_Hanqi/BareGFET_step5mV/1st_wash', description='Folder 1:', layout=…

HBox(children=(BoundedIntText(value=48, description='Number of sites:', style=DescriptionStyle(description_wid…

HBox(children=(Checkbox(value=False, description='Dual sweep'), Checkbox(value=False, description='Remove outl…

Text(value='1,3,5,12,18,23,34,38,43,44,45', description='Malfunctioning sites:', layout=Layout(width='500px'),…

HBox(children=(Button(button_style='success', description='+ Folder', style=ButtonStyle()), Button(button_styl…

HTML(value='<hr>')

Text(value='2nd wash', description='Measurement Group 4 Name:', layout=Layout(width='750px'), placeholder='Ent…

Text(value='/content/drive/MyDrive/GFET_Ali_Hanqi/BareGFET_step5mV/2nd_wash', description='Folder 1:', layout=…

HBox(children=(BoundedIntText(value=48, description='Number of sites:', style=DescriptionStyle(description_wid…

HBox(children=(Checkbox(value=False, description='Dual sweep'), Checkbox(value=False, description='Remove outl…

Text(value='1,3,5,12,18,23,34,38,43,44,45', description='Malfunctioning sites:', layout=Layout(width='500px'),…

HBox(children=(Button(button_style='success', description='+ Folder', style=ButtonStyle()), Button(button_styl…

HTML(value='<hr>')

HBox(children=(Button(button_style='success', description='+ Group', style=ButtonStyle()), Button(button_style…

Dropdown(description='Mode:', index=1, options=('VG', 'ID'), style=DescriptionStyle(description_width='initial…

Checkbox(value=True, description='Save', style=DescriptionStyle(description_width='initial'))

Text(value='/content/drive/MyDrive/GFET_Ali_Hanqi/BareGFET_step5mV/boxplot', description='Save Directory:', la…

Text(value='Box_ID', description='Save as:', layout=Layout(width='500px'), placeholder='Enter file name', styl…



Output()