# **Plot&Stats**
---

<font size = 4>Code used to perform the statistical analyses used in the paper


<font size = 4>Notebook created by [Guillaume Jacquemet](https://cellmig.org/)



In [None]:
# @title #MIT License

print("""
**MIT License**

Copyright (c) 2023 Guillaume Jacquemet

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.""")

--------------------------------------------------------
# **Part 1. Prepare the session and load your data**
--------------------------------------------------------


## **1.1. Install key dependencies**
---
<font size = 4>

In [None]:
#@markdown ##Play to install
# Install the third-party packages into the kernel's environment.
# `%pip` (rather than `!pip`) targets the environment of the running kernel;
# `-q` keeps the installer output short. No versions are pinned here.
%pip -q install pandas scikit-learn
%pip -q install plotly
%pip -q install prettytable
%pip -q install reportlab





In [None]:
#@markdown ##Play to load the dependencies

import ipywidgets as widgets
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import itertools
from matplotlib.gridspec import GridSpec
import requests
from io import StringIO
from IPython.display import display, clear_output
import pandas as pd
from ipywidgets import Layout, VBox, Button, Accordion, SelectMultiple, IntText
from matplotlib.ticker import FixedLocator
from prettytable import PrettyTable
import os

# Function to calculate Cohen's d
def cohen_d(group1, group2):
    """Calculate Cohen's d (standardised mean difference) between two groups.

    Args:
        group1: Sample of the first group; must provide ``mean()``, ``var(ddof=...)``
            and ``len()`` (a pandas Series or a NumPy array).
        group2: Sample of the second group, same requirements as ``group1``.

    Returns:
        The difference of the group means divided by the pooled standard deviation.
        The sign follows ``group1.mean() - group2.mean()``.
    """
    diff = group1.mean() - group2.mean()
    n1, n2 = len(group1), len(group2)
    # ddof=1 is passed explicitly: pandas already defaults to the sample variance,
    # but NumPy arrays default to ddof=0, which would give a different result for
    # the same data depending on the container type.
    var1 = group1.var(ddof=1)
    var2 = group2.var(ddof=1)
    pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    return diff / np.sqrt(pooled_var)

def save_dataframe_with_progress(df, path, desc="Saving", chunk_size=50000):
    """Save a DataFrame to CSV in chunks, showing a progress bar when tqdm is available.

    Args:
        df (pd.DataFrame): DataFrame to write. The index is not written.
        path (str): Destination CSV file; it is overwritten if it exists.
        desc (str): Label shown next to the progress bar.
        chunk_size (int): Maximum number of rows written per chunk (values below 1
            are treated as 1).
    """
    # tqdm is imported locally because it is not imported at notebook level.
    # If it is not installed, the data is still saved, just without a progress bar.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        tqdm = None

    chunk_size = max(int(chunk_size), 1)
    total_rows = len(df)
    pbar = tqdm(total=total_rows, unit="rows", desc=desc) if tqdm is not None else None

    try:
        # newline="" lets the csv writer control line endings, as pandas recommends
        # when an already-open text handle is passed to to_csv.
        with open(path, "w", newline="") as f:
            # Write the header once at the beginning
            df.head(0).to_csv(f, index=False)

            # Positional slicing keeps row order and avoids np.array_split on a DataFrame.
            for start in range(0, total_rows, chunk_size):
                chunk = df.iloc[start:start + chunk_size]
                chunk.to_csv(f, header=False, index=False)
                if pbar is not None:
                    pbar.update(len(chunk))
    finally:
        if pbar is not None:
            pbar.close()

def check_for_nans(df, df_name):
    """
    Report, column by column, how many NaN values a DataFrame contains.

    One line is printed for every column holding at least one NaN; if no column
    does, a single "no NaN" message is printed instead. Nothing is returned.

    Args:
    df (pd.DataFrame): DataFrame to inspect.
    df_name (str): Label used for the DataFrame in the printed messages.
    """
    columns_with_nan = df.columns[df.isna().any()].tolist()

    # Guard clause: nothing to report.
    if not columns_with_nan:
        print(f"No NaN values found in {df_name}.")
        return

    for column in columns_with_nan:
        missing = df[column].isna().sum()
        print(f"Column '{column}' in {df_name} contains {missing} NaN values.")


def save_parameters(params, file_path, param_type):
    """Save parameters to a human-readable CSV, updating entries that already exist.

    The CSV has the columns ``Parameter``, ``Value`` and ``Type``. A row is identified
    by its (``Type``, ``Parameter``) pair: if the file already contains that pair, its
    value is replaced; otherwise the row is added. Rows are written sorted by
    ``Type`` then ``Parameter``.

    Args:
        params (dict): Mapping of parameter name to value.
        file_path (str): CSV file to create or update.
        param_type (str): Label stored in the ``Type`` column for every entry of ``params``.
    """
    key_columns = ['Type', 'Parameter']

    # Values are stored with dtype=object so that each one keeps its own type.
    # Letting pandas infer a numeric dtype turns integers into floats as soon as
    # the column also holds a float or a missing value (2 would be saved as 2.0).
    new_params_df = pd.DataFrame({
        'Parameter': list(params.keys()),
        'Value': pd.Series(list(params.values()), dtype=object),
    })
    new_params_df['Type'] = param_type

    if os.path.exists(file_path):
        existing_params_df = pd.read_csv(file_path)

        # New rows come last, so keep='last' makes a new value override the stored one.
        updated_params_df = (
            pd.concat([existing_params_df, new_params_df], ignore_index=True)
            .drop_duplicates(subset=key_columns, keep='last')
            .sort_values(key_columns, kind='stable')
            .reset_index(drop=True)
        )
    else:
        # Use new parameters DataFrame directly if file doesn't exist
        updated_params_df = new_params_df

    # Save the updated DataFrame to CSV
    updated_params_df.to_csv(file_path, index=False)


## **1.2. Mount your Google Drive**
---
<font size = 4> To use this notebook on the data present in your Google Drive, you need to mount your Google Drive to this notebook.

<font size = 4> Play the cell below to mount your Google Drive and follow the instructions.

<font size = 4> Once this is done, your data are available in the **Files** tab on the top left of the notebook.

In [None]:
#@markdown ##Play the cell to connect your Google Drive to Colab

# Mount the user's Google Drive; this import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/gdrive')
# NOTE(review): the drive is mounted at '/content/gdrive' above, but this changes
# directory to '/gdrive' - confirm that '/gdrive' is the intended target.
%cd /gdrive



## **1.3. Load your dataset**
---

<font size = 4> Please ensure that your data is properly organised (see above)


In [None]:
#@markdown ##Load your dataset:

import pandas as pd
import os
from io import StringIO
import ipywidgets as widgets
from IPython.display import display, clear_output

# Initialize dataset_df as an empty DataFrame globally; load_data() replaces it
# once the user has provided a dataset.
dataset_df = pd.DataFrame()


# Create widgets
# Path of a CSV file to load (takes priority over pasted data in load_data).
dataset_path_input = widgets.Text(
    value='',
    placeholder='Enter the path to your dataset',
    description='Dataset Path:',
    layout={'width': '80%'}
)

# Folder where results are written; load_data falls back to a default when empty.
results_folder_input = widgets.Text(
    value='',
    placeholder='Enter the path to your results folder',
    description='Results Folder:',
    layout={'width': '80%'}
)

# Alternative input: tab-separated text pasted straight from a spreadsheet.
data_textarea = widgets.Textarea(
    value='',
    placeholder='Or copy and paste your tab-separated data here (direct copy and paste from a spreadsheet)',
    description='Or Paste Data:',
    layout={'width': '80%', 'height': '200px'}
)

load_button = widgets.Button(
    description='Load Data',
    button_style='success',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to load the data',
)

# Output area that receives everything printed or displayed by load_data.
output = widgets.Output()

# Load data function
def load_data(b):
    """Button callback: prepare the results folder and load the dataset.

    The dataset is read from the CSV path if one is given, otherwise from the
    tab-separated text pasted in the text area. On success the module-level
    ``dataset_df`` is replaced, checked for NaNs and previewed. If nothing was
    provided, or reading fails, a message is printed and ``dataset_df`` is left
    unchanged.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    global dataset_df
    global Results_Folder

    with output:
        clear_output()
        Results_Folder = results_folder_input.value.strip()
        if not Results_Folder:
            Results_Folder = './Results'  # Default path if not provided
        os.makedirs(Results_Folder, exist_ok=True)  # Create the folder if it doesn't exist
        print(f"Results folder is located at: {Results_Folder}")

        dataset_path = dataset_path_input.value.strip()
        pasted_data = data_textarea.value

        if dataset_path:
            try:
                loaded_df = pd.read_csv(dataset_path)
            except Exception as e:
                print(f"Failed to load dataset from {dataset_path}: {e}")
                # Stop here: checking and previewing a stale or empty DataFrame
                # would make the failure look like a successful load.
                return
            print(f"Loaded dataset from {dataset_path}")
        elif pasted_data.strip():
            try:
                loaded_df = pd.read_csv(StringIO(pasted_data), sep='\t')
            except Exception as e:
                print(f"Failed to load dataset from pasted data: {e}")
                return
            print("Loaded dataset from pasted tab-separated data")
        else:
            print("No dataset path provided or data pasted. Please provide a dataset.")
            return

        # Only publish the new data once it has been read successfully.
        dataset_df = loaded_df

        # Perform a check for NaNs or any other required processing here
        check_for_nans(dataset_df, "your dataset")

        display(dataset_df.head())

# Set the button click event
load_button.on_click(load_data)

# Display the widgets
display(widgets.VBox([dataset_path_input, results_folder_input, data_textarea, load_button, output]))


## **1.4. Convert your dataset to tidy format**
---



In [None]:
Figure_panel = 'FigS7I'  # @param {type: "string"}

# Reshape the wide table (one column per condition) into tidy/long form:
# one row per measurement, with the source column name in 'Condition' and the
# measured value in a column named after the figure panel. Every row gets
# Repeat = 1, and rows containing missing values are dropped.
tidy_df = (
    pd.melt(dataset_df, var_name='Condition', value_name=Figure_panel)
    .assign(Repeat=1)
    .dropna()
)

-------------------------------------------

# **Part 2. Plot your dataset**
-------------------------------------------

<font size = 4> In this section you can plot your data. Data and graphs are automatically saved in your results folder.


## **2.1. Plot your entire dataset**
--------

## **Statistical analyses**
### Cohen's d (Effect Size):
<font size = 4>Cohen's d measures the size of the difference between two groups, normalized by their pooled standard deviation. Values can be interpreted as small (0 to 0.2), medium (0.2 to 0.5), or large (0.5 and above) effects. It quantifies how large the observed difference is, which complements knowing whether the difference is statistically significant.

### Randomization Test:
<font size = 4>This non-parametric test evaluates if observed differences between conditions could have arisen by random chance. It shuffles condition labels multiple times, recalculating the Cohen's d each time. The resulting p-value, which indicates the likelihood of observing the actual difference by chance, provides evidence against the null hypothesis: a smaller p-value implies stronger evidence against the null.



In [None]:
# @title ##Plot (entire dataset)

import ipywidgets as widgets
from ipywidgets import Layout, VBox, Button, Accordion, SelectMultiple, IntText
import pandas as pd
import os
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import FixedLocator
import itertools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from scipy import stats
import time

# Parameters to adapt depending on the notebook section.
# Results_Folder and tidy_df are expected to exist already (they are set by the
# dataset-loading and tidy-conversion cells).
base_folder = f"{Results_Folder}/Plots"  # Root folder for the plots and statistics of this section
Conditions = 'Condition'  # Name of the column holding the condition labels
df_to_plot = tidy_df  # DataFrame that is plotted and analysed in this section

# Create the "pdf" and "csv" sub-folders of base_folder if they do not exist yet
folders = ["pdf", "csv"]
for folder in folders:
    dir_path = os.path.join(base_folder, folder)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

def get_selectable_columns(df):
    """Return the names of the numerical columns that may be offered for plotting.

    Identifier and bookkeeping columns (listed below) are never offered, even when
    they are numerical. Column order follows ``df.columns``.
    """
    excluded = {
        'Condition', 'experiment_nb', 'File_name', 'Repeat', 'Unique_ID', 'LABEL',
        'TRACK_INDEX', 'TRACK_ID', 'TRACK_X_LOCATION', 'TRACK_Y_LOCATION',
        'TRACK_Z_LOCATION', 'Exemplar', 'TRACK_STOP', 'TRACK_START',
        'Cluster_UMAP', 'Cluster_tsne',
    }
    # NumPy dtype kinds: bool, signed int, unsigned int, float, complex.
    numeric_kinds = 'biufc'

    selectable = []
    for name in df.columns:
        if df[name].dtype.kind in numeric_kinds and name not in excluded:
            selectable.append(name)
    return selectable

def display_variable_checkboxes(selectable_columns):
    """Display one unchecked checkbox per column and return the checkbox widgets.

    The checkboxes are laid out in a three-column grid under a 'Variables to Plot:'
    label. The returned list is in the same order as ``selectable_columns``.
    """
    variable_checkboxes = []
    for column in selectable_columns:
        variable_checkboxes.append(widgets.Checkbox(value=False, description=column))

    heading = widgets.Label('Variables to Plot:')
    grid = widgets.GridBox(
        variable_checkboxes,
        layout=widgets.Layout(grid_template_columns="repeat(3, 300px)"),
    )

    # Show the label and the grid in the notebook
    display(widgets.VBox([heading, grid]))
    return variable_checkboxes

def create_condition_selector(df, column_name):
    """Build a multi-selection widget listing the unique values of ``df[column_name]``."""
    return SelectMultiple(
        options=df[column_name].unique(),
        description='Conditions:',
        disabled=False,
        layout=Layout(width='100%'),  # Use the full available width
    )

def display_condition_selection(df, column_name):
    """Display the condition selector inside a titled accordion and return the selector."""
    selector = create_condition_selector(df, column_name)

    # Wrap the selector in an accordion titled 'Select Conditions'.
    accordion = Accordion(children=[VBox([selector])])
    accordion.set_title(0, 'Select Conditions')
    display(accordion)

    return selector

def format_scientific_for_ticks(x):
    """Format a p-value for a colour-bar tick.

    Values below 0.001 use scientific notation with one decimal; all other values
    use fixed notation with four decimals.
    """
    return f"{x:.1e}" if x < 0.001 else f"{x:.4f}"

def format_p_value(x):
    """Format a p-value for display.

    Values below 0.001 are shown as "< 0.001"; all other values are shown with up
    to four significant digits.
    """
    if not x < 0.001:
        return f"{x:.4g}"  # .4g keeps at most four significant digits
    return "< 0.001"

def safe_log10_p_values(matrix):
    """Return -log10 of a matrix of p-values without producing infinities.

    Non-positive entries are replaced by the smallest positive float before taking
    the logarithm, and entries equal to 1 are mapped to -log10(0.999) instead of 0.
    """
    # Smallest representable float greater than 0, used in place of values <= 0.
    tiny = np.nextafter(0, 1)
    positive_only = np.where(matrix > 0, matrix, tiny)

    neg_log = np.negative(np.log10(positive_only))
    # p = 1 is given a small positive value rather than exactly 0.
    neg_log[matrix == 1] = -np.log10(0.999)
    return neg_log

def plot_heatmap(ax, matrix, title, cmap='viridis'):
    """Plot a p-value heatmap coloured on a -log10 scale.

    Cell colours encode -log10(p); cell annotations show the actual p-values.
    Annotations are skipped when there are more than 7 conditions. A colour bar
    with ticks labelled in p-value units is added next to the axes.

    Args:
        ax: Matplotlib axes to draw on.
        matrix (pd.DataFrame): Square table of p-values; missing values are
            coloured as p = 1 and annotated as "NaN".
        title (str): Title of the axes.
        cmap: Colormap passed to seaborn and to the colour bar.
    """
    log_matrix = safe_log10_p_values(matrix.fillna(1))

    # Colour range: start at p = 0.1 and end at the smallest p-value present.
    vmin = -np.log10(0.1)
    vmax = np.max(log_matrix[np.isfinite(log_matrix)])

    # If no p-value is below 0.1, collapse the range instead of inverting it.
    if vmin > vmax:
        vmin = vmax

    # Annotate cells only when there are 7 conditions or fewer
    num_conditions = len(matrix.columns)
    if num_conditions <= 7:
        # Column-wise Series.map is used instead of DataFrame.applymap, which is
        # deprecated in recent pandas releases.
        formatted_annotations = matrix.apply(
            lambda column: column.map(lambda x: format_p_value(x) if pd.notna(x) else "NaN")
        )
    else:
        formatted_annotations = False  # No annotations

    # Plot the heatmap without seaborn's own color bar (a custom one is added below)
    sns.heatmap(log_matrix, ax=ax, cmap=cmap, annot=formatted_annotations,
                fmt="", xticklabels=matrix.columns, yticklabels=matrix.index, cbar=False, vmin=vmin, vmax=vmax)
    ax.set_title(title)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

    # Create a color bar that spans the same range as the heatmap
    norm = plt.Normalize(vmin=vmin, vmax=vmax)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = ax.figure.colorbar(sm, ax=ax)

    # Tick positions are in -log10 units; labels are converted back to p-values
    num_ticks = 5
    tick_locs = np.linspace(vmin, vmax, num_ticks)
    tick_labels = [format_scientific_for_ticks(10**-tick) for tick in tick_locs]
    cbar.set_ticks(tick_locs)
    cbar.set_ticklabels(tick_labels)

def cohen_d(group1, group2):
    """Return Cohen's d, the mean difference divided by the pooled standard deviation.

    Both groups must provide ``mean()``, ``var(ddof=...)`` and ``len()``. The sign of
    the result follows ``group1.mean() - group2.mean()``.
    """
    size1 = len(group1)
    size2 = len(group2)
    mean_difference = group1.mean() - group2.mean()

    # Sample variances (ddof=1), each weighted by its degrees of freedom.
    weighted_var1 = (size1 - 1) * group1.var(ddof=1)
    weighted_var2 = (size2 - 1) * group2.var(ddof=1)
    pooled_variance = (weighted_var1 + weighted_var2) / (size1 + size2 - 2)

    return mean_difference / np.sqrt(pooled_variance)

def perform_randomization_test(df, cond1, cond2, var, n_iterations=1000, random_state=None):
    """Perform a randomization (permutation) test using Cohen's d as the test statistic.

    The values of the two conditions are pooled and randomly re-split into two
    groups of the original sizes ``n_iterations`` times. The p-value is the
    proportion of re-splits whose absolute Cohen's d is at least as large as the
    observed one, with 1 added to the numerator and the denominator.

    Args:
        df (pd.DataFrame): Tidy data with a 'Condition' column.
        cond1: Label of the first condition.
        cond2: Label of the second condition.
        var (str): Column holding the values to compare.
        n_iterations (int): Number of random re-splits.
        random_state: Optional seed (or NumPy Generator) for reproducible results;
            ``None`` draws fresh randomness on every call.

    Returns:
        float: The p-value, or NaN when the observed effect size is NaN (for
        example when a group has fewer than two values).
    """
    group1 = df[df['Condition'] == cond1][var]
    group2 = df[df['Condition'] == cond2][var]
    observed_effect_size = cohen_d(group1, group2)

    # Every comparison against NaN is False, which would otherwise be reported as
    # the smallest possible p-value.
    if np.isnan(observed_effect_size):
        return np.nan

    observed_magnitude = abs(observed_effect_size)
    combined = np.concatenate([group1, group2])
    n_group1 = len(group1)
    rng = np.random.default_rng(random_state)

    count_extreme = 0
    for _ in range(n_iterations):
        # A new permutation is drawn on every iteration; re-counting the same
        # permutation would not add any information to the estimate.
        shuffled = rng.permutation(combined)
        new_effect_size = cohen_d(shuffled[:n_group1], shuffled[n_group1:])
        if abs(new_effect_size) >= observed_magnitude:
            count_extreme += 1

    return (count_extreme + 1) / (n_iterations + 1)

def perform_t_test(df, cond1, cond2, var):
    """
    Compare one variable between two conditions with Welch's t-test.

    Args:
        df (pd.DataFrame): The input dataframe in tidy format, with a 'Condition' column.
        cond1 (str): The name of the first condition.
        cond2 (str): The name of the second condition.
        var (str): The variable to perform the t-test on.

    Returns:
        float: The p-value from the t-test.
    """
    # Select the values of each condition.
    sample_a = df.loc[df['Condition'] == cond1, var]
    sample_b = df.loc[df['Condition'] == cond2, var]

    # equal_var=False selects Welch's t-test (no equal-variance assumption).
    result = stats.ttest_ind(sample_a, sample_b, equal_var=False)
    return result[1]

def plot_selected_vars(button, df, Conditions, Results_Folder, condition_selector, stat_method_selector):
    """Compute pairwise statistics and plot every selected variable.

    For each variable ticked in the module-level ``variable_checkboxes``, the
    function compares every pair of selected conditions (absolute Cohen's d and a
    p-value from the chosen method), saves both matrices to a CSV file, and saves
    and shows a figure with a box plot, an effect-size heatmap and a p-value heatmap.

    Args:
        button: The button instance passed by ipywidgets (unused).
        df (pd.DataFrame): Tidy data to plot.
        Conditions (str): Name of the column holding the condition labels.
        Results_Folder (str): Folder receiving the "csv" and "pdf" sub-folders.
        condition_selector: Widget whose ``value`` lists the conditions to include;
            an empty selection means all conditions.
        stat_method_selector: Widget whose ``value`` is 't-test' or 'randomization test'.

    Raises:
        ValueError: If the selected statistical method is not supported.
    """
    print("Plotting in progress...")

    # Get selected variables
    variables_to_plot = [box.description for box in variable_checkboxes if box.value]
    print(f"Variables to plot: {variables_to_plot}")

    if not variables_to_plot:
        print("No variables selected for plotting")
        return

    # Get selected conditions
    selected_conditions = condition_selector.value
    print(f"Selected conditions: {selected_conditions}")
    if len(selected_conditions) == 0:
        print("No conditions selected for plotting, therefore all available conditions are selected by default")
        selected_conditions = df[Conditions].unique().tolist()

    # Keep only the selected conditions; their order follows first appearance in the data
    filtered_df = df[df[Conditions].isin(selected_conditions)].copy()
    unique_conditions = filtered_df[Conditions].unique().tolist()

    n_iterations = 10000
    method = stat_method_selector.value
    print(f"Selected method: {method}")
    if method not in ('t-test', 'randomization test'):
        raise ValueError(f"Unsupported statistical method: {method!r}")

    # Make sure the output folders exist even if the setup cell was skipped
    for sub_folder in ("csv", "pdf"):
        os.makedirs(os.path.join(Results_Folder, sub_folder), exist_ok=True)

    for var in variables_to_plot:
        print(f"Processing variable: {var}")
        # Float fill values: the matrices receive float results below, and writing
        # floats into integer columns is rejected or warned about by recent pandas.
        effect_sizes = pd.DataFrame(0.0, index=unique_conditions, columns=unique_conditions)
        p_values = pd.DataFrame(1.0, index=unique_conditions, columns=unique_conditions)

        for cond1, cond2 in itertools.combinations(unique_conditions, 2):
            group1 = filtered_df[filtered_df[Conditions] == cond1][var]
            group2 = filtered_df[filtered_df[Conditions] == cond2][var]

            effect_size = abs(cohen_d(group1, group2))

            if method == 't-test':
                p_value = perform_t_test(filtered_df, cond1, cond2, var)
            else:
                p_value = perform_randomization_test(filtered_df, cond1, cond2, var, n_iterations=n_iterations)

            # Set and mirror effect sizes and p-values
            effect_sizes.loc[cond1, cond2] = effect_sizes.loc[cond2, cond1] = effect_size
            p_values.loc[cond1, cond2] = p_values.loc[cond2, cond1] = p_value

        # Save to CSV
        combined_df = pd.concat([
            effect_sizes.rename(columns=lambda x: f"{x} (Effect Size)"),
            p_values.rename(columns=lambda x: f"{x} ({method} P-Value)")
        ], axis=1)

        combined_df.to_csv(f"{Results_Folder}/csv/{var}_statistics_combined.csv")
        print(f"Saved statistics to CSV for variable: {var}")

        # Create a new figure: box plot on top, the two heatmaps below
        fig = plt.figure(figsize=(16, 10))
        gs = GridSpec(2, 2, height_ratios=[1, 1])
        ax_box = fig.add_subplot(gs[0, :])

        # Limit the y-axis so extreme values cannot flatten the boxes. The bounds
        # are the 10th and 90th percentiles of the whole dataset, widened by 10
        # times the spread between them.
        lower_quantile = df[var].quantile(0.1)
        upper_quantile = df[var].quantile(0.9)
        quantile_spread = upper_quantile - lower_quantile

        multiplier = 10
        lower_bound = lower_quantile - multiplier * quantile_spread
        upper_bound = upper_quantile + multiplier * quantile_spread

        # Plotting
        sns.boxplot(x=Conditions, y=var, data=filtered_df, ax=ax_box, color='lightgray')  # Boxplot
        ax_box.set_ylim([max(min(filtered_df[var]), lower_bound), min(max(filtered_df[var]), upper_bound)])
        ax_box.set_title(f"{var}")
        ax_box.set_xlabel('Condition')
        ax_box.set_ylabel(var)
        tick_labels = ax_box.get_xticklabels()
        tick_locations = ax_box.get_xticks()
        ax_box.xaxis.set_major_locator(FixedLocator(tick_locations))
        ax_box.set_xticklabels(tick_labels, rotation=90)
        # No legend is drawn: the box plot has no labelled artists to list.

        # Statistical Analyses and Heatmaps

        # Effect Size heatmap
        ax_d = fig.add_subplot(gs[1, 0])
        sns.heatmap(effect_sizes.fillna(0), annot=True, cmap="viridis", cbar=True, square=True, ax=ax_d, vmax=1)
        # Rotate the labels after drawing, once the heatmap has set them
        ax_d.set_xticklabels(ax_d.get_xticklabels(), rotation=90)
        ax_d.set_title("Effect Size (Cohen's d)")

        # p-value heatmap
        ax_p = fig.add_subplot(gs[1, 1])
        plot_heatmap(ax_p, p_values, f"{method} p-value")

        plt.tight_layout()
        # The context manager closes the PDF even if saving fails
        with PdfPages(f"{Results_Folder}/pdf/{var}_Boxplots_and_Statistics.pdf") as pdf_pages:
            pdf_pages.savefig(fig)
        print(f"Saved PDF for variable: {var}")
        plt.show()
        # Release the figure so repeated clicks do not accumulate open figures
        plt.close(fig)

# Build the statistical-method dropdown; it is displayed at the end of the cell.
stat_method_selector = widgets.Dropdown(
    options=['randomization test', 't-test'],
    value='randomization test',
    description='Stat Method:',
    style={'description_width': 'initial'}
)

# Display the variable checkboxes and the condition selector, keeping the
# returned widgets so the plotting callback can read the user's choices.
selectable_columns = get_selectable_columns(df_to_plot)
variable_checkboxes = display_variable_checkboxes(selectable_columns)
condition_selector = display_condition_selection(df_to_plot, Conditions)


def _on_plot_button_clicked(b):
    """Run the plotting routine with the current data and widget selections."""
    plot_selected_vars(b, df_to_plot, Conditions, base_folder, condition_selector, stat_method_selector)


button = Button(description="Plot Selected Variables", layout=Layout(width='400px'), button_style='info')
button.on_click(_on_plot_button_clicked)

display(VBox([stat_method_selector, button]))


## **2.2. Export data summaries**
--------

In [None]:
# @title ##Export data summaries

import pandas as pd
import os
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet

# Results_Folder and tidy_df are expected to be defined by earlier cells
save_path = f"{Results_Folder}/variables_summary"
df_to_plot = tidy_df  # The tidy DataFrame is the one summarised in this section

# Create the summary folder if it does not exist yet
if not os.path.exists(save_path):
    os.makedirs(save_path)

def save_table_as_pdf(data, column_names, filename):
    """
    Write a table to a letter-sized PDF using ReportLab.

    Parameters:
    - data: List of lists containing the table rows (without the header row).
    - column_names: List of column names, used as the header row.
    - filename: Path of the PDF file to write.
    """
    header_row = (0, 0), (-1, 0)
    body_rows = (0, 1), (-1, -1)
    whole_table = (0, 0), (-1, -1)

    style_commands = [
        ('BACKGROUND', *header_row, colors.grey),  # Header background color
        ('TEXTCOLOR', *header_row, colors.whitesmoke),  # Header text color
        ('ALIGN', *whole_table, 'CENTER'),  # Center all text
        ('FONTNAME', *header_row, 'Helvetica-Bold'),  # Header font
        ('BOTTOMPADDING', *header_row, 12),  # Padding below header
        ('BACKGROUND', *body_rows, colors.beige),  # Background color for data
        ('GRID', *whole_table, 1, colors.black),  # Grid lines
    ]

    # The header row is placed in front of the data rows.
    table = Table([column_names] + data)
    table.setStyle(TableStyle(style_commands))

    # Build a document whose only element is the table.
    document = SimpleDocTemplate(filename, pagesize=letter)
    document.build([table])

def generate_display_and_save_statistics(df, columns, Conditions, save_path):
    """
    Save per-condition descriptive statistics of the selected columns as CSV and PDF.

    For every requested column that exists in the DataFrame, the output of
    ``describe()`` grouped by the condition column is rounded to 3 decimals and
    written to "<column>_summary.csv" and "<column>_summary.pdf" in ``save_path``.
    Requested columns that are missing from the DataFrame are skipped.

    Parameters:
    - df: DataFrame to analyze.
    - columns: List of column names to generate statistics for.
    - Conditions: Column name to group by.
    - save_path: Directory path where the CSV and PDF files will be saved.
    """
    # Make sure the destination folder exists
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    for var in columns:
        if var not in df.columns:
            continue

        # Descriptive statistics per condition, rounded to 3 decimal places
        summary = df.groupby(Conditions)[var].describe().round(3)

        # CSV export
        csv_filename = f"{var}_summary.csv"
        summary.to_csv(os.path.join(save_path, csv_filename))
        print(f"Saved statistical summary for {var} to {csv_filename}")

        # PDF export: each row starts with the condition label (the index)
        # followed by the statistics
        header = ["Condition"] + list(summary.columns)
        rows = [list(record) for record in summary.itertuples()]
        pdf_filename = os.path.join(save_path, f"{var}_summary.pdf")
        save_table_as_pdf(rows, header, pdf_filename)
        print(f"Saved statistical summary for {var} to {pdf_filename}")

# Generate and save statistics as CSV and PDF
generate_display_and_save_statistics(df_to_plot, selectable_columns, Conditions, save_path)
