In [1]:
import numpy as np
import pandas as pd

# To plot
import seaborn as sns
import matplotlib.pyplot as plt

# To create GUI
import gradio as gr

# To integrate a function
from scipy import integrate

# Gamma function and its logarithm
from scipy.special import gamma, gammaln

# To calculate statistics
from scipy.stats import norm, t, chi2
from scipy.stats import hmean, trim_mean, iqr, median_abs_deviation, skew, kurtosis
from scipy.stats.mstats import gmean, winsorize
from statsmodels.distributions import ECDF

In [2]:
ROUND = 4 # Number of decimals to round the results

# Module-level state shared by the GUI callbacks defined in this notebook.
df_cache = {
    "df": None,              # DataFrame read from the uploaded CSV (written by load_csv)
    "filtered_df": None,     # DataFrame after category filters (written by load_csv / apply_filters)
    "stats": None,           # Statistics object cached by prepare_data; reset to None by load_csv
    "numeric_cols": [],      # Column names currently treated as numeric
    "categorical_cols": [],  # Column names currently treated as categorical
    "overrides": {           # Manual type overrides read by get_effective_column_types
        "num_to_cat": [],    # numeric columns to be treated as categorical
        "cat_to_num": []     # categorical columns to be treated as numeric
    }
}

---

## Statistics class

In [3]:
class Statistics():
    """Descriptive statistics, interval estimates and diagnostic plots for one sample.

    ``data`` is stored as given. The methods call ``.mean()``, ``.min()`` and
    ``.max()`` on it and use arithmetic operators, so it has to be a pandas
    Series or a NumPy array of numbers.

    Most methods store their result as attributes instead of returning it.
    The plotting methods read attributes written by the ``Calculate*`` methods:
    the histogram overlays read ``ci_*`` / ``pi_*`` and
    ``PlotConfidenceRegions`` reads ``ci_mean`` and ``ci_deviation``.
    """

    def __init__(self, data):
        self.data = data
        self.n = len(data)

    # --- Internal helpers ---
    def _std0(self):
        """Standard deviation with ddof=0, reusing ``self.S0`` when already computed."""
        return self.S0 if hasattr(self, "S0") else np.std(self.data)

    def _std1(self):
        """Standard deviation with ddof=1, reusing ``self.S1`` when already computed."""
        return self.S1 if hasattr(self, "S1") else np.std(self.data, ddof=1)

    # --- Descriptive Statistics ---

    # --- Quantiles ---
    def CalculateQuantiles(self, prob):
        """Store the requested quantiles in ``self.quantiles``.

        Args:
            prob: a single probability or a sequence of probabilities
                (list, tuple, NumPy array or pandas Series).

        The result is a one-column DataFrame ('Value') indexed by 'Q<prob>'.
        """
        if isinstance(prob, (list, tuple, np.ndarray, pd.Series)):
            probs = list(prob)
        else:
            probs = [prob]

        self.quantiles = pd.DataFrame(
            {'Value': np.quantile(self.data, probs)},
            ['Q{}'.format(p) for p in probs]
        )

    # --- Quartiles ---
    def CalculateQuartiles(self):
        """Store the three quartiles in ``self.quartiles`` (rows 'Q1', 'Q2', 'Q3')."""
        self.quartiles = pd.DataFrame({'Value': np.quantile(self.data, [0.25, 0.5, 0.75])}, ['Q1', 'Q2', 'Q3'])

    # --- Central Tendency ---
    def CalculateCentralTendency(self, trim_param=0.1, winsor_param=(0.1, 0.1), weights=None):
        """Compute location estimates and store them in ``self.central_tendency``.

        Args:
            trim_param: proportion cut from each tail for the trimmed mean,
                or None to skip it (NaN is stored).
            winsor_param: (lower, upper) proportions passed to ``winsorize``,
                or None to skip the winsorized mean (NaN is stored).
            weights: weights for the weighted mean, or None to skip it
                (NaN is stored).

        Every estimate is also stored as an attribute (``self.mean``,
        ``self.median``, ...). Geometric and harmonic means are NaN unless all
        observations are strictly positive.
        """
        self.mean = self.data.mean()
        self.median = np.median(self.data)
        # The interquartile mean is the 25% trimmed mean.
        self.interquartile_mean = trim_mean(self.data, 0.25)

        if trim_param is None:
            self.trimmed_mean = np.nan
        else:
            self.trimmed_mean = trim_mean(self.data, trim_param)

        if winsor_param is None:
            self.winsorized_mean = np.nan
        else:
            self.winsorized_mean = winsorize(self.data, winsor_param).mean()

        if np.all(self.data > 0):
            self.geometric_mean = gmean(self.data)
            self.harmonic_mean = hmean(self.data)
        else:
            self.geometric_mean = np.nan
            self.harmonic_mean = np.nan

        if weights is None:
            self.weighted_mean = np.nan
        else:
            self.weighted_mean = np.average(self.data, weights=weights)

        # (label, value, robust flag). The arithmetic mean is not robust to
        # outliers; the median and the trimmed-type means are.
        rows = [
            ('Mean', self.mean, 0),
            ('Median', self.median, 1),
            ('Geometric Mean', self.geometric_mean, 0),
            ('Harmonic Mean', self.harmonic_mean, 0),
            ('Weighted Mean', self.weighted_mean, 0),
            ('Trimmed Mean', self.trimmed_mean, 1),
            ('Interquartile Mean', self.interquartile_mean, 1),
            ('Winsorized Mean', self.winsorized_mean, 1),
        ]
        self.central_tendency = pd.DataFrame(
            {'Value': [value for _, value, _ in rows], 'Robust': [flag for _, _, flag in rows]},
            [label for label, _, _ in rows]
        )

    # --- Dispersion ---
    # Auxiliary functions to correct the bias
    def c4(self, n):
        """Return c4(n) = sqrt(2/(n-1)) * Gamma(n/2) / Gamma((n-1)/2).

        The gamma ratio is evaluated through log-gamma because Gamma(n/2)
        overflows to inf for n above roughly 343, which turned the ratio into
        NaN. A scalar n < 2 yields NaN.
        """
        if np.isscalar(n) and n < 2:
            return np.nan
        return np.sqrt(2/(n-1)) * np.exp(gammaln(n/2) - gammaln((n-1)/2))

    def d2(self, n):
        """Return the integral of 1 - (1 - Phi(x))**n - Phi(x)**n over the real line.

        Phi is the standard normal CDF. The value is rounded to 3 decimals and
        is used as the divisor that bias-corrects the sample range.
        """
        def integrand(x, n):
            cdf = norm.cdf(x)
            return 1 - (1 - cdf)**n - cdf**n

        return round(integrate.quad(integrand, -np.inf, np.inf, args=(n,))[0], 3)

    def CalculateDispersion(self):
        """Compute scale estimates and store them in ``self.dispersion``.

        Also sets ``S0``, ``S1`` and the ``*_bias_corrected`` attributes for
        range, IQR, MAD and AAD. With fewer than two observations the
        corrections that divide by (n - 1) are stored as NaN.
        """
        # Original estimators
        self.S0 = np.std(self.data)             # ddof=0 is the NumPy default
        self.S1 = np.std(self.data, ddof=1)
        R = self.data.max() - self.data.min()
        IQR = iqr(self.data)
        MAD = median_abs_deviation(self.data)
        AAD = abs(self.data - self.data.mean()).mean()

        # Bias correction
        if self.n > 1:
            c4_n = self.c4(self.n)
            S0_bias_corrected = self.S0 * np.sqrt(self.n/(self.n-1)) / c4_n
            S1_bias_corrected = self.S1 / c4_n
            self.R_bias_corrected = R / self.d2(self.n)
        else:
            S0_bias_corrected = np.nan
            S1_bias_corrected = np.nan
            self.R_bias_corrected = np.nan
        self.IQR_bias_corrected = IQR / (2 * norm.ppf(0.75))
        self.MAD_bias_corrected = MAD / norm.ppf(0.75)
        self.AAD_bias_corrected = AAD * np.sqrt(np.pi/2)

        sigma_biased = [self.S0, self.S1, R, IQR, MAD, AAD]
        sigma_unbiased = [
            S0_bias_corrected,
            S1_bias_corrected,
            self.R_bias_corrected,
            self.IQR_bias_corrected,
            self.MAD_bias_corrected,
            self.AAD_bias_corrected
        ]

        labels = ['Deviation, ddof=0', 'Deviation, ddof=1', 'Range', 'IQR', 'MAD', 'AAD']
        self.dispersion = pd.DataFrame({'Value':sigma_biased, 'Value_bias_corrected':sigma_unbiased, 'Robust':[0,0,0,1,1,1]}, labels)

    # --- Skew ---
    def CalculateSkewness(self):
        """Store the skewness with and without bias correction in ``self.skew``."""
        SkewCentralMoments = skew(self.data)
        SkewKStatistics = skew(self.data, bias=False)

        self.skew = pd.DataFrame({'Value':[SkewCentralMoments, SkewKStatistics]}, ['Skew Central Moments', 'Skew K Statistics'])

    # --- Kurtosis ---
    def CalculateKurtosis(self):
        """Store the kurtosis (and kurtosis minus 3) in ``self.kurtosis``."""
        KurtosisCentralMoments = kurtosis(self.data, fisher=False)
        KurtosisKStatistics = kurtosis(self.data, fisher=False, bias=False)

        self.kurtosis = pd.DataFrame(
            {'Value':[KurtosisCentralMoments, KurtosisKStatistics], 'Excess Kurtosis':[KurtosisCentralMoments-3, KurtosisKStatistics-3]},
            ['Kurtosis CentralMoments', 'Kurtosis K Statistics']
        )

    def CalculateDescriptiveStatistics(self, trim_param, winsor_param, weights):
        """Run quartiles, central tendency, dispersion, skewness and kurtosis in turn."""
        self.CalculateQuartiles()
        self.CalculateCentralTendency(trim_param, winsor_param, weights)
        self.CalculateDispersion()
        self.CalculateSkewness()
        self.CalculateKurtosis()

    # --- Statistical Inference ---

    # --- Confidence Intervals ---
    def CalculateCiMean(self, alpha, hat_mean, hat_sigma, dist):
        """Store the (1 - alpha) confidence interval for the mean in ``self.ci_mean``.

        Args:
            alpha: total tail probability.
            hat_mean: centre of the interval.
            hat_sigma: scale estimate; it is divided by sqrt(n).
            dist: "norm" for normal quantiles or "t" for Student-t quantiles
                with n - 1 degrees of freedom.

        Raises:
            ValueError: if ``dist`` is neither "norm" nor "t".
        """
        scale = hat_sigma / np.sqrt(self.n)

        if dist == "norm":
            self.ci_mean = norm.ppf(alpha/2, hat_mean, scale), norm.ppf(1-alpha/2, hat_mean, scale)
        elif dist == "t":
            # Only if we are using the standard deviation with one degree of freedom without correction
            self.ci_mean = t.ppf(alpha/2, self.n-1, hat_mean, scale), t.ppf(1-alpha/2, self.n-1, hat_mean, scale)
        else:
            raise ValueError("dist must be 'norm' or 't', got {!r}".format(dist))

    def CalculateCiMedian(self, alpha, hat_median, hat_sigma):
        """Store a normal-quantile confidence interval for the median in ``self.ci_median``.

        The scale used is hat_sigma * sqrt(pi / (2 n)).
        """
        scale = hat_sigma * np.sqrt(np.pi/(2*self.n))
        self.ci_median = norm.ppf(alpha/2, hat_median, scale), norm.ppf(1-alpha/2, hat_median, scale)

    def CalculateCiDeviation(self, alpha):
        """Store the chi-square confidence interval for the standard deviation in ``self.ci_deviation``.

        Uses the ddof=1 standard deviation; it is computed here when
        ``CalculateDispersion`` has not been run yet.
        """
        num = self._std1() * np.sqrt(self.n-1)
        den_low = np.sqrt(chi2.ppf(1-alpha/2, self.n-1))
        den_upp = np.sqrt(chi2.ppf(alpha/2, self.n-1))

        self.ci_deviation = num/den_low, num/den_upp

    def CalculateConfidenceInterval(self, alpha, hat_mean, hat_median, hat_sigma, dist):
        """Compute the three confidence intervals and tabulate them in ``self.confidence_intervals``."""
        self.CalculateCiMean(alpha, hat_mean, hat_sigma, dist)
        self.CalculateCiMedian(alpha, hat_median, hat_sigma)
        self.CalculateCiDeviation(alpha)

        labels = ['Mean', 'Median', 'Deviation']
        self.confidence_intervals = pd.DataFrame(
            [self.ci_mean, self.ci_median, self.ci_deviation],
            index=labels, columns=["Lower", "Upper"]
        )

    # --- Prediction Intervals ---
    def CalculatePiMean(self, alpha, hat_mean, hat_sigma, dist):
        """Store the (1 - alpha) prediction interval centred on the mean in ``self.pi_mean``.

        The scale is sqrt(hat_sigma**2 + hat_sigma**2 / n). ``dist`` selects
        normal ("norm") or Student-t ("t", n - 1 degrees of freedom) quantiles.

        Raises:
            ValueError: if ``dist`` is neither "norm" nor "t".
        """
        scale = np.sqrt(hat_sigma**2 + hat_sigma**2/self.n)

        if dist == "norm":
            self.pi_mean = norm.ppf(alpha/2, hat_mean, scale), norm.ppf(1-alpha/2, hat_mean, scale)
        elif dist == "t":
            # Only if we are using the standard deviation with one degree of freedom without correction
            self.pi_mean = t.ppf(alpha/2, self.n-1, hat_mean, scale), t.ppf(1-alpha/2, self.n-1, hat_mean, scale)
        else:
            raise ValueError("dist must be 'norm' or 't', got {!r}".format(dist))

    def CalculatePiMedian(self, alpha, hat_median=None, hat_sigma=None):
        """Store the prediction interval centred on the median in ``self.pi_median``.

        Both ``hat_median`` and ``hat_sigma`` must be supplied: the None
        defaults are not usable because ``hat_sigma`` is squared below.
        """
        scale = np.sqrt(hat_sigma**2 + np.pi*hat_sigma**2/(2*self.n))
        self.pi_median = norm.ppf(alpha/2, hat_median, scale), norm.ppf(1-alpha/2, hat_median, scale)

    def CalculatePiIqr(self, alpha):
        """Store a prediction interval built from the first and third quartiles in ``self.pi_iqr``."""
        q1, q3 = np.quantile(self.data, [0.25, 0.75])
        quartile_range = q3 - q1
        delta = 0.5 * (norm.ppf(1-alpha/2)/norm.ppf(0.75)-1)

        self.pi_iqr = q1 - delta * quartile_range, q3 + delta * quartile_range

    def CalculatePredictionInterval(self, alpha, hat_mean, hat_median, hat_sigma, dist):
        """Compute the three prediction intervals and tabulate them in ``self.prediction_intervals``."""
        self.CalculatePiMean(alpha, hat_mean, hat_sigma, dist)
        self.CalculatePiMedian(alpha, hat_median, hat_sigma)
        self.CalculatePiIqr(alpha)

        labels = ['Mean', 'Median', 'IQR']
        self.prediction_intervals = pd.DataFrame(
            [self.pi_mean, self.pi_median, self.pi_iqr],
            index=labels, columns=["Lower", "Upper"]
        )

    # --- Relative Likelihood ---
    def RelativeLogLikelihood(self, mu, sigma):
        """Return the log-likelihood at (mu, sigma) relative to its value at (sample mean, S0).

        Uses the ddof=0 standard deviation; it is computed here when
        ``CalculateDispersion`` has not been run yet.
        """
        mean_sq_dev = np.mean(self.data**2) - 2 * mu * np.mean(self.data) + mu**2
        return self.n * (np.log(self._std0() / sigma) + 0.5 * (1 - mean_sq_dev / sigma**2))

    def RelativeLikelihood(self, mu, sigma):
        """Return exp(RelativeLogLikelihood(mu, sigma))."""
        return np.exp(self.RelativeLogLikelihood(mu, sigma))

    # --- Graphical Analysis ---

    def _draw_interval(self, ax, level, interval, label):
        """Draw one horizontal interval bar with a midpoint marker and a boxed label."""
        lower, upper = interval
        ax.hlines(level, lower, upper, color='k')
        ax.scatter((lower + upper)/2, level, marker="o", color="k")
        ax.text(upper, level, label, va="center",
                bbox=dict(boxstyle='square', facecolor='lightgray', edgecolor='black'))

    # --- Plot Histogram ---
    def PlotHistogram(self, kde, show_data, histo_add_ci, histo_choose_ci, histo_add_pi, histo_choose_pi, add_normal, hat_mu, hat_sigma):
        """Plot a density histogram, optionally with interval bars, a normal curve and a rug.

        Args:
            kde: forwarded to ``sns.histplot``.
            show_data: add a rug plot of the observations.
            histo_add_ci: draw confidence interval bar(s) in a second panel.
            histo_choose_ci: "Mean", "Median" or "Both".
            histo_add_pi: draw a prediction interval bar in the second panel.
            histo_choose_pi: "Mean", "Median" or "IQR".
            add_normal: overlay the normal density with ``hat_mu`` / ``hat_sigma``.
            hat_mu, hat_sigma: parameters of the overlaid normal density.

        Returns:
            The matplotlib figure.

        An interval that has not been computed yet (or an unknown choice) is
        skipped instead of raising.
        """
        if histo_add_ci or histo_add_pi:
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
        else:
            fig, ax1 = plt.subplots(1, 1)
            ax2 = None

        sns.histplot(self.data, kde=kde, stat="density", color="rebeccapurple", alpha=0.5, ax=ax1)

        if ax2 is not None:
            # (vertical position, attribute holding the interval, label)
            requested = []
            if histo_add_ci:
                if histo_choose_ci in ("Mean", "Both"):
                    requested.append((0.2, "ci_mean", " Mean"))
                if histo_choose_ci in ("Median", "Both"):
                    requested.append((0.3, "ci_median", " Median"))
            if histo_add_pi:
                pi_attr = {"Mean": "pi_mean", "Median": "pi_median", "IQR": "pi_iqr"}.get(histo_choose_pi)
                if pi_attr is not None:
                    requested.append((0.1, pi_attr, " PI"))

            drawn_levels = []
            for level, attr, label in requested:
                interval = getattr(self, attr, None)
                if interval is None:
                    continue
                self._draw_interval(ax2, level, interval, label)
                drawn_levels.append(level)

            ax2.spines[['left', 'right', 'top']].set_visible(False)
            ax2.set_yticks([])

            # Leave a margin of 0.1 below the lowest and above the highest bar.
            if drawn_levels:
                ax2.set_ylim(round(min(drawn_levels) - 0.1, 1), round(max(drawn_levels) + 0.1, 1))

        if add_normal:
            y_vect = np.linspace(hat_mu - 3*hat_sigma, hat_mu + 3*hat_sigma, 100)
            ax1.plot(y_vect, norm.pdf(y_vect, hat_mu, hat_sigma), color="k", ls="--",  label="Normal density")
            ax1.legend()

        if show_data:
            # rugplot's height is a fraction of the axes extent, so it must not
            # be scaled by the density axis limit.
            sns.rugplot(self.data, height=0.1, ax=ax1, color='k')

        ax1.spines[['right', 'top']].set_visible(False)
        return fig

    # --- Plot ECDF ---
    def PlotEcdf(self, alpha, confidence, add_normal, hat_mu, hat_sigma):
        """Plot the empirical CDF, optionally with a confidence band and a normal CDF.

        Args:
            alpha: tail probability for the band; its half-width is
                sqrt(log(2/alpha) / (2 n)).
            confidence: draw the band, clipped to [0, 1].
            add_normal: overlay the normal CDF with ``hat_mu`` / ``hat_sigma``.
            hat_mu, hat_sigma: parameters of the overlaid normal CDF.

        Returns:
            The matplotlib figure.
        """
        ecdf = ECDF(self.data)

        fig, ax = plt.subplots(1, 1)
        ax.spines[['right', 'top']].set_visible(False)

        ax.scatter(ecdf.x, ecdf.y, color='rebeccapurple')
        ax.step(ecdf.x, ecdf.y, where='post', color='rebeccapurple', label="ECDF")

        if confidence:
            epsilon = np.sqrt(np.log(2/alpha)/(2*self.n))
            upper = ecdf.y + epsilon
            lower = ecdf.y - epsilon

            upper[upper>1] = 1
            lower[lower<0] = 0

            ax.fill_between(ecdf.x, lower, upper, step='post', color='rebeccapurple', alpha=0.5)

        if add_normal:
            y_vect = np.linspace(hat_mu - 3*hat_sigma, hat_mu + 3*hat_sigma, 100)
            ax.plot(y_vect, norm.cdf(y_vect, hat_mu, hat_sigma), color="k", ls="--", label="Normal CDF")
            ax.set_xlim(min(self.data.min(), y_vect.min())-0.05, max(self.data.max(), y_vect.max())+0.05)
        else:
            ax.set_xlim(self.data.min()-0.05, self.data.max()+0.05)

        ax.set_title("Empirical Cumulative Distribution Function", fontsize=12)
        ax.set_xlabel(r'$x$')
        ax.set_ylabel(r'$F_n(x)$')
        ax.set_ylim(0, 1.05)
        ax.legend()

        return fig

    # --- Plot Confidence Regions ---
    def PlotConfidenceRegions(self, probs, eps, add):
        """Plot relative-likelihood contours for (mu, sigma).

        Args:
            probs: coverage probabilities, in any order.
            eps: (eps_mu, eps_sigma) margins added around ``ci_mean`` and
                ``ci_deviation`` to define the plotted window.
            add: also draw the rectangle spanned by ``ci_mean`` and
                ``ci_deviation``.

        Returns:
            The matplotlib figure.

        Requires ``ci_mean`` and ``ci_deviation`` to have been computed.
        """
        # contour() needs increasing levels, and the level decreases with the
        # probability, so the probabilities are put in decreasing order.
        probs = sorted(set(probs), reverse=True)
        levels = np.exp(-0.5 * chi2.ppf(probs, 2))

        mu_vect = np.linspace(self.ci_mean[0] - eps[0], self.ci_mean[1] + eps[0], 100)
        sigma_vect = np.linspace(self.ci_deviation[0] - eps[1], self.ci_deviation[1] + eps[1], 100)

        mu_grid, sigma_grid = np.meshgrid(mu_vect, sigma_vect)

        fig, ax = plt.subplots(1, 1)

        cnt = ax.contour(mu_grid, sigma_grid, self.RelativeLikelihood(mu_grid, sigma_grid), levels)

        # Mark the point where the relative likelihood equals one.
        centre = self.mean if hasattr(self, "mean") else self.data.mean()
        ax.scatter(x=centre, y=self._std0(), color='k')

        if add:
            ax.plot(
                [self.ci_mean[0], self.ci_mean[0], self.ci_mean[1], self.ci_mean[1], self.ci_mean[0]],
                [self.ci_deviation[0], self.ci_deviation[1], self.ci_deviation[1], self.ci_deviation[0], self.ci_deviation[0]],
                color='r', ls='--')

        ax.set_title(r"Confidence regions for $\mu$ and $\sigma$")
        ax.set_xlabel(r"$\mu$")
        ax.set_ylabel(r"$\sigma$")

        handles, _ = cnt.legend_elements()
        ax.legend(handles, probs, loc="upper right", frameon=False)

        ax.spines[['right', 'top']].set_visible(False)

        return fig

---

## Auxiliary functions to control logic of GUI

In [4]:
def load_numeric_cols():
    """Return a dropdown update listing the cached numeric columns.

    The first column is preselected; when the list is empty the value is None.
    """
    available = df_cache.get("numeric_cols", [])
    default_choice = next(iter(available), None)
    return gr.update(choices=available, value=default_choice)

In [5]:
def blank_plot():
    """Return an empty matplotlib figure whose axes are switched off."""
    placeholder_fig, placeholder_ax = plt.subplots()
    placeholder_ax.axis('off')
    return placeholder_fig

In [6]:
def prepare_data(column):
    """Fetch the working DataFrame, the selected column and its Statistics object.

    Args:
        column: name of the column to analyse.

    Returns:
        A 5-tuple ``(df, data, stats, error_df, error_plot)``. On success the
        last two items are None. On failure the first three are None,
        ``error_df`` is a one-cell DataFrame with the message and
        ``error_plot`` is a blank figure.
    """
    def failure(message):
        return None, None, None, pd.DataFrame([[message]], columns=["Error"]), blank_plot()

    # --- Read data and validate ---
    original_df = df_cache.get("df")
    if original_df is None:
        return failure("Please upload a valid CSV.")

    # --- Prefer the filtered data when it exists ---
    # When no filter is active the filtered frame holds the same rows as the
    # original, so no element-wise comparison of the two frames is needed.
    filtered_df = df_cache.get("filtered_df")
    df = original_df if filtered_df is None else filtered_df

    # --- Select numeric column ---
    if column not in df.columns:
        return failure("Selected column is not in the dataframe.")

    data = df[column].dropna()
    if data.empty:
        # Nothing left to analyse (all values missing or all rows filtered out).
        return failure("Selected column has no observations.")

    # --- Initialize or reuse Statistics object ---
    stats = df_cache.get("stats")
    if stats is None or not np.array_equal(stats.data, data.to_numpy()):
        stats = Statistics(data)
        df_cache["stats"] = stats

    return df, data, stats, None, None  # df, data, stats, error_df, error_plot


In [7]:
def add_normal_warning(check):
    """Show a warning toast when ``check`` is truthy; do nothing otherwise."""
    if not check:
        return
    gr.Warning("If you haven't done it yet, run first a descriptive analysis for central tendency and dispersion.")

In [8]:
def toggle_add_normal(check, sel_mu, sel_sigma):
    """Return visibility updates for the four normal-overlay inputs.

    Args:
        check: whether the "add normal" option is enabled.
        sel_mu: selected estimator for mu; "Other" reveals the free-text box.
        sel_sigma: selected estimator for sigma; "Other" reveals the free-text box.

    Returns:
        A tuple of four ``gr.update`` objects in the order
        (hat_mu, hat_mu_text, hat_sigma, hat_sigma_text).
    """
    enabled = bool(check)
    visibility = (
        enabled,                            # hat_mu
        enabled and sel_mu == "Other",      # hat_mu_text
        enabled,                            # hat_sigma
        enabled and sel_sigma == "Other",   # hat_sigma_text
    )
    return tuple(gr.update(visible=flag) for flag in visibility)

In [9]:
def parse_text(input_str):
    """Convert the content of a text box to a float.

    Args:
        input_str: the text to parse. None and numeric values are accepted too.

    Returns:
        The parsed float, or None when the input is None, empty or only whitespace.

    Raises:
        ValueError: if the text is not empty and is not a valid number.
    """
    if input_str is None:
        return None
    if isinstance(input_str, (int, float)):
        return float(input_str)
    text = input_str.strip()
    return float(text) if text else None

## Logic control of Data Tab

In [10]:
# NOTE(review): this cell redefines `blank_plot` with a body identical to the
# definition in cell In [5]; one of the two definitions is redundant.
def blank_plot():
    """Return an empty matplotlib figure whose axes are switched off."""
    fig, ax = plt.subplots()
    ax.axis('off')
    return fig

In [11]:
def get_effective_column_types(df):
    """Split the columns of ``df`` into numeric and categorical names.

    The dtype-based split is adjusted by the manual overrides stored in
    ``df_cache["overrides"]``; overrides naming columns absent from ``df``
    are ignored.

    Returns:
        A tuple ``(numeric, categorical)`` of sorted, de-duplicated lists.
    """
    overrides = df_cache.get("overrides", {"num_to_cat": [], "cat_to_num": []})
    forced_categorical = overrides["num_to_cat"]
    forced_numeric = overrides["cat_to_num"]

    detected_numeric = df.select_dtypes(include=[np.number]).columns.tolist()
    detected_categorical = df.select_dtypes(exclude=[np.number]).columns.tolist()

    # Start from the detected type, drop what was overridden away ...
    numeric = {name for name in detected_numeric if name not in forced_categorical}
    categorical = {name for name in detected_categorical if name not in forced_numeric}

    # ... and add what was overridden in, provided the column exists.
    numeric.update(name for name in forced_numeric if name in df.columns)
    categorical.update(name for name in forced_categorical if name in df.columns)

    return sorted(numeric), sorted(categorical)

In [12]:
def load_csv(file):
    """Read the uploaded CSV into ``df_cache`` and refresh the Data tab controls.

    Args:
        file: the uploaded file, either an object exposing ``.name`` or a path
            string. None (upload cleared) is reported as an error.

    Returns:
        A 7-tuple matching the outputs wired in ``build_data_tab``: six
        dropdown updates (cat_col_dropdown, num_override_dropdown,
        cat_override_dropdown and the three category multiselects) followed
        by the status message. On failure the six dropdowns are emptied and
        the message starts with "Error:".
    """
    try:
        if file is None:
            raise ValueError("No file provided.")

        path = file if isinstance(file, str) else file.name
        df = pd.read_csv(path)

        if df.empty:
            raise ValueError("The uploaded CSV file is empty.")

        df_cache["df"] = df
        df_cache["filtered_df"] = df
        df_cache["stats"] = None

        numeric_cols, categorical_cols = get_effective_column_types(df)

        df_cache["numeric_cols"] = numeric_cols
        df_cache["categorical_cols"] = categorical_cols

        return (
            gr.update(choices=categorical_cols, value=[]),            # cat_col_dropdown
            gr.update(choices=numeric_cols, value=None),              # num_override_dropdown
            gr.update(choices=categorical_cols, value=None),          # cat_override_dropdown
            gr.update(choices=[], value=[], visible=False),           # cat_val_multiselect_1
            gr.update(choices=[], value=[], visible=False),           # cat_val_multiselect_2
            gr.update(choices=[], value=[], visible=False),           # cat_val_multiselect_3
            "CSV loaded successfully."                                # status_output (Textbox!)
        )
    except Exception as e:
        # Six component updates plus the status text: the number of returned
        # values has to equal the number of outputs (seven).
        cleared = [gr.update(choices=[], value=None) for _ in range(6)]
        return tuple(cleared + [f"Error: {e}"])

In [13]:
def reclassify_as_categorical(column):
    """Move ``column`` from the cached numeric list to the categorical list.

    Returns:
        A 4-tuple matching the outputs wired in ``build_data_tab``: updates for
        cat_col_dropdown, num_override_dropdown and cat_override_dropdown,
        followed by the status message. When the column is not currently
        numeric the three updates are no-ops.
    """
    numeric_cols = df_cache.get("numeric_cols", [])
    categorical_cols = df_cache.get("categorical_cols", [])

    if not column or column not in numeric_cols:
        # Three no-op updates plus the message: the number of returned values
        # has to equal the number of outputs (four).
        return (
            gr.update(), gr.update(), gr.update(),
            f"Column '{column}' is not currently classified as numeric."
        )

    numeric_cols.remove(column)
    categorical_cols.append(column)
    df_cache["numeric_cols"] = numeric_cols
    df_cache["categorical_cols"] = categorical_cols

    return (
        gr.update(choices=categorical_cols),                      # cat_col_dropdown
        gr.update(choices=numeric_cols),                          # num_override_dropdown
        gr.update(choices=categorical_cols),                      # cat_override_dropdown
        f"Column '{column}' reclassified as categorical."         # status
    )


In [14]:
def reclassify_as_numeric(column):
    """Move ``column`` from the cached categorical list to the numeric list.

    Returns:
        A 4-tuple matching the outputs wired in ``build_data_tab``: updates for
        cat_col_dropdown, num_override_dropdown and cat_override_dropdown,
        followed by the status message. When the column is not currently
        categorical the three updates are no-ops.
    """
    numeric_cols = df_cache.get("numeric_cols", [])
    categorical_cols = df_cache.get("categorical_cols", [])

    if not column or column not in categorical_cols:
        # Three no-op updates plus the message: the number of returned values
        # has to equal the number of outputs (four).
        return (
            gr.update(), gr.update(), gr.update(),
            f"Column '{column}' is not currently classified as categorical."
        )

    categorical_cols.remove(column)
    numeric_cols.append(column)
    df_cache["categorical_cols"] = categorical_cols
    df_cache["numeric_cols"] = numeric_cols

    return (
        gr.update(choices=categorical_cols),                      # cat_col_dropdown
        gr.update(choices=numeric_cols),                          # num_override_dropdown
        gr.update(choices=categorical_cols),                      # cat_override_dropdown
        f"Column '{column}' reclassified as numeric."             # status
    )


In [15]:
# Only 3 category filters are supported.
def update_category_filters(selected_columns):
    """Return updates for the three category multiselects.

    Slot ``i`` is shown with the sorted distinct non-missing values of the
    i-th selected column. A slot with no selected column, or whose column is
    not in the cached DataFrame, is hidden and emptied.

    Returns:
        A list of three ``gr.update`` objects.
    """
    def hidden():
        return gr.update(visible=False, choices=[], value=[])

    df = df_cache.get("df")
    if df is None or not selected_columns:
        # Nothing to filter on: hide every selector.
        return [hidden() for _ in range(3)]

    updates = []
    for slot in range(3):
        if slot >= len(selected_columns) or selected_columns[slot] not in df.columns:
            updates.append(hidden())
            continue
        categories = sorted(df[selected_columns[slot]].dropna().unique().tolist())
        updates.append(gr.update(visible=True, choices=categories, value=[]))

    return updates

In [16]:
def apply_filters(cat_cols, val1, val2, val3):
    """Filter the cached DataFrame by the selected category values.

    Args:
        cat_cols: selected categorical columns; only the first three are used.
        val1, val2, val3: values to keep for the first, second and third column.
            An empty selection leaves that column unfiltered.

    Returns:
        A status message. The result is stored in ``df_cache["filtered_df"]``.
    """
    df = df_cache.get("df")
    if df is None:
        return "❌ No data loaded."

    category_filters = [val1, val2, val3]
    nothing_selected = not cat_cols or not any(category_filters)
    if nothing_selected:
        # No filters applied
        df_cache["filtered_df"] = df
        return "⚠️ No filters selected. Using full dataset."

    # Accumulate one row mask over all active filters, then select once.
    keep = np.ones(len(df), dtype=bool)
    for col, selected_vals in zip(cat_cols, category_filters):
        if selected_vals:
            keep &= df[col].isin(selected_vals).to_numpy()

    filtered_df = df[keep]
    df_cache["filtered_df"] = filtered_df
    return f"✅ Filter applied. Rows remaining: {len(filtered_df)}"

In [17]:
def toggle_preview(check):
    """Show or hide the CSV preview.

    Args:
        check: state of the "Show CSV Preview" checkbox.

    Returns:
        A pair ``(value, update)`` for the preview component: the first five
        rows and a visible update when checked and a CSV is loaded, otherwise
        an empty DataFrame and a hidden update.
    """
    df = df_cache.get("df")
    if check and df is not None:
        return df.head(5), gr.update(visible=True)

    if check:
        # Checked before any CSV was loaded: there is nothing to preview.
        gr.Warning("Please upload a CSV file before requesting a preview.")

    return pd.DataFrame(), gr.update(visible=False) # csv_preview, csv_preview
    
def max_categorical_warning(check):
    """Show an info toast about the three-column filter limit when ``check`` is truthy."""
    if not check:
        return
    gr.Info("The maximum number of categorical columns for filter is 3.")

## GUI of Data Tab

In [18]:
def build_data_tab():
    """Build the "Data" tab: file upload, variable-type fixes and category filters.

    The components are created first (their creation order defines the
    layout), then the event handlers are attached. The function returns
    nothing; the callbacks communicate through ``df_cache``.

    NOTE(review): presumably this must be called inside an active
    ``gr.Blocks`` / ``gr.Tabs`` context -- confirm against the caller.
    """
    with gr.TabItem("🗄️ Data"):

        # --- Upload, status and optional preview ---
        with gr.Group():
            gr.Markdown("# 📁 File Explorer")
            
            with gr.Row():
                file_input = gr.File(file_types=[".csv"], label="Upload CSV")
                status_output = gr.Textbox(label="Status", interactive=False)
                preview_checkbox = gr.Checkbox(label="Show CSV Preview", value=False)
                
            csv_preview = gr.Dataframe(label="CSV Preview", visible=False)

        # --- Manual override of the detected column types ---
        with gr.Group():
            gr.Markdown("# 🛠️ Fix Variable Type")
            with gr.Accordion(open=False):
                with gr.Row():
                    # Reclassify numeric ➝ categorical
                    num_override_dropdown = gr.Dropdown(label="Reclassify Numeric Column as Categorical")
                    fix_to_categorical_button = gr.Button("Reclassify as Categorical")

                    # Reclassify categorical ➝ numeric
                    cat_override_dropdown = gr.Dropdown(label="Reclassify Categorical Column as Numeric")
                    fix_to_numeric_button = gr.Button("Reclassify as Numeric")

                    fix_dtype_status = gr.Textbox(label="Status", interactive=False)        

        # --- Category filters (up to three columns) ---
        with gr.Group():
            gr.Markdown("# ➖ Filter Data") 
            with gr.Accordion(open=False):       
                with gr.Row():
                    cat_col_dropdown = gr.Dropdown(label="Select Categorical Columns for Filter", multiselect=True, max_choices=3)
                    # One value selector per filter column; hidden until a column is chosen.
                    cat_val_multiselect_1 = gr.Dropdown(label="Categories for Filter 1", multiselect=True, visible=False, interactive=True)
                    cat_val_multiselect_2 = gr.Dropdown(label="Categories for Filter 2", multiselect=True, visible=False, interactive=True)
                    cat_val_multiselect_3 = gr.Dropdown(label="Categories for Filter 3", multiselect=True, visible=False, interactive=True)
        
                with gr.Row():
                    apply_filter_button = gr.Button("🚀 Apply Filter")
                    filter_status = gr.Textbox(label="Filter Status", interactive=False)

    # --- Modify behavior of components of the GUI ---
    # Loading a file refreshes every dropdown and the status box (7 outputs).
    file_input.change(
        fn=load_csv,
        inputs=[file_input],
        outputs=[
            cat_col_dropdown,
            num_override_dropdown,
            cat_override_dropdown,
            cat_val_multiselect_1,
            cat_val_multiselect_2,
            cat_val_multiselect_3,
            status_output
        ]
    )

    # csv_preview is listed twice because toggle_preview returns two values
    # for it: the table content and a visibility update.
    preview_checkbox.change(
        fn=toggle_preview,
        inputs=preview_checkbox,
        outputs=[csv_preview, csv_preview]
    )

    # Both reclassification buttons refresh the same three dropdowns and the
    # status box (4 outputs).
    fix_to_categorical_button.click(
        fn=reclassify_as_categorical,
        inputs=[num_override_dropdown],
        outputs=[
            cat_col_dropdown,
            num_override_dropdown,
            cat_override_dropdown,
            fix_dtype_status
        ]
    )

    fix_to_numeric_button.click(
        fn=reclassify_as_numeric,
        inputs=[cat_override_dropdown],
        outputs=[
            cat_col_dropdown,
            num_override_dropdown,
            cat_override_dropdown,
            fix_dtype_status
        ]
    )

    # Two handlers are attached to the same change event: an informational
    # toast and the refresh of the per-column value selectors.
    cat_col_dropdown.change(
        fn=max_categorical_warning,
        inputs=cat_col_dropdown,
        outputs=[]
    )

    cat_col_dropdown.change(
        fn=update_category_filters,
        inputs=cat_col_dropdown,
        outputs=[cat_val_multiselect_1, cat_val_multiselect_2, cat_val_multiselect_3]
    )

    apply_filter_button.click(
        fn=apply_filters,
        inputs=[
            cat_col_dropdown,
            cat_val_multiselect_1,
            cat_val_multiselect_2,
            cat_val_multiselect_3
        ],
        outputs=[filter_status]
    )

## Logic control of Graphical Tab

In [19]:
def histo_add_ci_warning(check):
    """Show a reminder to run the confidence-interval inference when ``check`` is truthy."""
    if not check:
        return
    gr.Warning("If you haven't done it yet, run first a statistical inference for confidence interval.")

def histo_add_pi_warning(check):
    """Show a reminder to run the prediction-interval inference when ``check`` is truthy."""
    if not check:
        return
    gr.Warning("If you haven't done it yet, run first a statistical inference for prediction interval.")

In [20]:
def toggle_graph_stat(graph_stat, histo_add_normal, histo_add_ci, histo_add_pi, ecdf_add_normal):
    """Switch the option widgets between the histogram set and the ECDF set.

    Returns:
        A tuple of eight ``gr.update`` objects in the order (histo_add_kde,
        histo_add_data, histo_add_normal, histo_add_ci, histo_add_pi,
        ecdf_add_normal, ecdf_add_conf, ecdf_alpha). Checkboxes of the hidden
        set are reset to False; those of the shown set keep the value passed
        in. Any other ``graph_stat`` returns None.
    """
    def show(**extra):
        return gr.update(visible=True, **extra)

    def hide(**extra):
        return gr.update(visible=False, **extra)

    if graph_stat == "Histogram":
        return (
            show(),                        # histo_add_kde
            show(),                        # histo_add_data
            show(value=histo_add_normal),  # histo_add_normal
            show(value=histo_add_ci),      # histo_add_ci
            show(value=histo_add_pi),      # histo_add_pi
            hide(value=False),             # ecdf_add_normal
            hide(),                        # ecdf_add_conf
            hide(),                        # ecdf_alpha
        )

    if graph_stat == "Empirical Cumulative Distribution Function (ECDF)":
        return (
            hide(),
            hide(),
            hide(value=False),
            hide(value=False),
            hide(value=False),
            show(value=ecdf_add_normal),
            show(),
            show(),
        )

    return None

In [21]:
def _graph_error(message):
    """Return the five handler outputs that display `message` as a one-cell error table."""
    # results_block, output_table, output_table, output_plot, output_plot
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        pd.DataFrame([[message]], columns=["Error"]),
        gr.update(visible=False),
        None,
    )


def _graph_hat_mu(stats, choice, text):
    """Turn the μ dropdown choice into a value; None when the choice is not recognised."""
    if choice == "Other":
        return float(text)
    attribute = {
        "Sample Mean": "mean",
        "Sample Median": "median",
        "Geometric Mean": "geometric_mean",
        "Harmonic Mean": "harmonic_mean",
        "Weighted Mean": "weighted_mean",
        "Trimmed Mean": "trimmed_mean",
        "Interquartile Mean": "interquartile_mean",
        "Winsorized Mean": "winsorized_mean",
    }.get(choice)
    # getattr keeps the lookup lazy: only the selected statistic has to exist on `stats`.
    return getattr(stats, attribute) if attribute is not None else None


def _graph_hat_sigma(stats, choice, text):
    """Turn the σ dropdown choice into a value; None when the choice is not recognised."""
    if choice == "Other":
        return float(text)
    attribute = {
        "Deviation (1 ddof)": "S1",
        "Range (bias corrected)": "R_bias_corrected",
        "IQR (bias corrected)": "IQR_bias_corrected",
        "MAD (bias corrected)": "MAD_bias_corrected",
        "AAD (bias corrected)": "AAD_bias_corrected",
    }.get(choice)
    return getattr(stats, attribute) if attribute is not None else None


def run_graph_stat(
        column,
        graph_stat,
        histo_add_kde_check, histo_add_data_check,
        histo_add_ci, histo_choose_ci,
        histo_add_pi, histo_choose_pi,
        histo_add_normal,
        histo_hat_mu, histo_hat_mu_text,
        histo_hat_sigma, histo_hat_sigma_text,
        ecdf_add_conf,
        ecdf_alpha,
        ecdf_add_normal,
        ecdf_hat_mu, ecdf_hat_mu_text,
        ecdf_hat_sigma, ecdf_hat_sigma_text,
        ):
    """Run the selected graphical analysis and return the updates for the results area.

    Parameters mirror the widgets of the Graphical tab: `column` is the numeric
    column to analyse, `graph_stat` the graph name, the `histo_*` arguments
    configure the histogram and the `ecdf_*` arguments configure the ECDF
    (`ecdf_alpha` is the confidence level typed by the user, e.g. "0.95").

    Always returns five values, matching the handler's declared outputs:
    (results_block update, output_table update, table value,
    output_plot update, plot value). Invalid input (bad confidence level,
    non-numeric custom μ/σ, unknown graph type) is reported as a one-cell
    "Error" table instead of raising.
    """
    df, data, stats, error_df, error_plot = prepare_data(column)

    # Truth-testing a DataFrame raises ValueError, so handle that type explicitly.
    # NOTE(review): prepare_data is defined elsewhere; this assumes it returns a
    # falsy value (e.g. None) or an empty frame for error_df when there is no error.
    has_error = (not error_df.empty) if isinstance(error_df, pd.DataFrame) else bool(error_df)
    if has_error:
        return (
            gr.update(visible=True),
            gr.update(visible=True),
            error_df,
            gr.update(visible=error_plot is not None),
            error_plot,
        )

    # --- Collect the graph-specific settings ---
    if graph_stat == "Histogram":
        add_normal = histo_add_normal
        mu_choice, mu_text = histo_hat_mu, histo_hat_mu_text
        sigma_choice, sigma_text = histo_hat_sigma, histo_hat_sigma_text
    elif graph_stat == "Empirical Cumulative Distribution Function (ECDF)":
        # Validate the confidence level BEFORE doing arithmetic on it:
        # parse_text may yield None for unparsable text.
        conf_level = parse_text(ecdf_alpha)
        if conf_level is None or not (0 < conf_level < 1):
            return _graph_error("Invalid alpha value.")
        alpha = 1 - conf_level

        add_normal = ecdf_add_normal
        mu_choice, mu_text = ecdf_hat_mu, ecdf_hat_mu_text
        sigma_choice, sigma_text = ecdf_hat_sigma, ecdf_hat_sigma_text
    else:
        return _graph_error(f"Unknown graph type: {graph_stat!r}")

    # --- Parameters of the optional Normal overlay ---
    hat_mu, hat_sigma = None, None
    if add_normal:
        try:
            hat_mu = _graph_hat_mu(stats, mu_choice, mu_text)
            hat_sigma = _graph_hat_sigma(stats, sigma_choice, sigma_text)
        except (TypeError, ValueError):
            # float() failed on the free-text value typed for "Other".
            return _graph_error("The custom value for μ or σ must be a number.")

    # --- Graphical Analysis ---
    if graph_stat == "Histogram":
        fig = stats.PlotHistogram(
            histo_add_kde_check,
            histo_add_data_check,
            histo_add_ci, histo_choose_ci,
            histo_add_pi, histo_choose_pi,
            histo_add_normal,
            hat_mu,
            hat_sigma
        )
    else:
        fig = stats.PlotEcdf(
            alpha,
            ecdf_add_conf,
            ecdf_add_normal,
            hat_mu,
            hat_sigma
        )

    # results_block, output_table, output_table, output_plot, output_plot
    return gr.update(visible=True), gr.update(visible=False), pd.DataFrame(), gr.update(visible=True), fig

## GUI of Graphical Tab

In [22]:
def build_graphical_tab():
    """Build the "Graphical Analysis" tab and register its event handlers.

    Called inside the `gr.Tabs()` context when the app is assembled. Creates
    the input widgets for the two supported graphs (histogram and ECDF), a
    results group that starts hidden, and the listeners that toggle widget
    visibility, show reminders, and run the analysis. Returns nothing.
    """
    with gr.TabItem("📊 Graphical Analysis"):
        with gr.Group():
            gr.Markdown("# 📊 Graphical Analysis")
            # Row 1: column and graph selection plus the per-graph basic options.
            with gr.Row():
                refresh_columns_button = gr.Button("🔄 Refresh Numeric Columns")
                column_dropdown = gr.Dropdown(label="Select Numeric Column", choices=[], interactive=True)

                graph_stat_dropdown = gr.Dropdown(
                    label="Select Graph",
                    choices=[
                        "Histogram",
                        "Empirical Cumulative Distribution Function (ECDF)"
                    ],
                    value="Histogram",
                    interactive=True
                )

                # Histogram options are visible at start because "Histogram" is the initial graph.
                histo_add_kde = gr.Checkbox(label="Add KDE", value=True, visible=True, interactive=True)
                histo_add_data = gr.Checkbox(label="Show data", value=False, visible=True, interactive=True)

                # ECDF options start hidden; the textbox holds a confidence level,
                # which run_graph_stat converts to alpha = 1 - level.
                ecdf_add_conf = gr.Checkbox(label="Add CI for the ECDF", value=True, visible=False, interactive=True)
                ecdf_alpha = gr.Textbox(label="Confidence level (e.g. 0.95)", value=0.95, interactive=True, visible=False)

            # Row 2: optional Normal density overlay for the histogram and its μ/σ estimators.
            with gr.Row():
                histo_add_normal = gr.Checkbox(label="Add Normal Density", value=False)

                histo_hat_mu = gr.Dropdown(
                    label="μ",
                    choices=[
                        'Sample Mean',
                        'Sample Median',
                        'Geometric Mean',
                        'Harmonic Mean',
                        'Weighted Mean',
                        'Trimmed Mean',
                        'Interquartile Mean',
                        'Winsorized Mean',
                        "Other"
                    ],
                    value="Sample Mean",
                    interactive=True,
                    visible=False
                )
                # Free-text value used when "Other" is chosen for μ.
                histo_hat_mu_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

                histo_hat_sigma = gr.Dropdown(
                    label="σ",
                    choices=[
                        "Deviation (1 ddof)",
                        "Range (bias corrected)",
                        "IQR (bias corrected)",
                        "MAD (bias corrected)",
                        "AAD (bias corrected)",
                        "Other"
                    ],
                    value="Deviation (1 ddof)",
                    interactive=True,
                    visible=False
                )
                # Free-text value used when "Other" is chosen for σ.
                histo_hat_sigma_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

            # Row 3: optional confidence-interval overlay for the histogram.
            with gr.Row():
                histo_add_ci = gr.Checkbox(label="Add Confidence Interval", value=False)

                histo_choose_ci = gr.Radio(
                    label="Confidence Interval",
                    choices=["Mean", "Median", "Both"],
                    value="Mean",
                    interactive=True,
                    visible=False
                )

            # Row 4: optional prediction-interval overlay for the histogram.
            with gr.Row():                
                histo_add_pi = gr.Checkbox(label="Add Prediction Interval", value=False)
                histo_choose_pi = gr.Radio(
                    label="Prediction Interval",
                    choices=["Mean", "Median", "IQR"],
                    value="Mean",
                    interactive=True,
                    visible=False
                )

            # Row 5: optional Normal CDF overlay for the ECDF and its μ/σ estimators.
            with gr.Row():
                ecdf_add_normal = gr.Checkbox(label="Add Normal CDF", value=False, visible=False)

                ecdf_hat_mu = gr.Dropdown(
                    label="μ",
                    choices=[
                        'Sample Mean',
                        'Sample Median',
                        'Geometric Mean',
                        'Harmonic Mean',
                        'Weighted Mean',
                        'Trimmed Mean',
                        'Interquartile Mean',
                        'Winsorized Mean',
                        "Other"
                    ],
                    value="Sample Mean",
                    interactive=True,
                    visible=False
                )
                ecdf_hat_mu_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

                ecdf_hat_sigma = gr.Dropdown(
                    label="σ",
                    choices=[
                        "Deviation (1 ddof)",
                        "Range (bias corrected)",
                        "IQR (bias corrected)",
                        "MAD (bias corrected)",
                        "AAD (bias corrected)",
                        "Other"
                    ],
                    value="Deviation (1 ddof)",
                    interactive=True,
                    visible=False
                )
                ecdf_hat_sigma_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

            run_graph_stat_button = gr.Button(value=" 🚀 Run Graphical Analysis")

        # --- Results ---
        # Hidden until run_graph_stat returns an update that makes the group visible.
        with gr.Group(visible=False) as results_block:
            gr.Markdown("# 🎯 Results")
            output_table = gr.Dataframe()
            output_plot = gr.Plot()

    # --- Modify behavior of components of the GUI ---
    # load_numeric_cols is defined elsewhere in the notebook; its result feeds the column dropdown.
    refresh_columns_button.click(
        fn=load_numeric_cols,
        inputs=[],
        outputs=[column_dropdown]
    )

    # The outputs list must stay in the same order as the tuple returned by toggle_graph_stat.
    graph_stat_dropdown.change(
        fn=toggle_graph_stat,
        inputs=[graph_stat_dropdown, histo_add_normal, histo_add_ci, histo_add_pi, ecdf_add_normal],
        outputs=[
            histo_add_kde,
            histo_add_data,
            histo_add_normal,
            histo_add_ci,
            histo_add_pi,
            ecdf_add_normal,
            ecdf_add_conf,
            ecdf_alpha]
    )

    # Each interval checkbox has two listeners: one shows/hides its radio, the other shows a reminder.
    histo_add_ci.change(
        fn=lambda check: gr.update(visible=check),
        inputs=[histo_add_ci],
        outputs=[histo_choose_ci]
    )

    histo_add_pi.change(
        fn=lambda check: gr.update(visible=check),
        inputs=[histo_add_pi],
        outputs=[histo_choose_pi]
    )

    histo_add_ci.change(
        fn=histo_add_ci_warning,
        inputs=[histo_add_ci],
        outputs=[]
    )

    histo_add_pi.change(
        fn=histo_add_pi_warning,
        inputs=[histo_add_pi],
        outputs=[]
    )

    # toggle_add_normal and add_normal_warning are defined elsewhere in the notebook.
    histo_add_normal.change(
        fn=toggle_add_normal,
        inputs=[histo_add_normal, histo_hat_mu, histo_hat_sigma],
        outputs=[histo_hat_mu, histo_hat_mu_text, histo_hat_sigma, histo_hat_sigma_text]
    )

    histo_add_normal.change(
        fn=add_normal_warning,
        inputs=histo_add_normal,
        outputs=[]
    )

    # The free-text estimator box is only shown while "Other" is selected.
    histo_hat_mu.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=histo_hat_mu,
        outputs=histo_hat_mu_text
    )

    histo_hat_sigma.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=histo_hat_sigma,
        outputs=histo_hat_sigma_text
    )

    # The confidence-level textbox follows the "Add CI for the ECDF" checkbox.
    ecdf_add_conf.change(
        fn=lambda check: gr.update(visible=check),
        inputs=ecdf_add_conf,
        outputs=ecdf_alpha
    )

    # Same wiring as the histogram's Normal overlay, applied to the ECDF widgets.
    ecdf_add_normal.change(
        fn=toggle_add_normal,
        inputs=[ecdf_add_normal, ecdf_hat_mu, ecdf_hat_sigma],
        outputs=[ecdf_hat_mu, ecdf_hat_mu_text, ecdf_hat_sigma, ecdf_hat_sigma_text]
    )

    ecdf_add_normal.change(
        fn=add_normal_warning,
        inputs=ecdf_add_normal,
        outputs=[]
    )

    ecdf_hat_mu.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=ecdf_hat_mu,
        outputs=ecdf_hat_mu_text
    )

    ecdf_hat_sigma.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=ecdf_hat_sigma,
        outputs=ecdf_hat_sigma_text
    )

    # --- Run Analysis Button ---
    # The inputs list must match the positional parameters of run_graph_stat.
    # output_table and output_plot are each listed twice because run_graph_stat
    # returns a visibility update followed by the new value for each of them.
    run_graph_stat_button.click(
        run_graph_stat,
        inputs=[
            column_dropdown,
            graph_stat_dropdown,
            histo_add_kde, histo_add_data,
            histo_add_ci, histo_choose_ci,
            histo_add_pi, histo_choose_pi,
            histo_add_normal,
            histo_hat_mu, histo_hat_mu_text,
            histo_hat_sigma, histo_hat_sigma_text,
            ecdf_add_conf,
            ecdf_alpha,
            ecdf_add_normal,
            ecdf_hat_mu, ecdf_hat_mu_text,
            ecdf_hat_sigma, ecdf_hat_sigma_text          
        ],
        outputs=[results_block, output_table, output_table, output_plot, output_plot])

## Logic control of Descriptive Tab

In [23]:
def toggle_desc_params(desc_stat):
    """Show only the parameter inputs that the chosen descriptive statistic uses.

    Returns four `gr.update` objects, in order: quantiles input, weights
    input, trim input, winsorized input. "Quantiles" shows only the first;
    "Central Tendency" and "All Descriptive Statistics" show the last three;
    every other choice hides all four.
    """
    show_quantiles = desc_stat == "Quantiles"
    show_central = (not show_quantiles) and desc_stat in ["Central Tendency", "All Descriptive Statistics"]
    return (
        gr.update(visible=show_quantiles),  # quantiles input
        gr.update(visible=show_central),    # weights input
        gr.update(visible=show_central),    # trim input
        gr.update(visible=show_central),    # winsorized input
    )

In [24]:
def parse_weights(input_str, length):
    """Parse a comma-separated list of weights typed by the user.

    Parameters
    ----------
    input_str : str or None
        Text such as "1, 1, 0.5, 0.8". Empty items between commas are ignored.
    length : int
        Number of observations; the number of weights must equal it.

    Returns
    -------
    list of float, or None when `input_str` is None, empty or only whitespace
    (meaning "no weights").

    Raises
    ------
    ValueError
        If an item is not a number or the number of weights differs from `length`.
    """
    # The weights textbox has no default value, so tolerate None as well as "".
    if input_str is None or not input_str.strip():
        return None
    pieces = (piece.strip() for piece in input_str.split(','))
    weights = [float(piece) for piece in pieces if piece]
    if len(weights) != length:
        raise ValueError(f"Number of weights ({len(weights)}) must match number of observations ({length})")
    return weights

def parse_winsor(input_str):
    """Parse the winsorization limits typed as "lower, upper".

    Returns None for blank input, otherwise a two-element list of floats.
    Raises ValueError when an item is not numeric or when the number of
    items is not exactly two.
    """
    if input_str.strip() == "":
        return None

    limits = []
    for piece in input_str.split(','):
        piece = piece.strip()
        if piece:  # skip empty items such as a trailing comma
            limits.append(float(piece))

    if len(limits) == 2:
        return limits
    raise ValueError(f"Length of winsorized input ({len(limits)}) must be two (lower, and upper)")

def parse_quantiles(input_str):
    """Parse the requested quantile probabilities.

    Text without a comma is converted to a single float. Text containing a
    comma is converted to a list of floats, ignoring empty items. Raises
    ValueError when an item is not numeric.
    """
    if ',' not in input_str:
        return float(input_str)
    stripped = map(str.strip, input_str.split(','))
    return [float(piece) for piece in stripped if piece]

In [25]:
def _desc_error(message):
    """Return the five handler outputs that display `message` as a one-cell error table."""
    # results_block, output_table, output_table, output_plot, output_plot
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        pd.DataFrame([[message]], columns=["Error"]),
        gr.update(visible=False),
        None,
    )


def run_desc_stat(
        column,
        descriptive_stat,
        quantiles_input, weights_input, trim_input, winsor_input
        ):
    """Compute the selected descriptive statistics and return the updates for the results area.

    Parameters mirror the widgets of the Descriptive tab: `column` is the
    numeric column, `descriptive_stat` the statistic name, and the four
    `*_input` arguments are the raw textbox contents (only the ones relevant
    to the chosen statistic are parsed).

    Always returns five values, matching the handler's declared outputs:
    (results_block update, output_table update, table value,
    output_plot update, plot value). Unparsable parameters and unknown
    statistic names are reported as a one-cell "Error" table instead of raising.
    """
    df, data, stats, error_df, error_plot = prepare_data(column)

    # Truth-testing a DataFrame raises ValueError, so handle that type explicitly.
    # NOTE(review): prepare_data is defined elsewhere; this assumes it returns a
    # falsy value (e.g. None) or an empty frame for error_df when there is no error.
    has_error = (not error_df.empty) if isinstance(error_df, pd.DataFrame) else bool(error_df)
    if has_error:
        return (
            gr.update(visible=True),
            gr.update(visible=True),
            error_df,
            gr.update(visible=error_plot is not None),
            error_plot,
        )

    def as_measure_table(table):
        # Round and expose the index (statistic names) as a regular "Measure" column.
        return table.round(ROUND).reset_index().rename(columns={"index": "Measure"})

    # --- Parse the user-supplied parameters needed by the chosen statistic ---
    needs_central_params = descriptive_stat in ("Central Tendency", "All Descriptive Statistics")
    try:
        if descriptive_stat == "Quantiles":
            q = parse_quantiles(quantiles_input)
        elif needs_central_params:
            trim_param = parse_text(trim_input)
            winsor_param = parse_winsor(winsor_input)
            weights = parse_weights(weights_input, len(data))
    except ValueError as exc:
        return _desc_error(str(exc))

    # --- Descriptive Analysis ---
    if descriptive_stat == "Quantiles":
        stats.CalculateQuantiles(q)
        df_output = as_measure_table(stats.quantiles)

    elif descriptive_stat == "Quartiles":
        stats.CalculateQuartiles()
        df_output = as_measure_table(stats.quartiles)

    elif descriptive_stat == "Central Tendency":
        stats.CalculateCentralTendency(weights=weights, winsor_param=winsor_param, trim_param=trim_param)
        df_output = as_measure_table(stats.central_tendency)

    elif descriptive_stat == "Dispersion":
        stats.CalculateDispersion()
        df_output = as_measure_table(stats.dispersion)

    elif descriptive_stat == "Skewness":
        stats.CalculateSkewness()
        df_output = as_measure_table(stats.skew)

    elif descriptive_stat == "Kurtosis":
        stats.CalculateKurtosis()
        df_output = as_measure_table(stats.kurtosis)

    elif descriptive_stat == "All Descriptive Statistics":
        stats.CalculateDescriptiveStatistics(weights=weights, winsor_param=winsor_param, trim_param=trim_param)

        # Merge all tables with a hierarchical index
        df_combined = pd.concat([
            stats.quartiles,
            stats.central_tendency,
            stats.dispersion,
            stats.skew,
            stats.kurtosis
        ], keys=["Quartiles", "Central Tendency", "Dispersion", "Skewness", "Kurtosis"])

        df_output = df_combined.round(ROUND).reset_index().rename(columns={"level_0": "Statistic Type", "level_1": "Measure"})

    else:
        # Previously df_output stayed unbound here and the return line failed.
        return _desc_error(f"Unknown descriptive statistic: {descriptive_stat!r}")

    # results_block, output_table, output_table, output_plot, output_plot
    return gr.update(visible=True), gr.update(visible=True), df_output, gr.update(visible=False), None

## GUI of Descriptive Tab

In [26]:
def build_descriptive_tab():
    """Build the "Descriptive Analysis" tab and register its event handlers.

    Called inside the `gr.Tabs()` context when the app is assembled. Creates
    the column/statistic selectors, the parameter textboxes, a results group
    that starts hidden, and the listeners that toggle the parameter inputs
    and run the analysis. Returns nothing.
    """
    with gr.TabItem("🧮 Descriptive Analysis"):
        with gr.Group():
            gr.Markdown("# 🧮 Descriptive Analysis")
            #with gr.Accordion("🔢 Numeric Parameters", open=True):
            with gr.Row():
                refresh_columns_button = gr.Button("🔄 Refresh Numeric Columns")
                column_dropdown = gr.Dropdown(label="Select Numeric Column", choices=[])

                descriptive_stat = gr.Dropdown(
                    label="Select Descriptive Statistic",
                    choices=[
                        "Quantiles",
                        "Quartiles",
                        "Central Tendency",
                        "Dispersion",
                        "Skewness",
                        "Kurtosis",
                        "All Descriptive Statistics"
                    ],
                    value="All Descriptive Statistics",
                    interactive=True
                )
                # Hidden at start: the initial statistic is not "Quantiles".
                quantiles_input = gr.Textbox(label="Quantiles (e.g., 0.25, 0.5, 0.75)", value="0.25, 0.5, 0.75", visible=False)

            # Central-tendency parameters; visible at start because the initial
            # statistic ("All Descriptive Statistics") uses them.
            with gr.Row() as desc_stat_options:
                weights_input = gr.Textbox(label="Weights (comma-separated)", placeholder="e.g., 1, 1, 0.5, 0.8", visible=True)
                trim_input = gr.Textbox(label="Trim percentage (e.g., 0.1)", value=0.1, visible=True)
                winsor_input = gr.Textbox(label="Winsorized percentages (e.g., 0.1, 0.1)", value="0.1, 0.1", visible=True)

            run_desc_stat_button = gr.Button(value=" 🚀 Run Descriptive Analysis")

        # --- Results ---
        # Hidden until run_desc_stat returns an update that makes the group visible.
        with gr.Group(visible=False) as results_block:
            gr.Markdown("# 🎯 Results")
            output_table = gr.Dataframe()
            output_plot = gr.Plot()

    # --- Modify behavior of components of the GUI ---
    # load_numeric_cols is defined elsewhere in the notebook; its result feeds the column dropdown.
    refresh_columns_button.click(
        fn=load_numeric_cols,
        inputs=[],
        outputs=[column_dropdown]
    )

    # The outputs list must stay in the same order as the tuple returned by toggle_desc_params.
    descriptive_stat.change(
        fn=toggle_desc_params,
        inputs=descriptive_stat,
        outputs=[quantiles_input, weights_input, trim_input, winsor_input]
    )

    # --- Run Analysis Button ---
    # output_table and output_plot are each listed twice because run_desc_stat
    # returns a visibility update followed by the new value for each of them.
    run_desc_stat_button.click(
        run_desc_stat,
        inputs=[
            column_dropdown,
            descriptive_stat,
            quantiles_input, weights_input, trim_input, winsor_input
        ],
        outputs=[results_block, output_table, output_table, output_plot, output_plot]
    )


## Logic control of Statistical Inference Tab

In [27]:
def stat_inf_warning(check):
    """Remind the user to run the descriptive analysis before doing inference.

    Emits a `gr.Warning` when `check` is truthy and does nothing otherwise.
    In the inference tab it is wired to the column dropdown, so `check` is
    the selected column. Always returns None.
    """
    if not check:
        return
    reminder = "If you haven't done it yet, run first a descriptive analysis for central tendency and dispersion."
    gr.Warning(reminder)

def conf_interval_warning(check):
    """Remind the user to compute confidence intervals before plotting confidence regions.

    Emits a `gr.Warning` only when `check` equals "Confidence Regions";
    any other value is ignored. Always returns None.
    """
    if check != "Confidence Regions":
        return
    reminder = "If you haven't done it yet, run first a statistical inference for CI."
    gr.Warning(reminder)

In [28]:
def toggle_stat_inf(sel):
    """Choose which parameter rows are visible for the selected type of estimation.

    Returns two `gr.update` objects: the first for the intervals row, the
    second for the regions row. Interval-only selections show just the
    intervals row, "Confidence Regions" shows just the regions row, and any
    other selection shows both.
    """
    interval_only_choices = ["Confidence Interval", "Prediction Interval", "Confidence and Prediction Intervals"]
    intervals_only = sel in interval_only_choices
    regions_only = (not intervals_only) and sel == "Confidence Regions"

    show_intervals = not regions_only
    show_regions = not intervals_only
    return gr.update(visible=show_intervals), gr.update(visible=show_regions)

In [29]:
def parse_probs(input_str):
    """Parse a comma-separated list of confidence levels.

    Returns None for blank input, otherwise a list of floats in the order
    typed (empty items are ignored). Raises ValueError when an item is not
    numeric.
    """
    if input_str.strip() == "":
        return None
    pieces = (piece.strip() for piece in input_str.split(','))
    return [float(piece) for piece in pieces if piece]

def parse_margin(input_str):
    """Parse the extra plot margins typed as "margin for μ, margin for σ".

    Returns None for blank input, otherwise a two-element list of floats.
    Raises ValueError when an item is not numeric or when the number of
    items is not exactly two.
    """
    if input_str.strip() == "":
        return None

    margins = []
    for piece in input_str.split(','):
        piece = piece.strip()
        if piece:  # skip empty items such as a trailing comma
            margins.append(float(piece))

    if len(margins) == 2:
        return margins
    raise ValueError(f"Length of margin ({len(margins)}) must be two (μ,σ)")

In [30]:
def _stat_inf_error(message):
    """Return the five handler outputs that display `message` as a one-cell error table."""
    # results_block, output_table, output_table, output_plot, output_plot
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        pd.DataFrame([[message]], columns=["Error"]),
        gr.update(visible=False),
        None,
    )


def run_stat_inf(
        column,
        statistical_inf,
        alpha_input,
        probs_input, eps_input,
        like_add_interval,
        mean_select, mean_estimate_text,
        median_select, median_estimate_text,
        sigma_select, sigma_estimate_text
        ):
    """Run the selected statistical inference and return the updates for the results area.

    Parameters mirror the widgets of the Statistical Inference tab: `column`
    is the numeric column, `statistical_inf` the type of estimation,
    `alpha_input` the confidence level typed by the user (e.g. "0.95"),
    `probs_input`/`eps_input`/`like_add_interval` configure the confidence
    regions plot, and the `*_select`/`*_estimate_text` pairs choose the
    estimators for the mean, the median and the standard deviation.

    A t-distribution is requested when both the sample mean and the
    1-ddof deviation are selected; otherwise the Normal distribution is used.

    Always returns five values, matching the handler's declared outputs:
    (results_block update, output_table update, table value,
    output_plot update, plot value). Invalid input is reported as a one-cell
    "Error" table instead of raising.
    """
    df, data, stats, error_df, error_plot = prepare_data(column)

    # Truth-testing a DataFrame raises ValueError, so handle that type explicitly.
    # NOTE(review): prepare_data is defined elsewhere; this assumes it returns a
    # falsy value (e.g. None) or an empty frame for error_df when there is no error.
    has_error = (not error_df.empty) if isinstance(error_df, pd.DataFrame) else bool(error_df)
    if has_error:
        return (
            gr.update(visible=True),
            gr.update(visible=True),
            error_df,
            gr.update(visible=error_plot is not None),
            error_plot,
        )

    # --- Statistical Inference ---

    # Validate the confidence level BEFORE doing arithmetic on it:
    # parse_text may yield None for unparsable text.
    conf_level = parse_text(alpha_input)
    if conf_level is None or not (0 < conf_level < 1):
        return _stat_inf_error("Invalid alpha value.")
    alpha = 1 - conf_level

    # Dropdown label -> attribute of `stats`. getattr keeps the lookup lazy, so
    # only the selected statistic has to have been computed already.
    mean_attributes = {
        "Sample Mean": "mean",
        "Geometric Mean": "geometric_mean",
        "Harmonic Mean": "harmonic_mean",
        "Weighted Mean": "weighted_mean",
        "Trimmed Mean": "trimmed_mean",
        "Interquartile Mean": "interquartile_mean",
        "Winsorized Mean": "winsorized_mean",
    }
    sigma_attributes = {
        "Deviation (1 ddof)": "S1",
        "Range (bias corrected)": "R_bias_corrected",
        "IQR (bias corrected)": "IQR_bias_corrected",
        "MAD (bias corrected)": "MAD_bias_corrected",
        "AAD (bias corrected)": "AAD_bias_corrected",
    }

    hat_mean = hat_median = hat_sigma = None
    try:
        # Choose mean
        if mean_select == "Other":
            hat_mean = float(mean_estimate_text)
        elif mean_select in mean_attributes:
            hat_mean = getattr(stats, mean_attributes[mean_select])

        # Choose median
        if median_select == "Other":
            hat_median = float(median_estimate_text)
        elif median_select == "Sample Median":
            hat_median = stats.median

        # Choose sigma
        if sigma_select == "Other":
            hat_sigma = float(sigma_estimate_text)
        elif sigma_select in sigma_attributes:
            hat_sigma = getattr(stats, sigma_attributes[sigma_select])
    except (TypeError, ValueError):
        # float() failed on a free-text value typed for "Other".
        return _stat_inf_error("Custom estimates must be numeric values.")

    if (mean_select == "Sample Mean") and (sigma_select == "Deviation (1 ddof)"):
        dist = "t"
    else:
        dist = "norm"

    wants_regions = statistical_inf in ("Confidence Regions", "Confidence Interval and Regions")
    wants_intervals = statistical_inf != "Confidence Regions"

    # Estimators are only consumed by the interval computations.
    if wants_intervals and any(value is None for value in (hat_mean, hat_median, hat_sigma)):
        return _stat_inf_error("Unknown estimator selection.")

    if wants_regions:
        try:
            probs = parse_probs(probs_input)
            eps = parse_margin(eps_input)
        except ValueError as exc:
            return _stat_inf_error(str(exc))

    def as_measure_table(table):
        # Round and expose the index (interval names) as a regular "Measure" column.
        return table.round(ROUND).reset_index().rename(columns={"index": "Measure"})

    fig = None
    if statistical_inf == "Confidence Interval":
        stats.CalculateConfidenceInterval(alpha, hat_mean, hat_median, hat_sigma, dist)
        df_output = as_measure_table(stats.confidence_intervals)

    elif statistical_inf == "Prediction Interval":
        stats.CalculatePredictionInterval(alpha, hat_mean, hat_median, hat_sigma, dist)
        df_output = as_measure_table(stats.prediction_intervals)

    elif statistical_inf == "Confidence and Prediction Intervals":
        stats.CalculateConfidenceInterval(alpha, hat_mean, hat_median, hat_sigma, dist)
        stats.CalculatePredictionInterval(alpha, hat_mean, hat_median, hat_sigma, dist)

        # Merge all tables with a hierarchical index
        df_combined = pd.concat([
            stats.confidence_intervals,
            stats.prediction_intervals
        ], keys=["Confidence", "Prediction"])

        df_output = df_combined.round(ROUND).reset_index().rename(columns={"level_0": "Interval Type", "level_1": "Measure"})

    elif statistical_inf == "Confidence Regions":
        fig = stats.PlotConfidenceRegions(probs, eps, like_add_interval)
        df_output = pd.DataFrame()

    elif statistical_inf == "Confidence Interval and Regions":
        # Intervals are computed before the plot, as in the original ordering.
        stats.CalculateConfidenceInterval(alpha, hat_mean, hat_median, hat_sigma, dist)
        df_output = as_measure_table(stats.confidence_intervals)
        fig = stats.PlotConfidenceRegions(probs, eps, like_add_interval)

    else:
        # Previously df_output/fig stayed unbound here and the return line failed.
        return _stat_inf_error(f"Unknown type of estimation: {statistical_inf!r}")

    # results_block, output_table, output_table, output_plot, output_plot
    return gr.update(visible=True), gr.update(visible=wants_intervals), df_output, gr.update(visible=wants_regions), fig

## GUI of Statistical Inference Tab

In [31]:
def build_inference_tab():
    """Build the "Statistical Inference" tab and register its event handlers.

    Called inside the `gr.Tabs()` context when the app is assembled. Creates
    the estimation selector, one row of interval parameters, one row of
    confidence-region parameters (hidden at start), a results group that
    starts hidden, and the listeners that toggle rows, show reminders, and
    run the inference. Returns nothing.
    """
    with gr.TabItem("💭 Statistical Inference"):
        with gr.Group():
            gr.Markdown("# 💭 Statistical Inference")

            # Collapsed help text describing the distributional assumptions.
            with gr.Accordion("🧠 Technical Information", open=False):
                gr.Markdown(
                    """
                    - All intervals are calculated assuming the observations are i.i.d. from a Normal distribution.  
                    - If the sample mean and the sample deviation with one degree of freedom are selected as estimators for the mean and standard deviation, 
                    then a *t*-distribution is used to compute the Confidence Interval (CI) for the mean and the Prediction Interval (PI) based on the mean.  
                    - The asymptotic Normal distribution is used for the CI and PI based on the median.
                    """
                )

            with gr.Row():
                refresh_columns_button = gr.Button("🔄 Refresh Numeric Columns")
                column_dropdown = gr.Dropdown(label="Select Numeric Column", choices=[])
                
                stat_inf_dropdown = gr.Dropdown(
                    label="Type of Estimation",
                    choices=[
                        "Confidence Interval",
                        "Prediction Interval",
                        "Confidence and Prediction Intervals",
                        "Confidence Regions",
                        "Confidence Interval and Regions"
                    ],
                    value="Confidence and Prediction Intervals",
                    interactive=True
                )

            # Interval parameters: confidence level plus the estimators for mean, median and σ.
            with gr.Row(visible=True) as stat_inf_intervals:
                # Holds a confidence level; run_stat_inf converts it to alpha = 1 - level.
                alpha_input = gr.Textbox(label="Confidence level (e.g. 0.95)", value=0.95, interactive=True)
                mean_select = gr.Dropdown(
                    choices=[
                        "Sample Mean",
                        "Geometric Mean",
                        "Harmonic Mean",
                        "Weighted Mean",
                        "Trimmed Mean",
                        "Interquartile Mean",
                        "Winsorized Mean",
                        "Other"
                        ],
                    label="Mean Estimate",
                    value="Sample Mean"
                )

                # Each *_estimate_text box is shown only while "Other" is selected in its dropdown.
                mean_estimate_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

                median_select = gr.Dropdown(
                    choices=["Sample Median", "Other"],
                    label="Median Estimate", value="Sample Median"
                )
                median_estimate_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

                sigma_select = gr.Dropdown(
                    choices=[
                        "Deviation (1 ddof)",
                        "Range (bias corrected)",
                        "IQR (bias corrected)",
                        "MAD (bias corrected)",
                        "AAD (bias corrected)",
                        "Other"
                    ],
                    label="Deviation Estimate",
                    value="Deviation (1 ddof)"
                )
                sigma_estimate_text = gr.Textbox(label="Write the value of a consistent estimator", visible=False)

            # Confidence-region parameters; hidden at start because the initial
            # estimation type only uses intervals.
            with gr.Row(visible=False) as stat_inf_regions:
                like_probs = gr.Textbox(label="Confidence levels (from lower to higher)", value="0.1, 0.5, 0.75, 0.89, 0.95", interactive=True, visible=True)
                like_eps = gr.Textbox(label="Extra margin for μ and σ", value="0.1, 0.05", interactive=True, visible=True)
                like_add_interval = gr.Checkbox(label="Add CI for μ and σ", value=True)

            run_stat_inf_button = gr.Button(value=" 🚀 Run Statistical Inference")
        
        # --- Results ---
        # Hidden until run_stat_inf returns an update that makes the group visible.
        with gr.Group(visible=False) as results_block:
            gr.Markdown("# 🎯 Results")
            output_table = gr.Dataframe()
            output_plot = gr.Plot()

    # --- Modify behavior of components of the GUI ---
    # load_numeric_cols is defined elsewhere in the notebook; its result feeds the column dropdown.
    refresh_columns_button.click(
        fn=load_numeric_cols,
        inputs=[],
        outputs=[column_dropdown]
    )

    # Selecting a column triggers the reminder to run the descriptive analysis first.
    column_dropdown.change(
        fn=stat_inf_warning,
        inputs=[column_dropdown],
        outputs=[]
    )

    # Two listeners on the estimation selector: a reminder and the row toggling.
    stat_inf_dropdown.change(
        fn=conf_interval_warning,
        inputs=[stat_inf_dropdown],
        outputs=[]
    )

    stat_inf_dropdown.change(
        fn=toggle_stat_inf,
        inputs=stat_inf_dropdown,
        outputs=[stat_inf_intervals, stat_inf_regions]
    )

    mean_select.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=mean_select,
        outputs=mean_estimate_text
    )

    median_select.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=median_select,
        outputs=median_estimate_text
    )

    sigma_select.change(
        fn=lambda choice: gr.update(visible=(choice == "Other")),
        inputs=sigma_select,
        outputs=sigma_estimate_text
    )

    # --- Run Analysis Button ---
    # The inputs list must match the positional parameters of run_stat_inf.
    # output_table and output_plot are each listed twice because run_stat_inf
    # returns a visibility update followed by the new value for each of them.
    run_stat_inf_button.click(
        run_stat_inf,
        inputs=[
            column_dropdown,
            stat_inf_dropdown,
            alpha_input,
            like_probs, like_eps,
            like_add_interval,
            mean_select, mean_estimate_text,
            median_select, median_estimate_text,
            sigma_select, sigma_estimate_text
        ],
        outputs=[results_block, output_table, output_table, output_plot, output_plot]
    )

In [32]:
def build_hypothesis_tab():
    """Add the placeholder "Hypothesis Testing" tab; it only shows two headings."""
    with gr.TabItem("🧪 Hypothesis Testing"), gr.Group():
        for heading in ("# 🧪 Hypothesis Testing", "# 🚧 On construction"):
            gr.Markdown(heading)

In [33]:
def build_regression_tab():
    """Add the placeholder "Linear Regression" tab; it only shows two headings."""
    with gr.TabItem("📈 Linear Regression"), gr.Group():
        for heading in ("# 📈 Linear Regression", "# 🚧 On construction"):
            gr.Markdown(heading)

In [None]:
# Assemble the application: every build_*_tab() call runs inside the shared
# gr.Tabs() container, in the order the tabs are listed here.
with gr.Blocks() as demo:
    with gr.Tabs():
        build_data_tab()
        build_graphical_tab()
        build_descriptive_tab()
        build_inference_tab()
        build_hypothesis_tab()
        build_regression_tab()

    # Footer rendered below the tabs.
    gr.Markdown("### 🤓 Created by Irving Gómez Méndez, version 2.1.0, June 2025.")

In [35]:
# Start serving the assembled interface (default launch settings).
demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


