In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import os
import seaborn as sns
from matplotlib.patches import Patch
from matplotlib.lines import Line2D


In [None]:
# Helper functions
def read_and_adjust_sheet(filename, sheet_name, total_rows=35, rows_to_skip=(1, 2), trailing_rows=6):
    """Read one worksheet, dropping selected rows and the tail of the sheet.

    Parameters
    ----------
    filename : str or path-like
        Workbook handed to ``pd.read_excel``.
    sheet_name : str or int
        Worksheet to read.
    total_rows : int, default 35
        Number of rows the sheet is taken to contain.
    rows_to_skip : iterable of int, default (1, 2)
        Row numbers forwarded to ``pd.read_excel(skiprows=...)``.
    trailing_rows : int, default 6
        Rows at the bottom of the sheet that are left unread.

    Returns
    -------
    pandas.DataFrame
        The parsed rows, re-indexed 1..len(df). ``nrows`` passed to
        ``pd.read_excel`` is ``total_rows - len(rows_to_skip) - trailing_rows``
        (27 with the defaults).
    """
    skipped = list(rows_to_skip)
    nrows = total_rows - len(skipped) - trailing_rows
    df = pd.read_excel(filename, sheet_name=sheet_name, skiprows=skipped, nrows=nrows)
    # 1-based row labels instead of pandas' default 0-based RangeIndex.
    df.index = range(1, len(df) + 1)
    return df

def normalize_to_one(df, columns):
    """Rescale the given columns so that each row sums to one across them.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of str
        Columns to normalise. They are converted to float in the result.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which every value of ``columns`` is divided by its
        row's sum over ``columns`` (NaNs are ignored when summing). Rows whose
        sum is zero are set to 0 in all of ``columns``.

    Notes
    -----
    The computation is positional, so rows that share an index label are
    normalised independently instead of overwriting one another.
    """
    columns = list(columns)
    normalized_df = df.copy()
    if not columns:
        return normalized_df

    values = df[columns].astype(float).to_numpy()
    row_sums = np.nansum(values, axis=1, keepdims=True)

    # Zero-sum rows are replaced by 0 below, so the division warnings they
    # would trigger are irrelevant.
    with np.errstate(divide='ignore', invalid='ignore'):
        scaled = np.where(row_sums != 0, values / row_sums, 0.0)

    for position, column in enumerate(columns):
        normalized_df[column] = scaled[:, position]
    return normalized_df

def _sig_bar(ax, x1, x2, y, text, tick=0.01, lw=1.6, pad=0.006):
    """Draw a short significance bar from x1 to x2 at height y with centered label."""
    ax.plot([x1, x1, x2, x2], [y, y+tick, y+tick, y], color='black', lw=lw, solid_capstyle='butt', clip_on=False)
    ax.text((x1+x2)/2, y+tick+pad, text, ha='center', va='bottom')

def _tile_markers_in_box(ax, box_patch, marker, n_cols=7, n_rows=6, size=22):
    """Fill a box (PathPatch) with a regular grid of markers, clipped to the box."""
    path = box_patch.get_path().transformed(box_patch.get_transform())
    verts = path.vertices
    xmin, xmax = verts[:,0].min(), verts[:,0].max()
    ymin, ymax = verts[:,1].min(), verts[:,1].max()

    # small margins so markers don't touch edges
    x_margin = (xmax - xmin) * 0.08
    y_margin = (ymax - ymin) * 0.12
    xs = np.linspace(xmin + x_margin, xmax - x_margin, n_cols)
    ys = np.linspace(ymin + y_margin, ymax - y_margin, n_rows)

    X, Y = np.meshgrid(xs, ys)
    ax.scatter(
        X.ravel(), Y.ravel(),
        marker=marker, s=size,
        facecolor='black', edgecolor='black',
        linewidths=0.5, clip_path=box_patch, clip_on=True, zorder=3
    )

In [None]:
# File paths and data loading
# The data directory can be overridden through the SUBJECT_DATA_DIR environment
# variable; when it is unset the original location is used.
base_path = os.environ.get("SUBJECT_DATA_DIR", "/Users/jordanfeldman/Desktop/Research/Subject data")
bmh_files = [
    'data_BMH01_test.xlsx',
    'data_BMH02_test.xlsx',
    'data_BMH21_test.xlsx',
    'data_BMH06_test.xlsx',
    'data_BMH07_test.xlsx',
    'data_BMH08_test.xlsx',
    'data_BMH09_test.xlsx',
    'data_BMH10_test.xlsx',
    'data_BMH13_test.xlsx',
    'data_BMH19_test.xlsx',
    'data_BMH20_test.xlsx',
    'data_BMH17_test.xlsx'
]

# Response variable file
filepath_survey = os.path.join(base_path, "Subjective_Responses.xlsx")

# Define the mappings for categorical data
walking_speed_mapping = {'Slow': 0, 'Medium': 1, 'Fast': 2}
accuracy_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
balance_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Columns for predictors and response variables
predictors_columns = ['MeanError (mm)','Mean Width Straights (mm)', 'Straights Width Variability (mm)',
    'Mean Length Straights (mm)', 'Straights Length Variability (mm)', 'Average Speed (m/s)', 'Head angle','Condition']
targets_columns = ['Balance', 'Foot Placement', 'Walking Speed', 'Metabolics']

# Load every participant exactly once. (The previous version wrapped this in an
# outer loop over the same file list whose loop variable was never used, so the
# identical result was rebuilt once per file.)
predictor_frames = []
target_frames = []
for bmh_file in bmh_files:
    # 'data_BMH01_test.xlsx' -> 'BMH01'; also the participant's sheet name in
    # the survey workbook.
    participant_id = bmh_file.split('_')[1]

    df_data = pd.read_excel(os.path.join(base_path, bmh_file))
    df_data['Walking Speed'] = df_data['Walking Speed'].map(walking_speed_mapping).astype(int)
    df_data['Accuracy'] = df_data['Accuracy'].map(accuracy_mapping).astype(int)
    df_data['Balance'] = df_data['Balance'].map(balance_mapping).astype(int)

    # Condition code such as 's0a1b2' (speed / accuracy / balance levels).
    df_data['Condition'] = (
        's' + df_data['Walking Speed'].astype(str)
        + 'a' + df_data['Accuracy'].astype(str)
        + 'b' + df_data['Balance'].astype(str)
    )

    # Read the corresponding response variables and normalise each row of the
    # survey columns so that it sums to one.
    df_survey = read_and_adjust_sheet(filepath_survey, participant_id)
    df_survey = normalize_to_one(df_survey, targets_columns)

    # .assign returns new frames, so the participant tag is not written into a
    # slice of df_data / df_survey (avoids SettingWithCopyWarning).
    predictors = df_data[predictors_columns].assign(Participant=participant_id)
    targets = df_survey[targets_columns].assign(Participant=participant_id)

    predictor_frames.append(predictors)
    target_frames.append(targets)

# NOTE(review): predictors and targets are stacked independently; any later
# row-wise pairing assumes both have the same number of rows per participant.
combined_data = pd.concat(predictor_frames, ignore_index=True)
combined_targets = pd.concat(target_frames, ignore_index=True)

In [None]:
# Define the plotting function
def plot_grouped_boxplot(data, column_to_plot, grouping_condition, ax=None):
    """
    Three-group boxplot of one metric with significance brackets.

    Rows are assigned to group 0, 1 or 2 from the digit that follows
    `grouping_condition` in the 'Condition' string (e.g. 's0a1b2'). A
    Kruskal-Wallis test is run across the groups; if it is significant at
    0.05, pairwise two-sided Mann-Whitney U tests with Bonferroni correction
    follow, and pairs with adjusted p < 0.05 ('*') or p < 0.005 ('**') get a
    bracket. Test results are printed.

    Styling by grouping:
      s: red / yellow / teal boxes, ticks Slow / Typical / Fast
      b: grayscale light→dark boxes, ticks None / Low / High
      a: white boxes, ticks Ignore / Near / Accurate
      other: default boxes, ticks 0 / 1 / 2

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Condition' and `column_to_plot`; it is not modified.
    column_to_plot : str
        Metric to plot. Columns whose name contains '(mm' (or 'mean error
        straights') are divided by 1000 and labelled in metres.
    grouping_condition : str
        Letter preceding the grouping digit in 'Condition' ('s', 'b' or 'a').
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted a new figure is created, laid out and
        shown by this function.

    Returns
    -------
    None

    Side effects: creates a 'plots' directory in the working directory if it
    does not exist (nothing is written to it here).
    """
    os.makedirs('plots', exist_ok=True)

    # Remember whether the figure is ours before `ax` is rebound below;
    # otherwise the final layout/show step could never run.
    owns_figure = ax is None

    df = data.copy()
    df['Grouping'] = (
        df['Condition']
        .str.extract(f'(?<={grouping_condition})(\\d)', expand=False)
        .astype(int)
    )

    # ---- Units: convert mm → m ----
    name_lower = column_to_plot.lower()
    is_speed = (column_to_plot == 'Average Speed (m/s)')
    is_mm_like = ('(mm' in name_lower) or ('mean error straights' in name_lower)

    column_for_label = column_to_plot
    if is_mm_like and not is_speed:
        # convert values and adjust the column name used for labels
        df[column_to_plot] = df[column_to_plot] / 1000.0
        column_for_label = column_to_plot.replace('(mm)', '(m)')

    # Build groups
    groups = [df.loc[df['Grouping'] == i, column_to_plot].dropna().values for i in range(3)]

    # ---- Stats ----
    pair_p = {}
    if all(len(g) for g in groups):
        H, p_omni = stats.kruskal(*groups)
        print(f'Kruskal–Wallis ({column_for_label}): H={H:.3f}, p={p_omni:.4g}')
        if p_omni < 0.05:
            pairs = [(0,1),(0,2),(1,2)]
            m = len(pairs)  # Bonferroni factor
            for i,j in pairs:
                U, p = stats.mannwhitneyu(groups[i], groups[j], alternative='two-sided')
                pair_p[(i,j)] = min(p*m, 1.0)
                print(f'  {i} vs {j}: U={U:.2f}, p_adj={pair_p[(i,j)]:.4g}')
    else:
        print('Omnibus skipped: one or more empty groups.')

    # ---- Plot ----
    if owns_figure:
        fig, ax = plt.subplots(figsize=(7.0, 5.6))
    bp = ax.boxplot(
        groups,
        patch_artist=True,
        showfliers=False,
        widths=0.55,
        medianprops=dict(color='red', lw=1.6),
        boxprops=dict(color='black', lw=1.2),
        whiskerprops=dict(color='black', lw=1.2),
        capprops=dict(color='black', lw=1.2)
    )
    # Default tick labels, set on the axis rather than through boxplot's
    # `labels=` keyword, which newer matplotlib releases deprecate.
    ax.set_xticklabels(['0', '1', '2'])

    means = [np.mean(g) for g in groups if len(g)]
    ax.scatter(
        [i+1 for i, g in enumerate(groups) if len(g)],
        means,
        color='red', marker='^', s=70, zorder=3, label='Mean'
    )

    # ---- Styling by grouping ----
    if grouping_condition == 's':
        colors = ['#F8766D', '#F9C21A', '#00BFC4']  # Slow, Typical, Fast
        for box, c in zip(bp['boxes'], colors):
            box.set_facecolor(c); box.set_edgecolor('black')
        ax.set_xlabel('Speed prompt', fontsize=20)
        ax.set_xticklabels(['Slow','Typical','Fast'])
        # No y ticks/limits here: the speed-specific range is applied in the
        # fixed-range section below, and pinning 0–2 ticks for every metric
        # grouped by speed would mis-scale non-speed columns.

    elif grouping_condition == 'b':
        grays = ['#e5e5e5', '#a9a9a9', '#4a4a4a']  # None, Low, High
        for box, c in zip(bp['boxes'], grays):
            box.set_facecolor(c); box.set_edgecolor('black')
        ax.set_xlabel('Visual perturbation intensity', fontsize=20)
        ax.set_xticklabels(['None','Low','High'])

    elif grouping_condition == 'a':
        # Plain white boxes. NOTE(review): earlier code prepared marker
        # patterns and legend handles here but never drew them; no pattern
        # overlay is applied.
        for box in bp['boxes']:
            box.set_facecolor('white'); box.set_edgecolor('black')
        ax.set_xlabel('Foot placement prompt', fontsize=20)
        ax.set_xticklabels(['Ignore','Near','Accurate'])

    # ---- Y label (consistent with units shown) ----
    if 'straights length variability' in name_lower:
        ax.set_ylabel('Step length variability (m)')
    elif 'straights width variability' in name_lower:
        ax.set_ylabel('Step width variability (m)')
    elif 'mean error straights' in name_lower:
        ax.set_ylabel('Foot placement error (m)')
    elif is_speed:
        ax.set_ylabel('Walking speed (m/s)')
    else:
        ax.set_ylabel(column_for_label)

    # ---- Significance bars ----
    # bp['whiskers'] alternates lower/upper per box; odd entries are the upper ones.
    whisk_tops = [line.get_ydata().max() for idx, line in enumerate(bp['whiskers']) if idx % 2 == 1]
    ymax_data = np.nanmax(whisk_tops) if whisk_tops else max([g.max() for g in groups if len(g)])
    ymin_data = min([g.min() for g in groups if len(g)]) if any(len(g) for g in groups) else 0.0

    ax.set_ylim(bottom=min(0.0, ymin_data * 0.95), top=max(ymax_data * 1.05, ymax_data + 1e-9))
    ylo, yhi = ax.get_ylim()
    span = max(1e-9, yhi - ylo)
    y0 = ymax_data
    y_inc = 0.08 * span  # vertical spacing between brackets

    # Each pair has a fixed bracket level so brackets never overlap.
    bracket_levels = [((0, 1), 1), ((1, 2), 2), ((0, 2), 3)]
    any_brackets = False
    for (i, j), level in bracket_levels:
        p = pair_p.get((i, j))
        if p is None:
            continue
        if p < 0.005:
            stars = '**'
        elif p < 0.05:
            stars = '*'
        else:
            continue
        x1, x2 = i+1, j+1   # center over boxes
        y = y0 + level * y_inc
        _sig_bar(ax, x1, x2, y, stars,
                 tick=y_inc*0.25, lw=1.6, pad=y_inc*0.12)
        any_brackets = True

    if any_brackets:
        ax.set_ylim(top=y0 + 4.2 * y_inc)

    # ---- Custom fixed y-axis ranges & ticks ----
    if column_to_plot == 'Average Speed (m/s)':
        y_top, y_ticks = 2.5, np.linspace(0, 2.5, 6)
    elif 'Straights Width Variability' in column_to_plot:
        y_top, y_ticks = 0.25, np.linspace(0, 0.25, 6)
    elif 'Mean Error Straights' in column_to_plot:
        y_top, y_ticks = 0.36, np.linspace(0, 0.4, 6)
    elif 'Straights Length Variability' in column_to_plot:
        y_top, y_ticks = 0.25, np.linspace(0, 0.25, 6)
    else:
        y_top, y_ticks = None, None

    if y_top is not None:
        # Ticks first, limits second: set_yticks widens the view to include
        # every tick, which would otherwise undo a tighter limit such as 0.36.
        ax.set_yticks(y_ticks)
        ax.set_ylim(0, y_top)

    ax.spines[['top','right']].set_visible(False)
    ax.tick_params(axis='both', labelsize=11)
    for spine in ['left','bottom']:
        ax.spines[spine].set_linewidth(1.2)

    ax.xaxis.label.set_fontname('Arial')
    ax.xaxis.label.set_fontsize(20)
    ax.yaxis.label.set_fontname('Arial')
    ax.yaxis.label.set_fontsize(20)

    for label in ax.get_xticklabels():
        label.set_fontname('Arial')
        label.set_fontsize(16)
    for label in ax.get_yticklabels():
        label.set_fontname('Arial')
        label.set_fontsize(16)

    if owns_figure:
        plt.tight_layout()
        plt.show()


In [None]:
# Create subplots for the four metrics
fig, axes = plt.subplots(1, 4, figsize=(26, 6))
panel_tags = ['a', 'b', 'c', 'd']
for tag, ax in zip(panel_tags, axes):
    ax.text(-0.12, 1.06, tag,
            transform=ax.transAxes, ha='left', va='bottom',
            fontsize=30, fontname='Arial', fontweight='bold')
plt.subplots_adjust(top=0.9, wspace=0.25)

# combined_data only carries the columns listed in predictors_columns, which
# names the error metric 'MeanError (mm)', while the plotting function keys its
# unit conversion, y label and y range on 'Mean Error Straights'. Expose the
# column under that name so panel c does not fail with a KeyError.
# NOTE(review): assumes 'MeanError (mm)' is the same metric (in mm) that used
# to be called 'Mean Error Straights' — confirm against the data files.
plot_data = combined_data
if 'Mean Error Straights' not in plot_data.columns and 'MeanError (mm)' in plot_data.columns:
    plot_data = plot_data.rename(columns={'MeanError (mm)': 'Mean Error Straights'})

# (metric column, grouping letter) for panels a–d, left to right
panels = [
    ('Average Speed (m/s)', 's'),
    ('Straights Width Variability (mm)', 'b'),
    ('Mean Error Straights', 'a'),
    ('Straights Length Variability (mm)', 'a'),
]
for (metric, grouping), ax in zip(panels, axes):
    plot_grouped_boxplot(plot_data, metric, grouping, ax=ax)

plt.tight_layout()
#plt.savefig('sub_paper_fig_4.svg', bbox_inches='tight')
plt.show()
