In [None]:
import os
import pandas as pd
import numpy as np

import audeer

import yaml

from evaluate_time_course_args_external import Args
import evaluate_time_course_utils

In [2]:
# Path to the directories (relative to the location of this notebook)
dir_metadata = "../../data/interim-meta"
dir_evaluated = "../../data/evaluated"
# The wind-speed folder lives below the evaluated-data folder; the value
# returned by audeer.mkdir is kept as the path (presumably the folder is
# created if it does not exist yet — confirm against the audeer docs).
# NOTE: dir_evaluated is reassigned to an absolute path in the following cell,
# while dir_out_wind keeps pointing below the relative location defined here.
dir_out_wind = audeer.mkdir(os.path.join(dir_evaluated, "wind_speed"))

In [3]:
# Run parameters (absolute, machine-specific paths).
# Root folder handed to concat_all_preds as the source of the prediction outputs
dir_output_root = "/scratch/phecker/project/audiary/projects/2021-safetytech_accelerator-mwas/passive-voyage_data_recorder/data/output"
# NOTE: overrides the relative dir_evaluated defined in the previous cell
dir_evaluated = "/scratch/phecker/project/audiary/projects/2021-safetytech_accelerator-mwas/passive-voyage_data_recorder/data/evaluated"
# Selects which predictions are loaded (passed to concat_all_preds)
preds_str = "emotion"
# NOTE(review): this is the string "false", not a bool; how it is interpreted
# is up to concat_all_preds — confirm.
flag_concat = "false"
# CSV file passed to load_database_aisl
path_active = "/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/final_data-df_files.csv"
# YAML file with the event windows (read in the next cell)
path_time_course_events = "/scratch/phecker/project/audiary/projects/2021-safetytech_accelerator-mwas/passive-voyage_data_recorder/src/evaluate_results/evaluate_time_course_events.yaml"

In [4]:
# Passive (voyage data recorder) predictions, loaded through the project helper
df_passive = evaluate_time_course_utils.concat_all_preds(
    dir_output_root, dir_evaluated, flag_concat, preds_str
)

# Active data, loaded from the CSV at path_active through the project helper
df_active = evaluate_time_course_utils.load_database_aisl(path_active)

# Open the mapping on when the ship was at land/sea/operation
# (safe_load only builds plain Python objects from the YAML file)
with open(path_time_course_events, "r") as f:
    event_timings = yaml.safe_load(f)

Loading existing DataFrame from /scratch/phecker/project/audiary/projects/2021-safetytech_accelerator-mwas/passive-voyage_data_recorder/data/evaluated/df_all_predictions_emotion.pkl.
Number of participants before filtering out too few sessions: 27; Shape of the DataFrame before filtering: (3346, 152)
Number of participants after filtering out too few sessions: 25; Shape of the DataFrame after filtering: (3304, 152)
Shape of the DataFrame after filtering prompts: (3204, 152)


In [5]:
# Wind data is read from "true_wind_speed.csv" inside the wind-speed folder
df_wind = pd.read_csv(os.path.join(dir_out_wind, "true_wind_speed.csv"))

In [6]:
# Keep an untouched copy of the wind data; df_wind itself is modified later
# (its "time" column is converted to datetime)
df_wind_raw = df_wind.copy()

## Number of emotion segments

In [7]:
# Overall number of emotion segments
total_rows = df_passive.shape[0]
print(f"Total number of rows: {total_rows}")

# Segment length in seconds, taken from the "start"/"end" index levels
segment_start = df_passive.index.get_level_values("start")
segment_end = df_passive.index.get_level_values("end")
durations = (segment_end - segment_start).total_seconds()

# Number of segments lasting at least 3 seconds
is_long_enough = durations >= 3
count_3s = is_long_enough.sum()
print(f"Number of segments >= 3 seconds: {count_3s}")

Total number of rows: 1473922
Number of segments >= 3 seconds: 764028


In [8]:
# Filter df_passive for segments with duration >= 3 seconds
durations = (
    df_passive.index.get_level_values("end")
    - df_passive.index.get_level_values("start")
).total_seconds()
# Take an explicit copy: a column is assigned to df_passive_3s further down,
# and assigning to a filtered selection can raise pandas' SettingWithCopyWarning.
df_passive_3s = df_passive[durations >= 3].copy()
# Untouched snapshot of the filtered segments
df_passive_3s_raw = df_passive_3s.copy()

print(f"Number of segments >= 3 seconds: {len(df_passive_3s)}")

Number of segments >= 3 seconds: 764028


# Manually set time filter

In [9]:
# From here on df_passive only holds the segments of at least 3 seconds;
# it is a copy, so later edits to df_passive do not change df_passive_3s.
df_passive = df_passive_3s.copy()

# Align mismatching time frames

The emotion data is time-stamped every 3 h &rarr; aggregate both the wind data and the emotion data per time block.

In [10]:
# --- Prepare and align DataFrames
# Microphone IDs per recording modality
bridge_mics = ["M1", "M2", "M3", "M6"]
radio_mics = ["V4", "V5"]


def mic_to_modality(mic):
    """Return "bridge", "radio" or "other" for the given microphone ID.

    The ID is looked up in ``bridge_mics`` first and in ``radio_mics`` second;
    anything found in neither list is labelled "other".
    """
    for modality, members in (("bridge", bridge_mics), ("radio", radio_mics)):
        if mic in members:
            return modality
    return "other"


# Convert time columns to datetime if not already
# (both frames are modified in place by these assignments)
df_passive["time"] = pd.to_datetime(df_passive["time"])
df_wind["time"] = pd.to_datetime(df_wind["time"])

# Assign modality column before any resampling
# NOTE(review): df_passive_3s was created by boolean filtering, so assigning a
# column to it may trigger pandas' SettingWithCopyWarning — confirm.
df_passive["modality"] = df_passive["microphone"].apply(mic_to_modality)
df_passive_3s["modality"] = df_passive_3s["microphone"].apply(mic_to_modality)

In [11]:
# Group by both 'microphone' and 'time', then aggregate (mean)
# numeric_only=True restricts the mean to numeric columns; reset_index turns
# the group keys back into regular "microphone" and "time" columns.
df_passive_agg = (
    df_passive.groupby(["microphone", "time"]).mean(numeric_only=True).reset_index()
)

In [12]:
# Get sorted unique time stamps from df_passive
unique_times = pd.to_datetime(df_passive["time"].unique())
unique_times = np.sort(unique_times)

# Calculate the time intervals between successive time stamps
intervals = np.diff(unique_times)

# Find and output all intervals longer than 3.5 hours
max_gap = pd.Timedelta(hours=3.5)
long_gaps = [
    (unique_times[i], unique_times[i + 1], intervals[i])
    for i in range(len(intervals))
    if intervals[i] > max_gap
]

print("Successive time intervals longer than 3.5h:")
for start, end, delta in long_gaps:
    # The entries are raw numpy datetime64/timedelta64 values; wrap them in
    # pandas types so the report reads "0 days 08:01:39" instead of a
    # nanosecond count.
    print(f"{pd.Timestamp(start)} -> {pd.Timestamp(end)}: {pd.Timedelta(delta)}")

Successive time intervals longer than 3.5h:
2023-01-13T05:21:49.000000000 -> 2023-01-15T15:33:19.000000000: 209490000000000 nanoseconds
2023-01-17T12:33:01.000000000 -> 2023-01-18T09:36:32.000000000: 75811000000000 nanoseconds
2023-01-20T04:22:31.000000000 -> 2023-01-20T12:23:30.000000000: 28859000000000 nanoseconds
2023-01-22T04:31:29.000000000 -> 2023-01-22T09:21:59.000000000: 17430000000000 nanoseconds
2023-01-23T06:21:58.000000000 -> 2023-01-23T10:05:58.000000000: 13440000000000 nanoseconds
2023-01-25T06:53:28.000000000 -> 2023-01-25T13:40:57.000000000: 24449000000000 nanoseconds
2023-01-29T05:33:55.000000000 -> 2023-01-29T10:04:55.000000000: 16260000000000 nanoseconds
2023-01-30T07:04:54.000000000 -> 2023-01-30T13:58:09.000000000: 24795000000000 nanoseconds
2023-02-01T08:01:23.000000000 -> 2023-02-01T15:10:37.000000000: 25754000000000 nanoseconds
2023-02-03T10:00:12.000000000 -> 2023-02-03T15:11:27.000000000: 18675000000000 nanoseconds


In [13]:
# Aggregate df_wind per df_passive block: a block ends at the next time stamp
# when that is at most 3.5h away, otherwise (and for the last block) after 3h

# Sorted unique block start times taken from df_passive
unique_times = np.sort(df_passive["time"].unique())

# Column holding the wind speed calculated from the GPS coordinates
wind_col = "true_wind_speed_gps"
mean_col = f"{wind_col}_mean_block"
max_gap = pd.Timedelta(hours=3.5)
default_window = pd.Timedelta(hours=3)
last_position = len(unique_times) - 1

wind_agg_list = []
for position, raw_start in enumerate(unique_times):
    t_start = pd.Timestamp(raw_start)
    if position == last_position:
        # No successor available: fall back to the 3h window
        t_end = t_start + default_window
        print(f"Last interval: {t_start} -> {t_end}, using 3h window")
    else:
        t_next = pd.Timestamp(unique_times[position + 1])
        gap_is_short = (t_next - t_start) <= max_gap
        t_end = t_next if gap_is_short else t_start + default_window

    # Wind samples inside the half-open window [t_start, t_end)
    in_window = df_wind["time"].ge(t_start) & df_wind["time"].lt(t_end)
    wind_block = df_wind.loc[in_window, wind_col]
    wind_mean = np.nan if wind_block.empty else wind_block.mean()
    wind_agg_list.append({"time": t_start, mean_col: wind_mean, "window_end": t_end})

df_wind_agg = pd.DataFrame(wind_agg_list)

# Attach the block means to the aggregated predictions; the left join on
# "time" keeps every row of df_passive_agg
df_merged = df_passive_agg.merge(
    df_wind_agg[["time", mean_col]],
    on="time",
    how="left",
)

print(df_wind_agg.head())

Last interval: 2023-02-06 04:59:54 -> 2023-02-06 07:59:54, using 3h window
                 time  true_wind_speed_gps_mean_block          window_end
0 2022-12-25 23:56:48                       15.531865 2022-12-26 02:56:48
1 2022-12-26 02:56:48                       14.460068 2022-12-26 05:56:48
2 2022-12-26 05:56:48                        8.357641 2022-12-26 08:56:48
3 2022-12-26 08:56:48                        7.778203 2022-12-26 11:56:48
4 2022-12-26 11:56:48                       12.121520 2022-12-26 14:56:48


In [14]:
# Check for large gaps in df_wind and drop blocks without any wind data

# Find large gaps in df_wind
df_wind_sorted = df_wind.sort_values("time")
wind_times = pd.to_datetime(df_wind_sorted["time"])
wind_intervals = wind_times.diff().dropna()
gap_threshold = pd.Timedelta(days=1)
large_gaps = wind_intervals[wind_intervals > gap_threshold]
# Time stamp of the preceding row in *sorted* order. Sorting keeps the original
# index labels, so "idx - 1" is not guaranteed to be the previous sample (and
# may not even exist as a label); shift() is positional and always correct.
previous_times = wind_times.shift(1)

print("Large gaps in df_wind (greater than 1 day):")
for idx, gap in large_gaps.items():
    print(f"{previous_times.loc[idx]} -> {wind_times.loc[idx]}: {gap}")

# The block aggregation takes the mean of the wind values inside each window,
# so a window without wind samples has a NaN mean (nothing is forward-filled).

# Drop all df_passive time windows where the corresponding wind mean is NaN
df_merged_clean = df_merged.dropna(subset=[f"{wind_col}_mean_block"])

print(
    f"Number of windows dropped due to missing wind data: {len(df_merged) - len(df_merged_clean)}"
)
print(df_merged_clean.head())

Large gaps in df_wind (greater than 1 day):
2023-01-12 23:57:05.200000 -> 2023-01-23 10:05:59.100000: 10 days 10:08:53.900000
Number of windows dropped due to missing wind data: 295
  microphone                time  prediction_arousal  prediction_dominance  \
0         M1 2022-12-25 23:56:48            0.438617              0.440492   
1         M1 2022-12-26 02:56:48            0.418873              0.436246   
2         M1 2022-12-26 05:56:48            0.436397              0.447649   
3         M1 2022-12-26 08:56:48            0.434784              0.439725   
4         M1 2022-12-26 11:56:48            0.425345              0.426833   

   prediction_valence  true_wind_speed_gps_mean_block  
0            0.417524                       15.531865  
1            0.435056                       14.460068  
2            0.438311                        8.357641  
3            0.416567                        7.778203  
4            0.417296                       12.121520  


In [15]:
# Display the merged frame after the rows without a wind mean were dropped
df_merged_clean

Unnamed: 0,microphone,time,prediction_arousal,prediction_dominance,prediction_valence,true_wind_speed_gps_mean_block
0,M1,2022-12-25 23:56:48,0.438617,0.440492,0.417524,15.531865
1,M1,2022-12-26 02:56:48,0.418873,0.436246,0.435056,14.460068
2,M1,2022-12-26 05:56:48,0.436397,0.447649,0.438311,8.357641
3,M1,2022-12-26 08:56:48,0.434784,0.439725,0.416567,7.778203
4,M1,2022-12-26 11:56:48,0.425345,0.426833,0.417296,12.121520
...,...,...,...,...,...,...
1539,V5,2023-02-05 16:59:55,0.583297,0.563519,0.570522,11.112832
1540,V5,2023-02-05 19:59:54,0.515110,0.523224,0.437897,10.206050
1541,V5,2023-02-05 22:59:54,0.481199,0.457990,0.408968,6.894521
1542,V5,2023-02-06 01:59:54,0.532976,0.528061,0.473926,7.730939


# Correlation analysis

In [16]:
# --- Pre-processing for correlation analysis ---


# Wind speed is rescaled to [0, 1] so that it is comparable with the emotion
# predictions (which are intrinsically scaled like that too)
def minmax_scale(series):
    """Linearly rescale ``series`` so its minimum maps to 0 and its maximum to 1."""
    lowest = series.min()
    value_range = series.max() - lowest
    return (series - lowest) / value_range


# Name of the scaled wind column used for the correlation analysis
wind_col_block_scaled = "true_wind_speed_gps_mean_block_scaled"

# Work on an independent copy and add the min-max scaled block means to it
df_merged_clean = df_merged_clean.copy()
df_merged_clean[wind_col_block_scaled] = minmax_scale(
    df_merged_clean["true_wind_speed_gps_mean_block"]
)

print("Wind speed has been min-max scaled to [0, 1] for correlation analysis.")

Wind speed has been min-max scaled to [0, 1] for correlation analysis.


In [17]:
# --- Aggregate and analyze by event windows: land, sea, operation ---


def aggregate_event_slices(df, event_list):
    """Concatenate the rows of ``df`` that fall into any of the event windows.

    Each event is a mapping with "start" and "end" entries; both bounds are
    inclusive and compared against the "time" column. An empty
    ``pd.DataFrame()`` is returned when ``event_list`` contains no events.
    """
    slices = [
        df[
            df["time"].between(
                pd.to_datetime(event["start"]), pd.to_datetime(event["end"])
            )
        ]
        for event in event_list
    ]
    return pd.concat(slices) if slices else pd.DataFrame()


# All further analysis is based on df_merged_clean
# Event windows as read from the YAML mapping
_land_sea_events = event_timings["land_and_sea"]
_operation_events_by_type = event_timings["loading_and_discharge"]

# Loading and discharge together form the "operation" condition
operation_events = (
    _operation_events_by_type["loading"] + _operation_events_by_type["discharge"]
)

# One slice of df_merged_clean per condition
land_df_all = aggregate_event_slices(df_merged_clean, _land_sea_events["land"])
sea_df_all = aggregate_event_slices(df_merged_clean, _land_sea_events["sea"])
operation_df_all = aggregate_event_slices(df_merged_clean, operation_events)


# Restrict a frame to a group of microphones (e.g. bridge or radio)
def filter_by_mic(df, mic_list):
    """Return the rows of ``df`` whose "microphone" value is in ``mic_list``."""
    is_selected = df["microphone"].isin(mic_list)
    return df.loc[is_selected]


# Microphone IDs per modality
bridge_mics = ["M1", "M2", "M3", "M6"]
radio_mics = ["V4", "V5"]

# Bridge microphones: one frame per condition
land_df_bridge, sea_df_bridge, operation_df_bridge = (
    filter_by_mic(condition_df, bridge_mics)
    for condition_df in (land_df_all, sea_df_all, operation_df_all)
)

# Radio channels: one frame per condition
land_df_radio, sea_df_radio, operation_df_radio = (
    filter_by_mic(condition_df, radio_mics)
    for condition_df in (land_df_all, sea_df_all, operation_df_all)
)

In [18]:
from scipy.stats import pearsonr, spearmanr, shapiro
from statsmodels.stats.multitest import multipletests
import numpy as np

# Emotion prediction columns that are correlated with the wind speed
emotion_cols = ["prediction_arousal", "prediction_dominance", "prediction_valence"]
# Use scaled wind column for correlation analysis
# (wind_col_block is the name passed to the correlation helpers below)
wind_col_block = wind_col_block_scaled


# --- Helper: Compute Pearson r, p-value, and bootstrap CI ---
def correlation_stats(x, y, n_boot=1000, alpha=0.05, rng=None):
    """Pearson correlation with a percentile-bootstrap confidence interval.

    Parameters
    ----------
    x, y : array-like
        Paired observations; pairs with a missing value in either input are
        dropped. Inputs are converted to float arrays, so a pandas Series with
        an arbitrary index is handled positionally.
    n_boot : int
        Number of bootstrap resamples.
    alpha : float
        The interval covers the central ``1 - alpha`` share of the bootstrap
        distribution.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of the resampling indices. ``None`` (default) uses numpy's
        global random state, so ``np.random.seed`` keeps working.

    Returns
    -------
    tuple
        ``(r, p, (lower, upper), n)``; all statistics are NaN when fewer than
        three valid pairs remain.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    valid = ~(np.isnan(x) | np.isnan(y))
    x, y = x[valid], y[valid]
    n = len(x)
    if n < 3:
        return np.nan, np.nan, (np.nan, np.nan), n
    r, p = pearsonr(x, y)

    sampler = np.random if rng is None else rng
    rs = np.full(n_boot, np.nan)
    for b in range(n_boot):
        idx = sampler.choice(n, n, replace=True)
        x_boot, y_boot = x[idx], y[idx]
        # A resample can consist of identical values; the correlation is then
        # undefined. Leave it as NaN instead of letting it poison the
        # percentiles (and instead of triggering a constant-input warning).
        if np.ptp(x_boot) == 0 or np.ptp(y_boot) == 0:
            continue
        rs[b], _ = pearsonr(x_boot, y_boot)

    if np.isnan(rs).all():
        return r, p, (np.nan, np.nan), n
    lower = np.nanpercentile(rs, 100 * alpha / 2)
    upper = np.nanpercentile(rs, 100 * (1 - alpha / 2))
    return r, p, (lower, upper), n


# --- Helper: Compute Spearman's rho and p-value ---
def spearman_corr_stats(x, y):
    """Spearman's rho for the pairs of ``x`` and ``y`` without missing values.

    Returns ``(rho, p_value, n)`` with ``n`` being the number of valid pairs;
    rho and the p-value are NaN when fewer than three pairs remain.
    """
    valid = ~(pd.isna(x) | pd.isna(y))
    x_valid, y_valid = x[valid], y[valid]
    n = len(x_valid)
    if n >= 3:
        rho, p_value = spearmanr(x_valid, y_valid)
        return rho, p_value, n
    return np.nan, np.nan, n


# --- Compute correlations for each event type and modality, including FDR correction ---
def event_corr_summary(df, label, emotion_cols, wind_col_block):
    """Print and return wind/emotion correlations for one condition.

    For the frame ``df`` this runs a Shapiro-Wilk normality test on the wind
    column and every emotion column, then computes Pearson (with bootstrap CI)
    and Spearman correlations between each emotion column and the wind column,
    and finally applies a Benjamini-Hochberg FDR correction across the emotion
    columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data of one condition; must contain ``wind_col_block`` and all
        ``emotion_cols``.
    label : str
        Name of the condition, used in the printed report only.
    emotion_cols : list of str
        Emotion columns to correlate with the wind column.
    wind_col_block : str
        Name of the wind column.

    Returns
    -------
    list of dict or None
        One dict per emotion column with the keys ``metric``, ``pearson_r``,
        ``pearson_p``, ``pearson_ci_lower``, ``pearson_ci_upper``,
        ``spearman_rho``, ``spearman_p``, ``n``, ``pearson_p_fdr`` and
        ``spearman_p_fdr``; ``None`` when ``df`` is empty.
    """

    def _fdr_bh(p_values):
        # Correct only the finite p-values: NaN entries (too few samples) stay
        # NaN and are kept out of the correction of the remaining tests.
        p_values = np.asarray(p_values, dtype=float)
        corrected = np.full(p_values.shape, np.nan)
        finite = np.isfinite(p_values)
        if finite.any():
            corrected[finite] = multipletests(
                p_values[finite], alpha=0.05, method="fdr_bh"
            )[1]
        return corrected

    if df.empty:
        print(f"{label}: No data")
        return None
    print(f"\n{label} correlations (n = {len(df)} samples):")
    results = []
    # Normality test for wind and emotion columns
    print(f"Normality test results for {label}:")
    for col in [wind_col_block] + emotion_cols:
        data = df[col].dropna()
        if len(data) > 5000:
            print(
                f"{col}: Too many samples for Shapiro-Wilk (n={len(data)}), consider using another test."
            )
            continue
        if len(data) < 3:
            # Shapiro-Wilk needs at least three observations
            print(f"{col}: Too few samples for Shapiro-Wilk (n={len(data)}).")
            continue
        stat, pval = shapiro(data)
        print(
            f"{col}: W={stat:.3f}, p={pval:.3g} (n={len(data)}) {'NOT normal' if pval < 0.05 else 'normal'}"
        )
    # Correlation analysis
    pearson_ps = []
    spearman_ps = []
    pearson_rs = []
    pearson_cis = []
    spearman_rs = []
    ns = []
    for col in emotion_cols:
        # Pearson
        r, p, ci, n = correlation_stats(df[col].values, df[wind_col_block].values)
        # Spearman (its sample count equals the Pearson one, same NaN mask)
        r_s, p_s, _ = spearman_corr_stats(df[col].values, df[wind_col_block].values)
        pearson_ps.append(p)
        spearman_ps.append(p_s)
        pearson_rs.append(r)
        pearson_cis.append(ci)
        spearman_rs.append(r_s)
        ns.append(n)
        results.append(
            {
                "metric": col,
                "pearson_r": r,
                "pearson_p": p,
                "pearson_ci_lower": ci[0],
                "pearson_ci_upper": ci[1],
                "spearman_rho": r_s,
                "spearman_p": p_s,
                "n": n,
            }
        )
    # FDR correction across the emotion columns
    pearson_p_fdr = _fdr_bh(pearson_ps)
    spearman_p_fdr = _fdr_bh(spearman_ps)
    # Print results with FDR-corrected p-values. Every part of the message has
    # to be an f-string, otherwise the placeholders are printed literally.
    for i, col in enumerate(emotion_cols):
        print(
            f"{col}: Pearson r = {pearson_rs[i]:.3f}, p = {pearson_ps[i]:.2e}, p_FDR = {pearson_p_fdr[i]:.2e}, "
            f"95% CI = [{pearson_cis[i][0]:.3f}, {pearson_cis[i][1]:.3f}], n = {ns[i]}"
        )
        print(
            f"{col}: Spearman rho = {spearman_rs[i]:.3f}, p = {spearman_ps[i]:.2e}, p_FDR = {spearman_p_fdr[i]:.2e}, "
            f"n = {ns[i]}"
        )
        # Add FDR p-values to results
        results[i]["pearson_p_fdr"] = pearson_p_fdr[i]
        results[i]["spearman_p_fdr"] = spearman_p_fdr[i]
    return results


# Report the correlations per condition (land / sea / operation), first for
# all microphones together, then for the bridge microphones and the radio
# channels separately. The return values are not stored; as the last
# expression of the cell, only the final call's result is displayed.
print("--- ALL MICROPHONES ---")
event_corr_summary(land_df_all, "Land", emotion_cols, wind_col_block)
event_corr_summary(sea_df_all, "Sea", emotion_cols, wind_col_block)
event_corr_summary(operation_df_all, "Operation", emotion_cols, wind_col_block)

print("\n--- BRIDGE MICROPHONES ---")
event_corr_summary(land_df_bridge, "Land (Bridge)", emotion_cols, wind_col_block)
event_corr_summary(sea_df_bridge, "Sea (Bridge)", emotion_cols, wind_col_block)
event_corr_summary(
    operation_df_bridge, "Operation (Bridge)", emotion_cols, wind_col_block
)

print("\n--- RADIO MICROPHONES ---")
event_corr_summary(land_df_radio, "Land (Radio)", emotion_cols, wind_col_block)
event_corr_summary(sea_df_radio, "Sea (Radio)", emotion_cols, wind_col_block)
event_corr_summary(
    operation_df_radio, "Operation (Radio)", emotion_cols, wind_col_block
)

--- ALL MICROPHONES ---

Land correlations (n = 609 samples):
Normality test results for Land:
true_wind_speed_gps_mean_block_scaled: W=0.975, p=1.36e-08 (n=609) NOT normal
prediction_arousal: W=0.970, p=9.79e-10 (n=609) NOT normal
prediction_dominance: W=0.975, p=1.19e-08 (n=609) NOT normal
prediction_valence: W=0.880, p=2.95e-21 (n=609) NOT normal
prediction_arousal: Pearson r = 0.065, p = 1.09e-01, p_FDR = 3.26e-01, 95% CI = [{pearson_cis[i][0]:.3f}, {pearson_cis[i][1]:.3f}], n = {ns[i]}
prediction_arousal: Spearman rho = 0.092, p = 2.31e-02, p_FDR = 6.94e-02, n = {ns[i]}
prediction_dominance: Pearson r = 0.020, p = 6.15e-01, p_FDR = 6.15e-01, 95% CI = [{pearson_cis[i][0]:.3f}, {pearson_cis[i][1]:.3f}], n = {ns[i]}
prediction_dominance: Spearman rho = 0.040, p = 3.29e-01, p_FDR = 4.93e-01, n = {ns[i]}
prediction_valence: Pearson r = -0.038, p = 3.48e-01, p_FDR = 5.22e-01, 95% CI = [{pearson_cis[i][0]:.3f}, {pearson_cis[i][1]:.3f}], n = {ns[i]}
prediction_valence: Spearman rho = -0.0

[{'metric': 'prediction_arousal',
  'pearson_r': 0.2666129292831588,
  'pearson_p': 0.2079038022076066,
  'pearson_ci_lower': -0.159286913698232,
  'pearson_ci_upper': 0.6044112841928265,
  'spearman_rho': 0.25201320686917905,
  'spearman_p': 0.23483595435814822,
  'n': 24,
  'pearson_p_fdr': 0.6237114066228199,
  'spearman_p_fdr': 0.3752914022283828},
 {'metric': 'prediction_dominance',
  'pearson_r': 0.11784791568178629,
  'pearson_p': 0.5833956976700332,
  'pearson_ci_lower': -0.3321302846937551,
  'pearson_ci_upper': 0.6013844367308893,
  'spearman_rho': 0.2441785994017434,
  'spearman_p': 0.2501942681522552,
  'n': 24,
  'pearson_p_fdr': 0.8750935465050498,
  'spearman_p_fdr': 0.3752914022283828},
 {'metric': 'prediction_valence',
  'pearson_r': -0.027181816748182697,
  'pearson_p': 0.8996707344767106,
  'pearson_ci_lower': -0.4023811984958924,
  'pearson_ci_upper': 0.3172489753799188,
  'spearman_rho': -0.04091406121883044,
  'spearman_p': 0.849453241093847,
  'n': 24,
  'pearson

## Generate LaTeX table for publication

In [19]:
import pandas as pd


def spearman_bootstrap_ci(x, y, n_boot=1000, alpha=0.05, rng=None):
    """Percentile-bootstrap confidence interval for Spearman's rho.

    Parameters
    ----------
    x, y : array-like
        Paired observations; pairs with a missing value in either input are
        dropped. Inputs are converted to float arrays, so a pandas Series with
        an arbitrary index is handled positionally.
    n_boot : int
        Number of bootstrap resamples.
    alpha : float
        The interval covers the central ``1 - alpha`` share of the bootstrap
        distribution.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of the resampling indices. ``None`` (default) uses numpy's
        global random state, so ``np.random.seed`` keeps working.

    Returns
    -------
    tuple of float
        ``(lower, upper)``; ``(nan, nan)`` when fewer than three valid pairs
        remain or no resample yields a defined correlation.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    valid = ~(np.isnan(x) | np.isnan(y))
    x, y = x[valid], y[valid]
    n = len(x)
    if n < 3:
        return (np.nan, np.nan)

    sampler = np.random if rng is None else rng
    rs = np.full(n_boot, np.nan)
    for b in range(n_boot):
        idx = sampler.choice(n, n, replace=True)
        x_boot, y_boot = x[idx], y[idx]
        # Rho is undefined for a resample of identical values; keep it as NaN
        # so that it cannot turn the percentiles below into NaN.
        if np.ptp(x_boot) == 0 or np.ptp(y_boot) == 0:
            continue
        rs[b], _ = spearmanr(x_boot, y_boot)

    if np.isnan(rs).all():
        return (np.nan, np.nan)
    lower = np.nanpercentile(rs, 100 * alpha / 2)
    upper = np.nanpercentile(rs, 100 * (1 - alpha / 2))
    return (lower, upper)


def collect_spearman_table():
    """Collect the Spearman results for the publication table.

    For both environments (bridge microphones, radio channels) and each
    condition (land, sea, operation) Spearman's rho, its bootstrap CI and the
    FDR-corrected p-value are computed between every column in
    ``emotion_cols`` and the wind column ``wind_col_block``. The condition
    frames and column names are read from the notebook's global namespace.

    Returns
    -------
    list of dict
        One row per environment, condition and emotion with the keys
        ``Environment``, ``Condition``, ``Emotion``, ``Rho``, ``CI``,
        ``p_FDR``, ``n`` and ``Signif``.
    """
    # Define environments and DataFrames
    environments = [
        (
            "Bridge Microphones",
            [
                ("Land", land_df_bridge),
                ("Sea", sea_df_bridge),
                ("Operation", operation_df_bridge),
            ],
        ),
        (
            "Radio Channels",
            [
                ("Land", land_df_radio),
                ("Sea", sea_df_radio),
                ("Operation", operation_df_radio),
            ],
        ),
    ]
    rows = []
    for env, conds in environments:
        for cond, df in conds:
            # Run correlation for all emotion_cols
            spearman_ps = []
            spearman_rhos = []
            spearman_ns = []
            spearman_cis = []
            for col in emotion_cols:
                if df.empty:
                    # A condition without events yields a frame without any
                    # columns; report it as "no data" instead of failing on
                    # the column lookup.
                    r_s, p_s, n_s = np.nan, np.nan, 0
                    ci_s = (np.nan, np.nan)
                else:
                    x = df[col].values
                    y = df[wind_col_block].values
                    r_s, p_s, n_s = spearman_corr_stats(x, y)
                    ci_s = spearman_bootstrap_ci(x, y)
                spearman_ps.append(p_s)
                spearman_rhos.append(r_s)
                spearman_ns.append(n_s)
                spearman_cis.append(ci_s)
            # FDR correction over the finite p-values only; NaN p-values (too
            # few samples) stay NaN and do not affect the other tests.
            p_raw = np.asarray(spearman_ps, dtype=float)
            spearman_p_fdr = np.full(p_raw.shape, np.nan)
            finite = np.isfinite(p_raw)
            if finite.any():
                spearman_p_fdr[finite] = multipletests(
                    p_raw[finite], alpha=0.05, method="fdr_bh"
                )[1]
            for i, col in enumerate(emotion_cols):
                rows.append(
                    {
                        "Environment": env,
                        "Condition": cond,
                        "Emotion": col.replace("prediction_", "").capitalize(),
                        "Rho": spearman_rhos[i],
                        "CI": spearman_cis[i],
                        "p_FDR": spearman_p_fdr[i],
                        "n": spearman_ns[i],
                        # NaN compares as False, i.e. "not significant"
                        "Signif": spearman_p_fdr[i] < 0.05,
                    }
                )
    return rows


def latex_spearman_table(rows):
    """Render the collected Spearman results as a LaTeX ``table`` environment.

    ``rows`` is a sequence of dicts with the keys ``Environment``,
    ``Condition``, ``Emotion``, ``Rho``, ``CI``, ``p_FDR``, ``n`` and
    ``Signif``. Repeated environment, condition and sample-size values are
    only printed in the first row of their block, a rule separates the
    environments, and significant rows get a bold rho and p-value.
    """
    header = r"""\begin{table}[ht]
\centering
\caption{Spearman correlations ($\rho$) between wind speed and emotion dimensions for the Bridge Microphones and Radio Channels. $p_\mathrm{FDR}$: FDR-corrected $p$-value. 95\%~CI: bootstrap confidence interval for $\rho$. Significant correlations ($p_\mathrm{FDR}<0.05$) are marked in \textbf{bold}.}
\label{tab:correlation_wind_emotion}
\begin{tabular}{lllcccc}
\toprule
Environment & Condition & Emotion & $\rho$ & 95\% CI & $p_\mathrm{FDR}$ & $n$ \\
\midrule
"""
    footer = r"""\bottomrule
\end{tabular}
\end{table}
"""

    def _bold(text):
        return r"\textbf{" + text + "}"

    lines = []
    last_env = last_cond = last_n = None
    for position, row in enumerate(rows):
        env_changed = row["Environment"] != last_env
        # Rule between environment blocks, but not above the first one
        if position > 0 and env_changed:
            lines.append(r"\hline" + "\n")

        # Repeated values are blanked out within a block
        env_cell = row["Environment"] if env_changed else ""
        if row["Condition"] != last_cond or env_cell:
            cond_cell = row["Condition"]
        else:
            cond_cell = ""
        if row["n"] != last_n or cond_cell or env_cell:
            n_cell = f"{row['n']}"
        else:
            n_cell = ""
        last_env, last_cond, last_n = row["Environment"], row["Condition"], row["n"]

        rho_cell = f"{row['Rho']:.3f}" if pd.notnull(row["Rho"]) else ""

        interval = row["CI"]
        if pd.notnull(interval[0]) and pd.notnull(interval[1]):
            ci_cell = f"[{interval[0]:.3f}, {interval[1]:.3f}]"
        else:
            ci_cell = ""

        # p-values below 0.001 are reported as "<0.001"
        p_fdr = row["p_FDR"]
        if not pd.notnull(p_fdr):
            p_cell = ""
        elif p_fdr < 0.001:
            p_cell = r"$<$0.001"
        else:
            p_cell = f"{p_fdr:.3f}"

        if row["Signif"]:
            rho_cell, p_cell = _bold(rho_cell), _bold(p_cell)

        lines.append(
            f"{env_cell} & {cond_cell} & {row['Emotion']} & {rho_cell} & {ci_cell} & {p_cell} & {n_cell} \\\\\n"
        )
    return header + "".join(lines) + footer


# Generate and print the LaTeX table
# (rows: one dict per environment/condition/emotion; latex_code: the complete
# table environment as a string, ready to paste into the manuscript)
rows = collect_spearman_table()
latex_code = latex_spearman_table(rows)
print(latex_code)

\begin{table}[ht]
\centering
\caption{Spearman correlations ($\rho$) between wind speed and emotion dimensions for the Bridge Microphones and Radio Channels. $p_\mathrm{FDR}$: FDR-corrected $p$-value. 95\%~CI: bootstrap confidence interval for $\rho$. Significant correlations ($p_\mathrm{FDR}<0.05$) are marked in \textbf{bold}.}
\label{tab:correlation_wind_emotion}
\begin{tabular}{lllcccc}
\toprule
Environment & Condition & Emotion & $\rho$ & 95\% CI & $p_\mathrm{FDR}$ & $n$ \\
\midrule
Bridge Microphones & Land & Arousal & \textbf{0.132} & [0.046, 0.217] & \textbf{0.010} & 492 \\
 &  & Dominance & 0.056 & [-0.036, 0.141] & 0.317 &  \\
 &  & Valence & -0.019 & [-0.103, 0.071] & 0.671 &  \\
 & Sea & Arousal & \textbf{-0.157} & [-0.240, -0.072] & \textbf{$<$0.001} & 584 \\
 &  & Dominance & \textbf{-0.200} & [-0.281, -0.117] & \textbf{$<$0.001} &  \\
 &  & Valence & \textbf{-0.242} & [-0.316, -0.167] & \textbf{$<$0.001} &  \\
 & Operation & Arousal & -0.086 & [-0.265, 0.104] & 0.439 & 84