In [None]:
# Ensure dependencies are installed when running in hosted notebooks
%pip install -q -r requirements.txt


In [None]:
import urllib.request
from pathlib import Path

# Root of the raw-file view of the repository that hosts the helper files.
BASE_URL = "https://raw.githubusercontent.com/FelixZhan/AtyAN/main/"
# Files this notebook needs next to it in the working directory.
HELPER_FILES = [
    "analysis_utils.py",
    "requirements.txt",
    "BP1234-ONSET.csv",
]

# Fetch each helper file that is not already on disk.
for filename in HELPER_FILES:
    dest = Path(filename)
    if dest.exists():
        print(f'{filename} already present, skipping download.')
        continue
    print(f'Downloading {filename}...')
    # Download to a temporary sibling and rename only on success. Writing straight
    # to `dest` would leave a truncated file behind if the transfer is interrupted,
    # and the `dest.exists()` check above would then skip it on every later run.
    partial = dest.with_name(dest.name + '.part')
    try:
        urllib.request.urlretrieve(f"{BASE_URL}{filename}", partial)
        partial.replace(dest)
    finally:
        # After a successful rename the temporary no longer exists; after a
        # failure this removes the incomplete download.
        if partial.exists():
            partial.unlink()
print('Helper files are ready.')


## Imports and shared setup

In [None]:
from analysis_utils import (
    load_base_dataset,
    engineer_baseline_features,
    prepare_univariate_prediction_dataset,
    prepare_persistence_dataset,
    run_univariate_logistic_regressions,
)
from IPython.display import display


In [None]:
# Switch for the optional persistence/remission section: the cells below that build
# the persistence cohort and fit its regressions only do that work when this is True;
# otherwise they print a notice and skip.
RUN_PERSISTENCE = False  # Set True to re-enable persistence/remission analyses

In [None]:
# Load the base dataset via the project helper (presumably from the downloaded
# CSV -- confirm in analysis_utils).
raw_df = load_base_dataset()
# The helper returns a pair: the engineered feature table and `feature_sets`, which
# is indexed by name; its "model_features" entry is used throughout this notebook
# to select the predictor columns of `feature_df`.
feature_df, feature_sets = engineer_baseline_features(raw_df)
# Report the size of the raw data and of the predictor-column slice.
print(f'Dataset shape: {raw_df.shape}')
print(f'Feature matrix shape: {feature_df[feature_sets["model_features"]].shape}')


## Univariate prediction of future atypical AN onset

Participants with a full AN diagnosis or with atypical AN onset already present at baseline are removed to mirror the original risk-prediction experiment. The target, `aan_onset_anywave`, flags any mBMI-defined atypical AN onset across waves 1–6.

In [None]:
# Build the onset-prediction cohort from the engineered features and the model
# feature columns; the cohort exclusion rules live in analysis_utils.
prediction_df = prepare_univariate_prediction_dataset(
    feature_df, feature_sets['model_features']
)
print('Univariate prediction cohort size:', len(prediction_df))
# Run the regressions over the model feature columns against the
# 'aan_onset_anywave' target (presumably one model per feature, going by the
# helper's name -- confirm in analysis_utils).
onset_logistic = run_univariate_logistic_regressions(
    prediction_df, feature_sets['model_features'], target_col='aan_onset_anywave'
)
# Render the complete results table; a 20-row preview is shown again further down.
display(onset_logistic)

In [None]:
# Tally the onset target, keeping missing labels as their own row, and name the
# resulting series 'count' so the rendered table has a readable column header.
onset_counts = (
    prediction_df['aan_onset_anywave']
    .value_counts(dropna=False)
    .rename('count')
)
print('Onset class counts:')
display(onset_counts.to_frame())


## Univariate persistence vs. remission analyses

Set `RUN_PERSISTENCE = True` above to execute the persistence/remission cohort cells. The persistence dataset retains participants with baseline or mBMI-defined onset who have complete onset data for waves 1–6, and it labels cases that return to onset after at least one remission wave.

In [None]:
# Reset first so a re-run with the switch off (or a failed build) never leaves a
# stale cohort behind for the cells below.
persistence_df = None
if not RUN_PERSISTENCE:
    print('Persistence/remission analyses are disabled. Set RUN_PERSISTENCE = True to enable them.')
else:
    # Build the persistence cohort from the engineered features and model columns.
    persistence_df = prepare_persistence_dataset(feature_df, feature_sets['model_features'])
    print('Persistence cohort size:', len(persistence_df))


In [None]:
# Nothing to tally unless the persistence cohort was built above.
if persistence_df is None:
    print('Persistence dataset not loaded. Enable RUN_PERSISTENCE to build it.')
else:
    # Count each persistence label (missing values included) as a 'count' column.
    persistence_counts = (
        persistence_df['aan_persistence']
        .value_counts(dropna=False)
        .rename('count')
    )
    print('Persistence class counts:')
    display(persistence_counts.to_frame())


### Onset logistic summaries


In [None]:
# Preview the first 20 rows of the onset regression results computed earlier.
display(onset_logistic.head(20))

### Persistence logistic summaries


In [None]:
# Skip unless the section is switched on and the cohort was actually built. The
# switch is tested first, so `persistence_df` is only consulted when it is on.
if not RUN_PERSISTENCE or persistence_df is None:
    print('Skipping persistence logistic summaries.')
else:
    # Same regression helper as the onset analysis, with the persistence target.
    persistence_logistic = run_univariate_logistic_regressions(
        persistence_df,
        feature_sets['model_features'],
        target_col='aan_persistence',
    )
    display(persistence_logistic.head(20))