# Reviewer Feedback

In [1]:
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set_theme(
    context="paper",
    style="whitegrid",
    font_scale=1.2,
    rc={'figure.figsize': (10, 10), 'figure.dpi': 300}
)

In [70]:
from common.data import get_data
from common.paths import HEALTHY, ADHD

X, Y, demographics, population = get_data(label_path=ADHD)
ages, sexes = demographics['Age'], demographics['Sex']

print(f'X: {X.shape} | Y: {len(Y.keys())} | Age: {ages.shape} | Population: {population}')

selected_target = "WISC_FSIQ"
y = Y[selected_target]

print(f'y: {y.shape}')

X: (373, 34716) | Y: 6 | Age: (373,) | Population: adhd
y: (373,)


In [59]:
from common.model_weight import save_model_weight
from common.scoring import unimetric_scorer, multimetric_scorer, N_PERM, SCORING, RKF_10_10
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.model_selection import cross_validate, permutation_test_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from common.binning import bin_by_random_equivalent_size, bin_by_sliding_window, bin_to_approximate_size

## Random Sampling of Entire Dataset

In [52]:
model = 'ridge'

all_scores = []

for current_run in range(30):
    X_bins, y_bins, bin_labels = bin_by_random_equivalent_size(X, y, bin_sizes=(106, 106, 106))
    
    if current_run % 5 == 0:
        print(current_run)

    for selected_bin in range(3):
        X_cv = X_bins[selected_bin]
        y_cv = y_bins[selected_bin]
        age_group = f'Bin {selected_bin + 1}'
        
        estimators = [StandardScaler(), RidgeCV(alphas=[a for a in range(1, 10000, 1000)], 
                                                scoring=unimetric_scorer, cv=RKF_10_10)]
        pipe = make_pipeline(*estimators)
        pipe.fit(X_cv, y_cv)
        ridge_cv = pipe['ridgecv']
        all_scores.append(ridge_cv.best_score_)
        # print(f'Bin: {age_group} | Target: {selected_target} | Alpha: {ridge_cv.alpha_} | Score: {ridge_cv.best_score_:.2f}')

print(f'Avg score {np.mean(all_scores)} for {len(all_scores)} scores | Target: {selected_target}')

0
5
10
15
20
25
Avg score 0.12459945147120938 for 90 scores | Target: WISC_WMI


## Sliding Age Windows

In [64]:
all_weights = []
all_sliding_windows, age_windows = bin_by_sliding_window(X, y, ages)

for sliding_window, age_window in zip(all_sliding_windows, age_windows):
    X_cv = sliding_window[0]
    y_cv = sliding_window[1]
    X_cv, y_cv = bin_to_approximate_size(X_cv, y_cv)
    
    estimators = [StandardScaler(), RidgeCV(alphas=[a for a in range(1, 10000, 1000)], 
                                            scoring=unimetric_scorer, cv=RKF_10_10)]
    pipe = make_pipeline(*estimators)
    pipe.fit(X_cv, y_cv)
    ridge_cv = pipe['ridgecv']
    all_weights.append(ridge_cv.coef_)
    
    print(f'Window: {age_window}\t| N: {len(X_cv)}\t| Target: {selected_target}\t| Score: {ridge_cv.best_score_:.2f}')

print(f'Num weights: {len(all_weights)}')

Window: (6, 8)	| N: 69	| Target: WISC_WMI	| Score: 0.14
Window: (7, 9)	| N: 75	| Target: WISC_WMI	| Score: 0.03
Window: (8, 10)	| N: 75	| Target: WISC_WMI	| Score: 0.11
Window: (9, 11)	| N: 75	| Target: WISC_WMI	| Score: 0.28
Window: (10, 12)	| N: 75	| Target: WISC_WMI	| Score: 0.13
Window: (11, 13)	| N: 75	| Target: WISC_WMI	| Score: -0.04
Window: (12, 15)	| N: 75	| Target: WISC_WMI	| Score: -0.10
Num weights: 7


## Sliding Age Windows + Cross Prediction with Next Window

In [71]:
all_sliding_windows, age_windows = bin_by_sliding_window(X, y, ages)
windows = list(zip(all_sliding_windows, age_windows))

for window_num in range(len(windows) - 2):
    X_cv = windows[window_num][0][0]
    y_cv = windows[window_num][0][1]
    X_cv, y_cv = bin_to_approximate_size(X_cv, y_cv)
    age_window = windows[window_num][1]
    
    estimators = [StandardScaler(), RidgeCV(alphas=[a for a in range(1, 10000, 1000)], 
                                            scoring=unimetric_scorer, cv=RKF_10_10)]
    pipe = make_pipeline(*estimators)
    pipe.fit(X_cv, y_cv)
    ridge_cv = pipe['ridgecv']

    X_cv = windows[window_num + 1][0][0]
    y_cv = windows[window_num + 1][0][1]
    X_cv, y_cv = bin_to_approximate_size(X_cv, y_cv)
    next_window_score = ridge_cv.score(X_cv, y_cv)

    X_cv = windows[window_num + 2][0][0]
    y_cv = windows[window_num + 2][0][1]
    X_cv, y_cv = bin_to_approximate_size(X_cv, y_cv)
    next_next_window_score = ridge_cv.score(X_cv, y_cv)
    
    # print(f'Window: {age_window}\t\t| N: {len(X_cv)}\t| Target: {selected_target}\t| Score: {ridge_cv.best_score_:.2f}')
    print(f'Windows: {age_window} -> {windows[window_num + 1][1]} \t| Next Window Score: {next_window_score:.2f}')
    print(f'Windows: {age_window} -> {windows[window_num + 2][1]}  \t| Next Next Window Score: {next_next_window_score:.2f}')

print(f'Num weights: {len(all_weights)}')

Windows: (6, 8) -> (7, 9) 	| Next Window Score: 0.21
Windows: (6, 8) -> (8, 10)  	| Next Next Window Score: 0.00
Windows: (7, 9) -> (8, 10) 	| Next Window Score: 0.05
Windows: (7, 9) -> (9, 11)  	| Next Next Window Score: 0.03
Windows: (8, 10) -> (9, 11) 	| Next Window Score: 0.13
Windows: (8, 10) -> (10, 12)  	| Next Next Window Score: -0.16
Windows: (9, 11) -> (10, 12) 	| Next Window Score: 0.07
Windows: (9, 11) -> (11, 13)  	| Next Next Window Score: -0.06
Windows: (10, 12) -> (11, 13) 	| Next Window Score: 0.15
Windows: (10, 12) -> (12, 15)  	| Next Next Window Score: -0.03
Num weights: 7


## Age Windows ICC

In [24]:
import pingouin as pg

In [45]:
results = []

for index in range(7):
    window_one, window_two, window_three = index, index + 1, index + 2

    if 7 < window_three:
        continue
    
    icc_data = pd.DataFrame([all_weights[window_one], all_weights[window_two]]).melt(
        var_name='connection', value_name='weight', ignore_index=False)
    icc_data['cv_run_num'] = icc_data.index
    icc = pg.intraclass_corr(data=icc_data, targets='connection', raters='cv_run_num', ratings='weight').round(3)
    icc1 = icc[icc['Type'] == 'ICC1'].to_dict('records')[0]
    results.append({
        'Target': selected_target,
        'age_windows_one': age_windows[window_one],
        'age_windows_two': age_windows[window_two],
        **icc1})

    icc_data = pd.DataFrame([all_weights[window_one], all_weights[window_three]]).melt(
        var_name='connection', value_name='weight', ignore_index=False)
    icc_data['cv_run_num'] = icc_data.index
    icc = pg.intraclass_corr(data=icc_data, targets='connection', raters='cv_run_num', ratings='weight').round(3)
    icc1 = icc[icc['Type'] == 'ICC1'].to_dict('records')[0]
    results.append({
        'Target': selected_target,
        'age_windows_one': age_windows[window_one],
        'age_windows_two': age_windows[window_three],
        **icc1})

results_df = pd.DataFrame(results)
results_df = results_df.drop(columns=['Type', 'Description', 'df1', 'df2'])
display(results_df)

Unnamed: 0,Target,age_windows_one,age_windows_two,ICC,F,pval,CI95%
0,WISC_PSI,"(6, 8)","(7, 9)",0.708,5.844,0.0,"[0.7, 0.71]"
1,WISC_PSI,"(6, 8)","(8, 10)",0.013,1.026,0.007,"[0.0, 0.02]"
2,WISC_PSI,"(7, 9)","(8, 10)",0.338,2.019,0.0,"[0.33, 0.35]"
3,WISC_PSI,"(7, 9)","(9, 11)",-0.015,0.971,0.997,"[-0.03, -0.0]"
4,WISC_PSI,"(8, 10)","(9, 11)",0.471,2.782,0.0,"[0.46, 0.48]"
5,WISC_PSI,"(8, 10)","(10, 12)",0.031,1.064,0.0,"[0.02, 0.04]"
6,WISC_PSI,"(9, 11)","(10, 12)",0.471,2.78,0.0,"[0.46, 0.48]"
7,WISC_PSI,"(9, 11)","(11, 13)",0.003,1.005,0.316,"[-0.01, 0.01]"
8,WISC_PSI,"(10, 12)","(11, 13)",0.379,2.219,0.0,"[0.37, 0.39]"
9,WISC_PSI,"(10, 12)","(12, 14)",-0.006,0.988,0.869,"[-0.02, 0.0]"
