In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from nilearn.decoding import Decoder


# Single seed for the whole notebook; it is applied to NumPy's global
# random generator here and echoed into the output log further down.
RANDOM_SEED = 42
np.random.seed(seed=RANDOM_SEED)

In [None]:
import os

# Root of the BIDS dataset. The default is the original author's local path;
# set the BIDS_SOMATOSENSORY_ROOT environment variable to run the notebook on
# another machine without editing this cell.
BIDS_ROOT = Path(os.environ.get(
    "BIDS_SOMATOSENSORY_ROOT",
    r"C:/Users/duart/Desktop/Tese/Mapping_Tese/mapping_tese/data/BIDS-somatosensory/BIDS-somatosensory",
))
# Preprocessed images are read from <BIDS_ROOT>/derivatives/fmriprep.
DERIVATIVES = BIDS_ROOT / "derivatives" / "fmriprep"


# BIDS entities used to build the events / BOLD file names.
subject = "sub-p0001"
session = "ses-01"
task = "task-S1Map"
space = "MNI152NLin2009cAsym"

n_runs = 4   # runs are numbered 1..n_runs in the file names
TR = 6.0     # divides (onset + HRF delay) to get a volume index; presumably seconds — confirm

In [None]:
import builtins
from datetime import datetime

RESULTS_DIR = Path("results")
FIGURES_DIR = RESULTS_DIR / "figures"
# parents=True also creates RESULTS_DIR, which must exist before the log is opened.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_LOG = RESULTS_DIR / "outputs.txt"

# Re-running this cell must not leak the handle opened by a previous execution.
_stale_log = globals().get("log_file")
if _stale_log is not None and not _stale_log.closed:
    _stale_log.close()
log_file = open(OUTPUT_LOG, 'w', encoding='utf-8')

# Bind the real built-in rather than whatever `print` currently names: after a
# re-run `print` is the wrapper below, and wrapping the wrapper recurses forever.
original_print = builtins.print


def print(*args, **kwargs):
    """Print like the built-in and mirror the same text into ``log_file``.

    Positional and keyword arguments are forwarded unchanged to the built-in
    ``print`` for the normal write. The mirrored write always targets
    ``log_file`` with ``flush=True``; any caller-supplied ``file``/``flush``
    is dropped for that second write so the keywords are never passed twice.
    """
    original_print(*args, **kwargs)
    log_kwargs = {key: value for key, value in kwargs.items()
                  if key not in ("file", "flush")}
    original_print(*args, **log_kwargs, file=log_file, flush=True)


print(f"Results will be saved to: {RESULTS_DIR.resolve()}")
print(f"Figures directory: {FIGURES_DIR.resolve()}")
print(f"Output log: {OUTPUT_LOG.resolve()}")
print(f"Analysis started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Random seed (numpy): {RANDOM_SEED}")
print("="*70)

### Load event files for all runs

In [None]:
# One events table per run, each tagged with the run number it came from.
events_dir = BIDS_ROOT / subject / session / "func"
all_events = [
    pd.read_csv(
        events_dir / f"{subject}_{session}_{task}_run-{run_id}_events.tsv",
        sep='\t',
    ).assign(run=run_id)
    for run_id in range(1, n_runs + 1)
]

events_df = pd.concat(all_events, ignore_index=True)

# Keep only stimulation trials: rows labelled Baseline or Jitter are dropped.
is_non_stimulus = events_df['trial_type'].isin(['Baseline', 'Jitter'])
stim_events = events_df.loc[~is_non_stimulus].copy()

condition_column = stim_events['trial_type']
print(f"Total events loaded: {len(events_df)}")
print(f"Stimulation events: {len(stim_events)}")
print(f"Unique conditions: {condition_column.nunique()}")
print(f"\nConditions: {sorted(condition_column.unique())}")
print("\nSamples per condition:")
print(condition_column.value_counts().sort_index())

In [None]:
from nilearn.image import index_img, mean_img, concat_imgs
from nilearn.image import load_img

HRF_DELAY = 5.0   # added to each onset before converting to a volume index
WINDOW = 2        # volumes averaged on each side of the peak volume

sample_imgs, labels, groups = [], [], []

bold_dir = DERIVATIVES / subject / session / "func"
for run in range(1, n_runs + 1):
    bold_name = f"{subject}_{session}_{task}_run-{run}_space-{space}_desc-preproc_bold.nii.gz"
    img = load_img(str(bold_dir / bold_name))
    n_volumes = img.shape[3]
    print(f"Run {run} loaded: {img.shape}")

    run_events = stim_events[stim_events['run'] == run].sort_values('onset')
    for onset, trial_type in zip(run_events['onset'], run_events['trial_type']):
        peak_volume = int(round((onset + HRF_DELAY) / TR))
        if peak_volume < n_volumes:
            # Window is clipped to the run so it never indexes outside the image.
            first = max(0, peak_volume - WINDOW)
            stop = min(n_volumes, peak_volume + WINDOW + 1)
            window_volumes = list(range(first, stop))

            # One sample per trial: the mean image over the window.
            sample_imgs.append(mean_img(index_img(img, window_volumes)))
            labels.append(trial_type)
            groups.append(run)
        # Trials whose peak falls past the last volume are skipped.

X = concat_imgs(sample_imgs)
y = np.array(labels)
groups = np.array(groups)

samples_per_run = pd.Series(groups).value_counts().sort_index()
print("\nFinal dataset shape:", X.shape)
print("Samples:", len(y))
print("Classes:", len(np.unique(y)))
print("Runs:", np.unique(groups))
print("Samples per run:")
print(samples_per_run)


In [None]:
from sklearn.model_selection import PredefinedSplit

# Each sample's test fold is its run number shifted to start at 0,
# so every run is held out exactly once.
test_fold = groups - 1
cv = PredefinedSplit(test_fold=test_fold)
n_folds = cv.get_n_splits()
print(f"Cross-validation: Leave-one-run-out (n_folds={n_folds})")

In [None]:
from nilearn.datasets import fetch_atlas_harvard_oxford
from nilearn.image import new_img_like

atlas = fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm')

# Collect every atlas label mentioning the postcentral gyrus. Index 0 is
# excluded on purpose (presumably the background label — confirm).
s1_indices = [
    i for i, lab in enumerate(atlas.labels)
    if i != 0 and 'Postcentral Gyrus' in str(lab)
]
if not s1_indices:
    # An exact-match lookup cannot succeed once the substring search has
    # failed, so fail here with a message that names the actual problem.
    raise ValueError(
        "No 'Postcentral Gyrus' label found in the Harvard-Oxford atlas; "
        "cannot build the S1 mask."
    )

# Binary mask: 1 where the atlas voxel value is one of the selected indices.
# NOTE(review): assumes a label's list position equals its voxel value in
# atlas.maps — confirm for the installed nilearn version.
atlas_img = atlas.maps
atlas_data = atlas_img.get_fdata()
mask_data = np.isin(atlas_data, s1_indices).astype('uint8')
s1_mask = new_img_like(atlas_img, mask_data)

print(f"S1 mask created from atlas indices: {s1_indices}")
print("Selected labels:")
for i in s1_indices:
    print(f"  - {atlas.labels[i]}")

In [None]:
# Decoder restricted to the S1 mask and cross-validated with the
# leave-one-run-out splitter built above.
decoder_settings = dict(
    estimator='svc',
    mask=s1_mask,
    standardize='zscore_sample',
    screening_percentile=20,
    cv=cv,
    n_jobs=-1,
    scoring='accuracy',
)
decoder = Decoder(**decoder_settings)

decoder.fit(X, y)

In [None]:
from nilearn.image import index_img
from sklearn.model_selection import LeaveOneGroupOut

# Out-of-fold predictions: every sample is predicted by a decoder that never
# saw its run during training.
y_pred_cv = np.empty_like(y, dtype=object)
fold_scores = []

print("=" * 70)
print("DECODING RESULTS (Leave-one-run-out CV)")
print("=" * 70)

for fold_i, (train_idx, test_idx) in enumerate(cv.split(np.zeros(len(y))), 1):
    left_out_run = np.unique(groups[test_idx])
    left_out_run = int(left_out_run[0]) if len(left_out_run) == 1 else left_out_run

    X_train = index_img(X, train_idx)
    X_test = index_img(X, test_idx)

    # Inner (model-selection) CV is also split by run, matching the outer
    # scheme. With cv=None the inner splits were not tied to runs
    # (NOTE(review): nilearn appears to fall back to scikit-learn's default
    # K-fold in that case — confirm).
    train_groups = groups[train_idx]
    if len(np.unique(train_groups)) > 1:
        inner_cv, inner_groups = LeaveOneGroupOut(), train_groups
    else:
        # A single training run cannot be split by group; keep the old behavior.
        inner_cv, inner_groups = None, None

    decoder_fold = Decoder(
        estimator='svc',
        mask=s1_mask,
        standardize='zscore_sample',
        screening_percentile=20,
        cv=inner_cv,
        n_jobs=-1,
        scoring='accuracy',
    )

    decoder_fold.fit(X_train, y[train_idx], groups=inner_groups)
    y_pred_cv[test_idx] = decoder_fold.predict(X_test)

    # Accuracy on the held-out run only.
    fold_acc = np.mean(y_pred_cv[test_idx] == y[test_idx])
    fold_scores.append(fold_acc)

    print(f"  Fold {fold_i} (left-out run {left_out_run}): {fold_acc:.3f}")

fold_scores = np.array(fold_scores)
mean_accuracy = float(np.mean(fold_scores))
std_accuracy = float(np.std(fold_scores))

print(f"\n{'=' * 70}")
print(f"Mean Accuracy: {mean_accuracy:.3f} ± {std_accuracy:.3f}")
print(f"Performance:   {mean_accuracy*100:.1f}%")
print(f"{'=' * 70}")

In [None]:
# Bar chart of per-fold accuracy with the mean drawn as a dashed line.
fold_numbers = list(range(1, len(fold_scores) + 1))
mean_label = f'Mean: {mean_accuracy:.3f}'

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(fold_numbers, fold_scores, color='steelblue', alpha=0.7,
       edgecolor='black', label='Fold accuracy')
ax.axhline(y=mean_accuracy, color='red', linestyle='--', linewidth=2, label=mean_label)

ax.set_title('Leave-One-Run-Out Decoding Accuracy', fontsize=14, fontweight='bold')
ax.set_xlabel('CV Fold (Left-out run)', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax.set_xticks(fold_numbers)
# 20% headroom above the best fold.
ax.set_ylim(0, fold_scores.max() * 1.2)
ax.grid(axis='y', alpha=0.3)
ax.legend(fontsize=11)

plt.tight_layout()
fig.savefig(FIGURES_DIR / 'fold_accuracy.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
from nilearn import plotting

# Show the weight map of whichever condition comes first in the fitted
# decoder's coef_img_ mapping.
first_condition = list(decoder.coef_img_)[0]
coef_img = decoder.coef_img_[first_condition]

fig = plt.figure(figsize=(12, 4))
stat_map_options = dict(
    title=f'Feature Weights - Condition {first_condition}',
    cut_coords=5,
    display_mode='z',
    cmap='cold_hot',
    figure=fig,
)
plotting.plot_stat_map(coef_img, **stat_map_options)

weights_png = FIGURES_DIR / f'weights_{first_condition}.png'
fig.savefig(weights_png, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, balanced_accuracy_score

# Confusion matrix over the out-of-fold predictions; rows are true
# conditions, columns are predicted conditions.
y_pred = y_pred_cv.copy()
conditions = np.unique(y)
cm = confusion_matrix(y, y_pred, labels=conditions)
samples_per_condition = cm.sum(axis=1)
per_condition_accuracy = np.diag(cm) / samples_per_condition

print("=" * 70)
print("PER-CONDITION ACCURACY (Out-of-fold CV)")
print("=" * 70)

for condition, acc, samples in zip(conditions, per_condition_accuracy, samples_per_condition):
    print(f"{condition}: {acc:.3f} ({acc*100:.1f}%) - {samples} samples")

print(f"\n{'=' * 70}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y, y_pred):.3f}")
print(f"{'=' * 70}")

fig, ax = plt.subplots(figsize=(12, 10))
im = ax.imshow(cm, cmap='Blues', aspect='auto')

tick_positions = np.arange(len(conditions))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(conditions, rotation=45, ha='right')
ax.set_yticklabels(conditions)

ax.set_title('Confusion Matrix - Somatotopic Decoding (Out-of-fold CV)', fontsize=14, fontweight='bold')
ax.set_xlabel('Predicted Condition', fontsize=12, fontweight='bold')
ax.set_ylabel('True Condition', fontsize=12, fontweight='bold')

# Write the raw count into every cell (x = predicted column, y = true row).
for row, col in np.ndindex(len(conditions), len(conditions)):
    ax.text(col, row, cm[row, col],
            ha="center", va="center", color="black", fontsize=9)

plt.colorbar(im, ax=ax, label='Number of Samples')
plt.tight_layout()
fig.savefig(FIGURES_DIR / 'confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
from scipy.stats import binomtest

# One-sided binomial test of the pooled out-of-fold hit count against
# a chance rate of 1 / number of conditions.
total_samples = len(y)
n_conditions = len(np.unique(y))
chance_level = 1.0 / n_conditions
correct_predictions = int((y == y_pred).sum())

# H0 = random classification at chance level
# Note: despite its name, p_value holds the full test result; the p-value is .pvalue.
p_value = binomtest(correct_predictions, total_samples, chance_level, alternative='greater')

banner = "=" * 70
print(banner)
print("STATISTICAL SIGNIFICANCE TESTING (Out-of-fold CV)")
print(banner)
print(f"\nChance level (1/n_conditions): {chance_level:.3f} ({chance_level*100:.1f}%)")
print(f"Total samples: {total_samples}")
print(f"Observed correct predictions: {correct_predictions}")
print(f"Expected correct predictions (chance): {int(total_samples * chance_level)}")
print("\nBinomial Test (one-tailed, greater than chance):")
print(f"  p-value: {p_value.pvalue:.2e}")

if p_value.pvalue < 0.001:
    verdict = "  Result: Significantly above chance (p < 0.001) ***"
elif p_value.pvalue < 0.05:
    verdict = "  Result: Significantly above chance (p < 0.05) *"
else:
    verdict = f"  Result: Not significantly above chance (p = {p_value.pvalue:.3f})"
print(verdict)


In [None]:
from nilearn.masking import apply_mask, unmask


# Masked data matrix (samples x voxels), mean-centred per voxel in float64.
X_features = np.asarray(decoder.masker_.transform(X), dtype=np.float64)
n_samples = X_features.shape[0]
X_centered = X_features - X_features.mean(axis=0)

# Haufe et al. (2014): activation pattern = cov(X) @ w.
# np.cov(X, rowvar=False) equals Xc.T @ Xc / (n - 1), so the product is
# evaluated as Xc.T @ (Xc @ w) / (n - 1). This gives the same vector without
# allocating the voxels x voxels covariance matrix.
pattern_imgs = {}
for condition, weight_img in decoder.coef_img_.items():
    w = apply_mask(weight_img, decoder.mask_img_)
    pattern = X_centered.T @ (X_centered @ w) / (n_samples - 1)
    pattern_imgs[condition] = unmask(pattern, decoder.mask_img_)

# subset of conditions to visualize (first 6)
n_to_plot = min(6, len(conditions))
selected_conditions = conditions[:n_to_plot]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Shared colour scale: 99th percentile of |pattern| over all conditions.
vmax = np.percentile(np.abs(apply_mask(list(pattern_imgs.values()), decoder.mask_img_)), 99)
for idx, condition in enumerate(selected_conditions):
    ax = axes[idx]
    pattern_img = pattern_imgs[condition]

    plotting.plot_stat_map(
        pattern_img,
        title=f'Pattern - {condition}',
        cut_coords=3,
        display_mode='z',
        cmap='RdBu_r',
        figure=fig,
        axes=ax,
        colorbar=False,
        threshold=None,
        vmax=vmax,
        symmetric_cbar=True,
    )

# Hide any panel of the 2x3 grid that was not used.
for idx in range(n_to_plot, len(axes)):
    axes[idx].set_visible(False)

plt.suptitle('Somatotopic Activation Patterns for Selected Conditions (Haufe-transformed)', 
             fontsize=14, fontweight='bold', y=0.98)
plt.tight_layout()
fig.savefig(FIGURES_DIR / 'activation_patterns.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Summary statistics of the non-zero pattern values for the first five conditions.
section_rule = "="*70
print("\n" + section_rule)
print("WEIGHT STATISTICS FOR EACH CONDITION")
print(section_rule)

for condition in conditions[:5]:
    data = pattern_imgs[condition].get_fdata()
    nonzero = data[np.abs(data) > 0]
    if nonzero.size == 0:
        print(f"\n{condition}: empty map")
        continue

    magnitudes = np.abs(nonzero)
    p95, p99 = (np.percentile(magnitudes, q) for q in (95, 99))

    print(f"\n{condition}:")
    print(f"  Min pattern: {nonzero.min():.4f}")
    print(f"  Max pattern: {nonzero.max():.4f}")
    print(f"  Mean |pattern|: {np.mean(magnitudes):.4f}")
    print(f"  Std pattern: {nonzero.std():.4f}")

    # Counts of values strictly above each percentile of |pattern|.
    print(f"  Voxels > 95th percentile: {np.sum(magnitudes > p95)}")
    print(f"  Voxels > 99th percentile: {np.sum(magnitudes > p99)}")

In [None]:
from scipy.ndimage import label

# For the first five conditions: locate the strongest voxel inside the
# largest connected cluster of top-1% |pattern| values and report it in
# the image's world coordinates.
section_rule = "="*70
print("\n" + section_rule)
print("PEAK ACTIVATION LOCATIONS (MNI COORDINATES)")
print(section_rule)

for condition in conditions[:5]:
    pattern_img = pattern_imgs[condition]
    data = pattern_img.get_fdata()
    affine = pattern_img.affine
    magnitude = np.abs(data)

    nonzero = magnitude > 0
    if not nonzero.any():
        print(f"\n{condition}: empty map")
        continue

    # Find voxels in the top 1% of absolute weights
    threshold = np.percentile(magnitude[nonzero], 99)
    suprathreshold = magnitude >= threshold

    # Keep only the largest connected cluster
    labeled, n = label(suprathreshold)
    if n == 0:
        print(f"\n{condition}: no suprathreshold clusters")
        continue

    # bincount gives the voxel count per label; entry 0 is the unlabelled background.
    sizes = np.bincount(labeled.ravel())[1:]
    largest = int(np.argmax(sizes)) + 1
    cluster = labeled == largest

    peak_index = np.unravel_index(np.argmax(magnitude * cluster), data.shape)
    peak_value = data[peak_index]

    # voxel index -> world coordinates via the image affine (homogeneous coordinates)
    peak_mni = affine @ np.array(list(peak_index) + [1])
    print(f"\n{condition}:")
    print(f"  Peak MNI coords: x={peak_mni[0]:.1f}, y={peak_mni[1]:.1f}, z={peak_mni[2]:.1f}")
    print(f"  Pattern value: {peak_value:.4f}")
    print(f"  Cluster size: {cluster.sum()} voxels")

    # |x| <= 5 is reported as midline.
    x_coord = peak_mni[0]
    hemisphere = "LEFT" if x_coord < -5 else "RIGHT" if x_coord > 5 else "MID"

    print(f"Hemisphere: {hemisphere}")

In [None]:
# Two-panel figure: per-fold accuracy (left) and summary statistics (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: fold accuracies with mean and chance reference lines.
ax = axes[0]
ax.bar(range(1, len(fold_scores) + 1), fold_scores,
       color='steelblue', alpha=0.7, edgecolor='black', label='Fold accuracy')
ax.axhline(y=mean_accuracy, color='red', linestyle='--', linewidth=2,
           label=f'Mean: {mean_accuracy:.3f}')
ax.axhline(y=chance_level, color='gray', linestyle=':', linewidth=2,
           label=f'Chance: {chance_level:.3f}')
ax.set_xlabel('CV Fold (Left-out run)', fontsize=11, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=11, fontweight='bold')
ax.set_title('CV Fold Stability', fontsize=12, fontweight='bold')
ax.set_xticks(range(1, len(fold_scores) + 1))
ax.legend()
ax.grid(axis='y', alpha=0.3)

# Right panel: horizontal bars for the across-fold summary statistics.
ax = axes[1]
stats_data = {
    'Mean': mean_accuracy,
    'Std Dev': std_accuracy,
    'Min': float(np.min(fold_scores)),
    'Max': float(np.max(fold_scores)),
    'Range': float(np.max(fold_scores) - np.min(fold_scores))
}

y_pos = np.arange(len(stats_data))
values = list(stats_data.values())
colors_stats = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

bars = ax.barh(y_pos, values, color=colors_stats, alpha=0.7, edgecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(list(stats_data.keys()))
ax.set_xlabel('Value', fontsize=11, fontweight='bold')
ax.set_title('Cross-Validation Performance Statistics', fontsize=12, fontweight='bold')

# Annotate each bar with its numeric value at the bar's end.
for bar, value in zip(bars, values):
    ax.text(value, bar.get_y() + bar.get_height()/2, f'{value:.3f}',
            ha='left', va='center', fontweight='bold')

plt.tight_layout()
fig.savefig(FIGURES_DIR / 'cv_stability.png', dpi=300, bbox_inches='tight')
plt.show()

# mean_accuracy and std_accuracy are plain Python floats, so dividing by a
# zero mean would raise ZeroDivisionError; report NaN in that case instead.
coefficient_of_variation = (
    std_accuracy / mean_accuracy if mean_accuracy > 0 else float('nan')
)

print("=" * 70)
print("CROSS-VALIDATION STABILITY")
print("=" * 70)
print(f"Mean Accuracy:        {mean_accuracy:.3f}")
print(f"Standard Deviation:   {std_accuracy:.3f}")
print(f"Min Accuracy:         {np.min(fold_scores):.3f}")
print(f"Max Accuracy:         {np.max(fold_scores):.3f}")
print(f"Range:                {np.max(fold_scores) - np.min(fold_scores):.3f}")
print(f"Coefficient of Variation: {coefficient_of_variation:.3f}")
print(f"{'=' * 70}")

## Summary Report

In [None]:
# Assemble the final report line by line, then emit it in order so that
# both the notebook output and the log receive the same text.
section_rule = "="*70

if p_value.pvalue < 0.001:
    significance_line = "  Statistical Significance: p < 0.001 ***"
else:
    significance_line = f"  Statistical Significance: p = {p_value.pvalue:.3f}"

# Interpretation depends on both the mean accuracy and the binomial test.
above_chance = mean_accuracy > chance_level
if above_chance and p_value.pvalue < 0.05:
    interpretation_line = "  ✓ Electrode-specific activations are decodable from fMRI above chance"
elif above_chance:
    interpretation_line = "  • Above chance, but not statistically significant"
else:
    interpretation_line = "  • Near chance; poor decodability"

report_lines = [
    "\n" + section_rule,
    "DECODING ANALYSIS SUMMARY",
    section_rule,
    f"\nParticipant: {subject}, Session: {session}",
    f"Task: {task}",
    f"Number of runs: {n_runs}",
    f"Number of electrodes (conditions): {n_conditions}",
    f"Total samples: {len(y)}",
    "\nCLASSIFIER SETUP:",
    "  Model: Support Vector Classifier (Linear kernel)",
    "  Feature standardization: Z-score (per sample)",
    "  Feature selection: 20th percentile screening",
    "  CV: Leave-one-run-out",
    "\nRESULTS (Out-of-fold CV):",
    f"  Mean Decoding Accuracy: {mean_accuracy:.3f} ({mean_accuracy*100:.1f}%)",
    f"  Chance Level: {chance_level:.3f} ({chance_level*100:.1f}%)",
    f"  Balanced Accuracy: {balanced_accuracy_score(y, y_pred):.3f}",
    significance_line,
    "\nCONSISTENCY (across runs):",
    f"  Std Dev (folds): {std_accuracy:.3f}",
    f"  Min Fold Accuracy: {np.min(fold_scores):.3f}",
    f"  Max Fold Accuracy: {np.max(fold_scores):.3f}",
    "\nINTERPRETATION:",
    interpretation_line,
    section_rule + "\n",
]
for report_line in report_lines:
    print(report_line)


In [None]:
# Write the closing banner while the logging wrapper is still active, then
# close the log and restore the plain print for the final confirmation lines.
finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
section_rule = "="*70
print("\n" + section_rule)
print(f"Analysis completed: {finished_at}")
print(section_rule)

log_file.close()
print = original_print

print(f"\n✓ All outputs saved to: {OUTPUT_LOG.resolve()}")
print(f"✓ All figures saved to: {FIGURES_DIR.resolve()}")