In [6]:
import os
import warnings

import nibabel as nib
import numpy as np
import pandas
from scipy.stats import zscore

# Input

## HCP subject

One example subject taken from the Human Connectome Project (HCP).

- 1200 rows: Representing time points or temporal samples.

- 119 columns: Representing 119 brain regions of interest (ROIs).

In [4]:
# Path to the text file holding the example HCP subject's input matrix.
sub1_input = "/data/etosato/RHOSTS/Input/subject1_left.txt"

In [5]:
# Parse the subject's text file and display the (rows, columns) of the resulting array.
np.shape(np.loadtxt(sub1_input))

(1200, 119)

## Lorenzo's sample

### Load data

In [None]:
# Read the subject's fMRI image from disk and extract its voxel data as a
# floating-point NumPy array (indexed as 4D, with time on the last axis, by the
# cells below).
# NOTE(review): the path carries no .nii.gz suffix — confirm it points to an
# image file and not to the subject's folder.
fmri = nib.load(
    '/data/etosato/RHOSTS/Preprocessing/HCP_rsfMRI/134829'
).get_fdata()

In [23]:
# Read the parcellation atlas (path is relative to the working directory) and
# extract its voxel array; the ROI loop below compares these values against
# integer ROI labels.
atlas = nib.load(
    'cortex_100.nii.gz'
).get_fdata()

In [None]:
# Report both array shapes.
print("Data shape: ", fmri.shape)
print("Atlas shape: ", atlas.shape)

# Sanity check: the ROI masks built from `atlas` are used to index the first
# three axes of `fmri`, so both must live on the same voxel grid. Failing here
# gives a clear message instead of an opaque indexing error inside the ROI loop.
assert fmri.ndim == 4, f"expected 4D fMRI data (X, Y, Z, T), got shape {fmri.shape}"
assert atlas.shape == fmri.shape[:3], (
    f"atlas grid {atlas.shape} does not match fMRI spatial grid {fmri.shape[:3]}"
)

### Mask averaging

In [38]:
# Define the number of ROIs
n_rois = 100

# Define the number of timepoints (T): size of the last axis of the fMRI array
T = fmri.shape[3]

# Output matrix, one row per timepoint and one column per ROI --> (T, n_rois)
ts = np.zeros((T, n_rois))

# ROI labels for which the atlas contains no voxel
empty_rois = []

# Loop over the ROI labels 1..n_rois
for roi in range(1, n_rois + 1):
    # 1. Boolean mask of the voxels labelled with the current ROI --> (X, Y, Z)
    mask = (atlas == roi)

    # 2. No voxel for this ROI: its column stays at zero, but remember the
    #    label so the gap is reported instead of passing silently
    if not np.any(mask):
        empty_rois.append(roi)
        continue

    # 3. Select the fMRI voxels belonging to this ROI --> (n_voxels, T)
    fmri_voxels = fmri[mask, :]

    # 4. Average across voxels to get a single time series --> (T,)
    roi_ts = fmri_voxels.mean(axis=0)

    # 5. Store the ROI time series in its column (label roi -> column roi - 1)
    ts[:, roi - 1] = roi_ts

# An all-zero column is constant, and z-scoring a constant column yields NaN,
# so make missing ROIs visible before they propagate downstream.
if empty_rois:
    warnings.warn(
        f"{len(empty_rois)} ROI label(s) have no voxels in the atlas: {empty_rois}. "
        "Their columns in `ts` stay all-zero and will become NaN after z-scoring."
    )

In [39]:
# Display (n_voxels, T) for the voxel matrix left over from the last ROI that had voxels.
np.shape(fmri_voxels)

(863, 3600)

### Z-score

In [42]:
# Standardise each ROI column along the time axis (axis 0), using the
# population standard deviation (ddof=0, the SciPy default).
ts_z = zscore(ts, axis=0, ddof=0)

# Report the dimensions of the standardised matrix.
print("Final shape (T x ROI):", np.shape(ts_z))

## Save as txt

In [None]:
# Subject identifier used to name the output file.
# NOTE(review): `s_id` was not defined in any earlier cell, so this cell raised
# a NameError on a fresh kernel. "134829" is taken from the folder name of the
# fMRI input loaded above — confirm it is the intended subject ID.
s_id = "134829"

# Write the z-scored (T x ROI) matrix as plain text, six decimals per value.
out_path = f"{s_id}_ts_zscore.txt"
np.savetxt(out_path, ts_z, fmt="%.6f")

print("Saved:", out_path)

# Miscellaneous

In [2]:
import h5py

# Full-size input file and the trimmed copy to be created from it.
src_hd5 = "/Output/sub1_left_weighted.hd5"  # large file
dst_hd5 = "/Output_temp/sub1_output_proj_small.hd5"  # reduced file

with h5py.File(src_hd5, "r") as src:
    with h5py.File(dst_hd5, "w") as dst:
        # Order the top-level names by their integer value and keep the first five.
        keys = sorted(src.keys(), key=int)[:5]

        # Read each selected dataset fully and write it to the new file under
        # the same name.
        for k in keys:
            dst.create_dataset(k, data=src[k][:])

print("Created reduced HDF5 with datasets:", keys)


Created reduced HDF5 with datasets: ['0', '1', '2', '3', '4']
