# Split time series in half
For every raw file with more than 80 frames in total, split the time series in two halves, record the name of each half, and create the input files needed to re-compute the similarity matrices on the subsets.

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import nibabel as nib

In [2]:
# Directory the raw images are loaded from before being split
raw_path = '/data1/guilimin/data/abide/preproc/Common_ngs_lp/'
# Directory the split images are written to (created below if it is missing)
out_path = '/data1/guilimin/data/abide/preproc/split_half_ngs_lp'
# Phenotype CSV; each row names one raw file through SUB_ID, session and run
pheno_path = '/data1/guilimin/data/abide/pheno/merged_abide_full_maybe.csv'
# File name pattern of a raw image: SUB_ID zero-padded to 7 characters, session, run
ftmp = 'fmri_{:07}_session_{}_run{}.nii.gz'
# Same pattern with a trailing label that identifies the part of the split
ptmp = 'fmri_{:07}_session_{}_run{}_{}.nii.gz'

In [3]:
# Make sure the output directory exists. `exist_ok=True` creates it (and any
# missing parents) in a single call, so there is no window between checking
# for the directory and creating it, and re-running the cell is a no-op.
os.makedirs(out_path, exist_ok=True)

In [4]:
# Phenotype table read from the CSV at `pheno_path`
pheno = pd.read_csv(pheno_path)
# Empty table for the split files: the phenotype columns followed by an
# additional 'part' column
s_pheno = pd.DataFrame(columns=pheno.columns.tolist() + ['part'])

In [5]:
# Split every raw image that has more than `min_frames` entries along its last
# axis: the first half of that axis is saved as part 'b' and the row is added
# to the split phenotype table with the part label.
#
# The rows are collected in a list and `s_pheno` is rebuilt from scratch at the
# end, so re-running this cell does not duplicate rows.
#
# Only part 'b' is produced here; the complementary part would be
# `data[..., half:]`, saved with the label 'a'.
min_frames = 80
part_label = 'b'
split_rows = []

for index, row in pheno.iterrows():
    f_name = ftmp.format(row.SUB_ID, row.session, row.run)
    f_img = nib.load(os.path.join(raw_path, f_name))

    # The shape is taken from the image metadata, so no voxel data is read yet
    n_frames = f_img.shape[-1]
    if n_frames <= min_frames:
        continue

    part_name = ptmp.format(row.SUB_ID, row.session, row.run, part_label)
    part_path = os.path.join(out_path, part_name)

    # The row is recorded even when the split file is already on disk, so the
    # phenotype table always lists every file that qualifies
    row['part'] = part_label
    split_rows.append(row)
    if os.path.isfile(part_path):
        continue

    # Load the data since the file matches our criteria and still has to be written
    data = np.asanyarray(f_img.dataobj)

    # Keep the first half of the last axis (integer division rounds down)
    half = data.shape[-1] // 2
    part_data = data[..., :half]

    # Reuse the header and affine of the source image for the split image
    part_img = nib.Nifti1Image(part_data, affine=f_img.affine, header=f_img.header)

    print('Saving {} part {} to {}'.format(row.SUB_ID, part_label, part_path))
    nib.save(part_img, part_path)

# Build the table once; each row keeps the index label it had in `pheno`
s_pheno = pd.DataFrame(split_rows, columns=list(pheno.columns) + ['part'])

Saving 51459 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051459_session_1_run1_b.nii.gz
Saving 51465 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051465_session_1_run1_b.nii.gz
Saving 51467 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051467_session_1_run1_b.nii.gz
Saving 51473 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051473_session_1_run1_b.nii.gz
Saving 51474 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051474_session_1_run1_b.nii.gz
Saving 51484 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051484_session_1_run1_b.nii.gz
Saving 51485 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051485_session_1_run1_b.nii.gz
Saving 51486 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051486_session_1_run1_b.nii.gz
Saving 51487 part b to /data1/guilimin/data/abide/preproc/split_half_ngs_lp/fmri_0051487_session

In [6]:
# Write the phenotype table of the split files to disk without the index column.
# NOTE(review): the file name ends in '_a' while the rows are tagged part 'b'
# in the loop above — confirm this is the intended target file.
split_csv = '/data1/guilimin/data/abide/pheno/split_abide_full_a.csv'
s_pheno.to_csv(split_csv, index=False)

In [8]:
# Display the part label recorded for every row of the split phenotype table
s_pheno.loc[:, 'part']

0      b
1      b
2      b
3      b
4      b
5      b
6      b
7      b
8      b
9      b
10     b
11     b
12     b
13     b
14     b
15     b
18     b
19     b
20     b
24     b
25     b
27     b
28     b
29     b
30     b
33     b
34     b
35     b
37     b
38     b
      ..
537    b
538    b
539    b
540    b
541    b
542    b
543    b
544    b
545    b
546    b
547    b
548    b
549    b
550    b
551    b
552    b
553    b
554    b
555    b
556    b
557    b
558    b
559    b
560    b
561    b
562    b
563    b
565    b
566    b
567    b
Name: part, dtype: object