In [1]:
import nibabel as nb
import numpy as np
import os
import pandas as pd
import re
import math

In [2]:
# note the permutations we already have in perms
perms = set()

# the proportion of items in the array to sample
proportion = 0.1

# the number of permuations
times = 10

# the size of the array
n_vols = 11

# sample 
samp = tuple(np.random.randint(n_vols, size = math.ceil(proportion * n_vols)))

# add the sample to the set
perms.add(samp)

print("first sample:", samp)
for n in range(times):
    while samp in perms:
        print("getting a new sample")
        samp = tuple(np.random.randint(n_vols, size = math.ceil(proportion * n_vols)))
        print("new sample:", samp)
        print("new sample in set?:", samp in perms)

    perms.add(samp)
print(perms)

first sample: (8, 6)
getting a new sample
new sample: (7, 7)
new sample in set?: False
getting a new sample
new sample: (5, 4)
new sample in set?: False
getting a new sample
new sample: (1, 10)
new sample in set?: False
getting a new sample
new sample: (7, 8)
new sample in set?: False
getting a new sample
new sample: (8, 2)
new sample in set?: False
getting a new sample
new sample: (9, 3)
new sample in set?: False
getting a new sample
new sample: (8, 7)
new sample in set?: False
getting a new sample
new sample: (10, 1)
new sample in set?: False
getting a new sample
new sample: (6, 4)
new sample in set?: False
getting a new sample
new sample: (10, 4)
new sample in set?: False
{(6, 4), (5, 4), (8, 2), (10, 4), (9, 3), (7, 7), (8, 7), (10, 1), (1, 10), (8, 6), (7, 8)}


In [3]:
def simulate_motion(input_nomotion, input_motion, proportion, n_simulations, noise, subject):
    
    out = []
    template = '../output/sub-{subject}_acq-{noise}_run-{permutation}_dwi.nii.gz'
    
    # read in the two images
    print("reading in images")
    input_nomotion = nb.load(input_nomotion)
    input_motion = nb.load(input_motion)
    nomotion_arr = input_nomotion.get_fdata(dtype = np.float32)
    motion_arr = input_motion.get_fdata(dtype = np.float32)
    assert motion_arr.shape[-1] == nomotion_arr.shape[-1]

    # the number of volumes to sample from
    n_vols = nomotion_arr.shape[-1]
    print("number of volumes to sample from:", n_vols)

    # track the samples here
    samples = set()

    # first sample
    print("first permuation...")
    s_array = np.random.randint(n_vols, size = math.ceil(proportion * n_vols))
    s_tup = tuple(s_array)
    samples.add(s_tup)
    #print(s_tup)
    
    x = nomotion_arr.copy()
    x[...,s_array] = motion_arr[..., s_array]
    x = nb.Nifti1Image(x, input_nomotion.affine, header=input_nomotion.header)
    outfile = template.format(subject=subject, noise=noise, permutation=str(int(proportion*100)) + 'perc' + str(0).zfill(2))
    x.to_filename(outfile)
    out.append(outfile)

    for n in range(n_simulations-1):
        print("permuation", n+1)
        # now look for a new sample; break the loop when the sample is not in set
        while s_tup in samples:
            #print("getting a new sample")
            s_array = np.random.randint(n_vols, size = math.ceil(proportion * n_vols))
            s_tup = tuple(s_array)
            #print("new sample:", s_tup)
            #print("new sample in set?:", s_tup in samples)

        samples.add(s_tup)

        x = nomotion_arr.copy()
        x[...,s_array] = motion_arr[..., s_array]
        x = nb.Nifti1Image(x, input_nomotion.affine, header=input_nomotion.header)
        outfile = template.format(subject=subject, noise=noise, permutation=str(int(proportion*100))+'perc' + str(n+1).zfill(2))
        x.to_filename(outfile)
        out.append(outfile)
    return out, samples

In [4]:
img2 = '../data/realistic/lowmotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-realistic_run-lowmotion_dwi.nii.gz'
img1 = '../data/realistic/nomotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-realistic_run-nomotion_dwi.nii.gz'

simulate_motion(img1, img2, 0.2, 3, 'realistic', 'tester')

reading in images
number of volumes to sample from: 279
first permuation...
permuation 1
permuation 2


(['../output/sub-tester_acq-realistic_run-20perc00_dwi.nii.gz',
  '../output/sub-tester_acq-realistic_run-20perc01_dwi.nii.gz',
  '../output/sub-tester_acq-realistic_run-20perc02_dwi.nii.gz'],
 {(74,
   71,
   65,
   193,
   115,
   165,
   0,
   56,
   170,
   80,
   47,
   129,
   16,
   253,
   35,
   13,
   32,
   203,
   66,
   194,
   113,
   4,
   268,
   112,
   164,
   222,
   276,
   148,
   7,
   124,
   257,
   115,
   118,
   130,
   190,
   120,
   79,
   3,
   49,
   44,
   41,
   53,
   158,
   244,
   112,
   99,
   156,
   82,
   223,
   128,
   81,
   274,
   233,
   208,
   95,
   113),
  (130,
   129,
   189,
   94,
   153,
   111,
   89,
   217,
   60,
   86,
   225,
   111,
   59,
   186,
   93,
   157,
   2,
   172,
   63,
   121,
   42,
   27,
   191,
   126,
   184,
   242,
   223,
   10,
   208,
   63,
   267,
   44,
   230,
   186,
   26,
   231,
   246,
   186,
   194,
   216,
   70,
   35,
   49,
   110,
   77,
   189,
   55,
   103,
   244,
   107,
   1,


In [5]:
dwi = []
for root, dirs, files in os.walk("../data"):
    for file in files:
        if file.endswith('dwi.nii.gz'):
            dwi.append(root+"/"+file)

In [6]:
dwi

['../data/noisefree/lowmotion/sub-HASC55/dwi/sub-HASC55_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-HCP/dwi/sub-HCP_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-ABCD/dwi/sub-ABCD_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-HASC55/dwi/sub-HASC55_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-HCP/dwi/sub-HCP_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-ABCD/dwi/sub-ABCD_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-HASC55/dwi/sub-HASC55_acq-realistic_run-lowmotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-HCP/dwi/sub-HCP_acq-realistic_run-lowmotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-ABCD/dwi/sub-ABCD_acq-realistic_ru

In [7]:
rows = []
for x in dwi:
    
    fields = x.split("/")
    noise = fields[2]
    motion = fields[3]
    sub = fields[4].replace("sub-", "")
    row = {"noise": noise, "motion": motion, "subject": sub, "image":x}
    rows.append(row)

In [8]:
df = pd.DataFrame(rows).sort_values("subject").reset_index(drop=True)
df

Unnamed: 0,image,motion,noise,subject
0,../data/noisefree/lowmotion/sub-ABCD/dwi/sub-A...,lowmotion,noisefree,ABCD
1,../data/noisefree/nomotion/sub-ABCD/dwi/sub-AB...,nomotion,noisefree,ABCD
2,../data/realistic/lowmotion/sub-ABCD/dwi/sub-A...,lowmotion,realistic,ABCD
3,../data/realistic/nomotion/sub-ABCD/dwi/sub-AB...,nomotion,realistic,ABCD
4,../data/noisefree/lowmotion/sub-DSIQ5/dwi/sub-...,lowmotion,noisefree,DSIQ5
5,../data/noisefree/nomotion/sub-DSIQ5/dwi/sub-D...,nomotion,noisefree,DSIQ5
6,../data/realistic/lowmotion/sub-DSIQ5/dwi/sub-...,lowmotion,realistic,DSIQ5
7,../data/realistic/nomotion/sub-DSIQ5/dwi/sub-D...,nomotion,realistic,DSIQ5
8,../data/noisefree/lowmotion/sub-HASC55/dwi/sub...,lowmotion,noisefree,HASC55
9,../data/noisefree/nomotion/sub-HASC55/dwi/sub-...,nomotion,noisefree,HASC55


In [9]:
outputs_10per_images = {}
outputs_10per_indeces = {}
outputs_25per_images = {}
outputs_25per_indeces = {}

for x in range(len(df))[0::2]:
    name = df.loc[x+1,'subject'] + "_" + df.loc[x+1,'noise']
    outputs_10per_images[name], outputs_10per_indeces[name] = simulate_motion(df.loc[x+1,'image'], df.loc[x,'image'], 0.1, 20, df.loc[x+1,'subject'], df.loc[x+1,'noise'])
    outputs_25per_images[name], outputs_25per_indeces[name] = simulate_motion(df.loc[x+1,'image'], df.loc[x,'image'], 0.25, 20, df.loc[x+1,'subject'], df.loc[x+1,'noise'])

reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
r

In [10]:
outputs_10per_indeces

{'ABCD_noisefree': {(9, 67, 20, 31, 41, 94, 1, 65, 31, 62, 44),
  (10, 54, 4, 17, 56, 51, 69, 33, 30, 98, 45),
  (10, 59, 43, 13, 86, 67, 28, 78, 96, 35, 48),
  (12, 87, 87, 85, 66, 13, 32, 63, 45, 89, 70),
  (18, 101, 32, 41, 67, 29, 77, 65, 102, 38, 77),
  (20, 40, 11, 99, 34, 83, 69, 102, 90, 73, 75),
  (22, 54, 47, 87, 30, 84, 2, 1, 24, 13, 49),
  (25, 53, 37, 40, 75, 95, 84, 34, 47, 1, 90),
  (26, 85, 15, 72, 15, 27, 5, 48, 92, 31, 88),
  (29, 83, 96, 42, 69, 47, 100, 87, 51, 68, 84),
  (34, 89, 79, 80, 31, 67, 39, 30, 31, 87, 13),
  (36, 72, 60, 8, 18, 28, 21, 66, 63, 67, 41),
  (40, 100, 91, 60, 12, 12, 74, 38, 30, 45, 29),
  (47, 12, 39, 55, 84, 44, 44, 30, 10, 68, 64),
  (48, 6, 31, 9, 45, 24, 0, 63, 14, 89, 10),
  (58, 30, 102, 41, 52, 56, 21, 12, 38, 49, 100),
  (60, 102, 14, 66, 25, 74, 42, 25, 46, 25, 77),
  (62, 13, 44, 49, 30, 86, 38, 87, 62, 37, 3),
  (75, 51, 48, 50, 44, 82, 54, 66, 67, 65, 0),
  (93, 26, 69, 79, 88, 71, 22, 88, 44, 92, 67)},
 'ABCD_realistic': {(0, 75

In [11]:
for key, val in outputs_10per_indeces.items():
    print(key)
    x = pd.DataFrame(val)
    x.columns = ["volume_"+str(i) for i in x.columns]
    x.index = ["permuation_"+str(i) for i in x.index]
    print(x)
    name = key + "_10per.csv"
    x.to_csv("../output/{}".format(name))

ABCD_noisefree
               volume_0  volume_1  volume_2  volume_3  volume_4  volume_5  \
permuation_0         26        85        15        72        15        27   
permuation_1         93        26        69        79        88        71   
permuation_2         10        54         4        17        56        51   
permuation_3         29        83        96        42        69        47   
permuation_4         34        89        79        80        31        67   
permuation_5         20        40        11        99        34        83   
permuation_6         58        30       102        41        52        56   
permuation_7         18       101        32        41        67        29   
permuation_8         10        59        43        13        86        67   
permuation_9         40       100        91        60        12        12   
permuation_10         9        67        20        31        41        94   
permuation_11        62        13        44        49        

In [12]:
for key, val in outputs_25per_indeces.items():
    print(key)
    x = pd.DataFrame(val)
    x.columns = ["volume_"+str(i) for i in x.columns]
    x.index = ["permuation_"+str(i) for i in x.index]
    print(x)
    name = key + "_25per.csv"
    x.to_csv("../output/{}".format(name))

ABCD_noisefree
               volume_0  volume_1  volume_2  volume_3  volume_4  volume_5  \
permuation_0          6        38        97         7       102        36   
permuation_1         56        13        23        46        24        51   
permuation_2         76        78        64        84        68        89   
permuation_3         85        77        78        39        12        55   
permuation_4         63         5        90        63         3        97   
permuation_5         33        71        12        71        49        54   
permuation_6         46        85         9        11        64        53   
permuation_7         84        80        91        52        98        17   
permuation_8         69        45        69        91        14        20   
permuation_9         98        51        50        40        47         2   
permuation_10        16        26         5        27        44        91   
permuation_11        29        94         2        21        