In [1]:
import nibabel as nb
import numpy as np
import os
import pandas as pd
import re
import math

In [2]:
# note the permutations we already have in perms
perms = set()

# the proportion of items in the array to sample
proportion = 0.1

# the number of permuations
times = 10

# the size of the array
n_vols = 11

# sample 
samp = tuple(np.random.randint(n_vols, size = math.ceil(proportion * n_vols)))

# add the sample to the set
perms.add(samp)

print("first sample:", samp)
for n in range(times):
    while samp in perms:
        print("getting a new sample")
        samp = tuple(np.random.randint(n_vols, size = math.ceil(proportion * n_vols)))
        print("new sample:", samp)
        print("new sample in set?:", samp in perms)

    perms.add(samp)
print(perms)

first sample: (4, 10)
getting a new sample
new sample: (8, 0)
new sample in set?: False
getting a new sample
new sample: (5, 6)
new sample in set?: False
getting a new sample
new sample: (4, 1)
new sample in set?: False
getting a new sample
new sample: (5, 0)
new sample in set?: False
getting a new sample
new sample: (10, 5)
new sample in set?: False
getting a new sample
new sample: (1, 5)
new sample in set?: False
getting a new sample
new sample: (5, 7)
new sample in set?: False
getting a new sample
new sample: (2, 2)
new sample in set?: False
getting a new sample
new sample: (5, 2)
new sample in set?: False
getting a new sample
new sample: (7, 4)
new sample in set?: False
{(4, 10), (10, 5), (5, 6), (8, 0), (5, 7), (5, 2), (7, 4), (1, 5), (5, 0), (2, 2), (4, 1)}


In [25]:
def simulate_motion(input_nomotion, input_motion, proportion, n_simulations, noise, subject):
    
    out = []
    template = '../output/sub-{subject}_acq-{noise}_run-{permutation}_dwi.nii.gz'
    
    # read in the two images
    print("reading in images")
    input_nomotion = nb.load(input_nomotion)
    input_motion = nb.load(input_motion)
    nomotion_arr = input_nomotion.get_fdata(dtype = np.float32)
    motion_arr = input_motion.get_fdata(dtype = np.float32)
    assert motion_arr.shape[-1] == nomotion_arr.shape[-1]

    # the number of volumes to sample from
    n_vols = nomotion_arr.shape[-1]
    print("number of volumes to sample from:", n_vols)

    # track the samples here
    samples = set()

    # first sample
    print("first permuation...")
    s_array = np.random.randint(n_vols, size = math.ceil(proportion * n_vols))
    s_tup = tuple(s_array)
    samples.add(s_tup)
    #print(s_tup)
    
    x = nomotion_arr.copy()
    x[...,s_array] = motion_arr[..., s_array]
    x = nb.Nifti1Image(x, input_nomotion.affine, header=input_nomotion.header)
    outfile = template.format(subject=subject, noise=noise, permutation=str(int(proportion*100)) + 'perc' + str(0).zfill(2))
    x.to_filename(outfile)
    out.append(outfile)

    for n in range(n_simulations-1):
        print("permuation", n+1)
        # now look for a new sample; break the loop when the sample is not in set
        while s_tup in samples:
            #print("getting a new sample")
            s_array = np.random.randint(n_vols, size = math.ceil(proportion * n_vols))
            s_tup = tuple(s_array)
            #print("new sample:", s_tup)
            #print("new sample in set?:", s_tup in samples)

        samples.add(s_tup)

        x = nomotion_arr.copy()
        x[...,s_array] = motion_arr[..., s_array]
        x = nb.Nifti1Image(x, input_nomotion.affine, header=input_nomotion.header)
        outfile = template.format(subject=subject, noise=noise, permutation=str(int(proportion*100))+'perc' + str(n+1).zfill(2))
        x.to_filename(outfile)
        out.append(outfile)
    return out, samples

In [18]:
img2 = '../data/realistic/lowmotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-realistic_run-lowmotion_dwi.nii.gz'
img1 = '../data/realistic/nomotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-realistic_run-nomotion_dwi.nii.gz'

simulate_motion(img1, img2, 0.2, 3, 'realistic', 'tester')

reading in images
number of volumes to sample from: 279
first permuation:
(248, 118, 46, 25, 163, 64, 246, 218, 125, 188, 155, 34, 172, 57, 0, 170, 154, 179, 265, 41, 57, 148, 107, 38, 184, 180, 96, 33, 5, 202, 240, 157, 167, 242, 201, 231, 44, 147, 213, 277, 47, 203, 166, 195, 11, 178, 263, 61, 257, 105, 102, 157, 3, 58, 189, 273)
permuation 0
getting a new sample
new sample: (60, 265, 219, 42, 204, 190, 133, 208, 138, 71, 84, 266, 122, 273, 176, 163, 16, 96, 223, 146, 226, 149, 261, 23, 56, 59, 260, 24, 141, 49, 257, 277, 111, 62, 8, 57, 235, 231, 202, 27, 57, 257, 161, 1, 211, 40, 255, 57, 127, 7, 80, 180, 79, 41, 240, 249)
new sample in set?: False
permuation 1
getting a new sample
new sample: (79, 186, 6, 198, 227, 120, 39, 37, 94, 159, 38, 71, 111, 248, 243, 206, 0, 139, 247, 148, 208, 140, 84, 246, 9, 224, 141, 28, 103, 150, 198, 54, 165, 153, 94, 3, 92, 197, 267, 84, 101, 139, 80, 146, 226, 153, 226, 143, 102, 122, 224, 126, 89, 174, 15, 30)
new sample in set?: False


(['../output/subject-tester_acq-realistic_run-20perc00_dwi.nii.gz',
  '../output/subject-tester_acq-realistic_run-20perc01_dwi.nii.gz',
  '../output/subject-tester_acq-realistic_run-20perc02_dwi.nii.gz'],
 {(60,
   265,
   219,
   42,
   204,
   190,
   133,
   208,
   138,
   71,
   84,
   266,
   122,
   273,
   176,
   163,
   16,
   96,
   223,
   146,
   226,
   149,
   261,
   23,
   56,
   59,
   260,
   24,
   141,
   49,
   257,
   277,
   111,
   62,
   8,
   57,
   235,
   231,
   202,
   27,
   57,
   257,
   161,
   1,
   211,
   40,
   255,
   57,
   127,
   7,
   80,
   180,
   79,
   41,
   240,
   249),
  (79,
   186,
   6,
   198,
   227,
   120,
   39,
   37,
   94,
   159,
   38,
   71,
   111,
   248,
   243,
   206,
   0,
   139,
   247,
   148,
   208,
   140,
   84,
   246,
   9,
   224,
   141,
   28,
   103,
   150,
   198,
   54,
   165,
   153,
   94,
   3,
   92,
   197,
   267,
   84,
   101,
   139,
   80,
   146,
   226,
   153,
   226,
   143,
   102,
 

In [27]:
dwi = []
for root, dirs, files in os.walk("../data"):
    for file in files:
        if file.endswith('dwi.nii.gz'):
            dwi.append(root+"/"+file)

In [28]:
dwi

['../data/noisefree/lowmotion/sub-HASC55/dwi/sub-HASC55_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-HCP/dwi/sub-HCP_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-ABCD/dwi/sub-ABCD_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/lowmotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-noisefree_run-lowmotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-HASC55/dwi/sub-HASC55_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-HCP/dwi/sub-HCP_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-ABCD/dwi/sub-ABCD_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/noisefree/nomotion/sub-DSIQ5/dwi/sub-DSIQ5_acq-noisefree_run-nomotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-HASC55/dwi/sub-HASC55_acq-realistic_run-lowmotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-HCP/dwi/sub-HCP_acq-realistic_run-lowmotion_dwi.nii.gz',
 '../data/realistic/lowmotion/sub-ABCD/dwi/sub-ABCD_acq-realistic_ru

In [29]:
rows = []
for x in dwi:
    
    fields = x.split("/")
    noise = fields[2]
    motion = fields[3]
    sub = fields[4].replace("sub-", "")
    row = {"noise": noise, "motion": motion, "subject": sub, "image":x}
    rows.append(row)

In [30]:
df = pd.DataFrame(rows).sort_values("subject").reset_index(drop=True)
df

Unnamed: 0,image,motion,noise,subject
0,../data/noisefree/lowmotion/sub-ABCD/dwi/sub-A...,lowmotion,noisefree,ABCD
1,../data/noisefree/nomotion/sub-ABCD/dwi/sub-AB...,nomotion,noisefree,ABCD
2,../data/realistic/lowmotion/sub-ABCD/dwi/sub-A...,lowmotion,realistic,ABCD
3,../data/realistic/nomotion/sub-ABCD/dwi/sub-AB...,nomotion,realistic,ABCD
4,../data/noisefree/lowmotion/sub-DSIQ5/dwi/sub-...,lowmotion,noisefree,DSIQ5
5,../data/noisefree/nomotion/sub-DSIQ5/dwi/sub-D...,nomotion,noisefree,DSIQ5
6,../data/realistic/lowmotion/sub-DSIQ5/dwi/sub-...,lowmotion,realistic,DSIQ5
7,../data/realistic/nomotion/sub-DSIQ5/dwi/sub-D...,nomotion,realistic,DSIQ5
8,../data/noisefree/lowmotion/sub-HASC55/dwi/sub...,lowmotion,noisefree,HASC55
9,../data/noisefree/nomotion/sub-HASC55/dwi/sub-...,nomotion,noisefree,HASC55


In [31]:
outputs_10per_images = {}
outputs_10per_indeces = {}
outputs_25per_images = {}
outputs_25per_indeces = {}

for x in range(len(df))[0::2]:
    name = df.loc[x+1,'subject'] + "_" + df.loc[x+1,'noise']
    outputs_10per_images[name], outputs_10per_indeces[name] = simulate_motion(df.loc[x+1,'image'], df.loc[x,'image'], 0.1, 20, df.loc[x+1,'subject'], df.loc[x+1,'noise'])
    outputs_25per_images[name], outputs_25per_indeces[name] = simulate_motion(df.loc[x+1,'image'], df.loc[x,'image'], 0.25, 20, df.loc[x+1,'subject'], df.loc[x+1,'noise'])

reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
reading in images
number of volumes to sample from: 103
first permuation...
permuation 1
permuation 2
permuation 3
permuation 4
permuation 5
permuation 6
permuation 7
permuation 8
permuation 9
permuation 10
permuation 11
permuation 12
permuation 13
permuation 14
permuation 15
permuation 16
permuation 17
permuation 18
permuation 19
r

In [32]:
outputs_10per_indeces

{'ABCD_noisefree': {(0, 100, 81, 36, 99, 72, 0, 48, 83, 29, 56),
  (1, 27, 50, 19, 25, 98, 33, 98, 92, 15, 52),
  (11, 20, 100, 27, 17, 98, 63, 20, 2, 19, 42),
  (14, 76, 75, 18, 75, 1, 23, 63, 97, 24, 89),
  (24, 32, 29, 55, 24, 74, 26, 34, 15, 56, 22),
  (27, 73, 78, 4, 30, 11, 80, 87, 76, 27, 61),
  (28, 9, 72, 100, 31, 21, 42, 70, 61, 33, 90),
  (36, 90, 72, 49, 65, 33, 64, 12, 94, 76, 66),
  (57, 48, 93, 28, 40, 61, 59, 44, 73, 92, 10),
  (62, 89, 28, 53, 80, 72, 86, 39, 51, 13, 59),
  (67, 80, 31, 102, 10, 42, 95, 63, 71, 32, 31),
  (71, 14, 13, 63, 43, 69, 82, 66, 95, 67, 94),
  (73, 68, 79, 69, 33, 80, 30, 33, 22, 20, 99),
  (74, 65, 100, 43, 87, 85, 21, 48, 26, 78, 72),
  (75, 6, 1, 37, 66, 85, 91, 90, 20, 89, 71),
  (82, 68, 21, 16, 72, 25, 40, 90, 16, 12, 24),
  (84, 64, 4, 78, 34, 59, 23, 94, 58, 90, 68),
  (89, 14, 22, 4, 75, 10, 22, 81, 10, 44, 39),
  (96, 59, 39, 19, 2, 55, 66, 50, 79, 28, 63),
  (102, 12, 26, 81, 2, 23, 56, 22, 95, 63, 31)},
 'ABCD_realistic': {(5, 20, 

In [39]:
for key, val in outputs_10per_indeces.items():
    print(key)
    x = pd.DataFrame(val)
    x.columns = ["volume_"+str(i) for i in x.columns]
    x.index = ["permuation_"+str(i) for i in x.index]
    print(x)
    name = key + "_10per.csv"
    x.to_csv("../output/{}".format(name))

ABCD_noisefree
               volume_0  volume_1  volume_2  volume_3  volume_4  volume_5  \
permuation_0         67        80        31       102        10        42   
permuation_1         82        68        21        16        72        25   
permuation_2          0       100        81        36        99        72   
permuation_3         84        64         4        78        34        59   
permuation_4         62        89        28        53        80        72   
permuation_5         89        14        22         4        75        10   
permuation_6         57        48        93        28        40        61   
permuation_7          1        27        50        19        25        98   
permuation_8         14        76        75        18        75         1   
permuation_9         28         9        72       100        31        21   
permuation_10        27        73        78         4        30        11   
permuation_11        96        59        39        19        

In [40]:
for key, val in outputs_25per_indeces.items():
    print(key)
    x = pd.DataFrame(val)
    x.columns = ["volume_"+str(i) for i in x.columns]
    x.index = ["permuation_"+str(i) for i in x.index]
    print(x)
    name = key + "_25per.csv"
    x.to_csv("../output/{}".format(name))

ABCD_noisefree
               volume_0  volume_1  volume_2  volume_3  volume_4  volume_5  \
permuation_0        100        97        58        70        57        32   
permuation_1          2        27        14        59        11        29   
permuation_2         77        42        93        70        85        94   
permuation_3         80        87        69        55         5        85   
permuation_4         24        96        41        10        51        19   
permuation_5         37        92         0        61        13        90   
permuation_6         69        32        67        49         0        41   
permuation_7         64        34        69        66        17        30   
permuation_8         36        20        59        25        55        19   
permuation_9        100        75        61        16        20        53   
permuation_10        47        77        16        20        59        41   
permuation_11       101       100        67        30        