# Suprema Simulation


## Pip Install

In [None]:
!pip install dask
!pip install crtoolbox
!pip install numpy
!pip install matplotlib
!pip install distributed
!pip install pyblm

## Imports

In [None]:
# blm imports
from pyblm.blm import _main as blm
import yaml

# Dask imports
from dask.distributed import Client, as_completed
from dask_jobqueue import SLURMCluster

# Basic imports
import os
import shutil
import sys
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Import supporting functions
from crtoolbox.lib.boundary import *
from crtoolbox.lib.regression import *

# Import bootstrap functions
from crtoolbox.bootstrap import *

# Import data generation
from crtoolbox.tests.generate_2d_data import *

## Setup

In [None]:
# Set output directory (root under which every analysis in this notebook
# writes its results)
out_dir = '/well/nichols/users/inf852/sup_sim'

# Create the output directory if it is missing. exist_ok=True replaces the
# separate "does it exist?" check, so there is no window between checking for
# the directory and creating it.
os.makedirs(out_dir, exist_ok=True)

## Ground Truth Analysis

Set up ground truth analysis.

In [None]:
# Ground truth directory
gt_dir = os.path.join(out_dir, 'ground_truth')

# Create the ground truth directory if it is missing. exist_ok=True replaces
# the separate existence check, so there is no window between checking for
# the directory and creating it.
os.makedirs(gt_dir, exist_ok=True)

# Construct the BLM inputs for the ground truth analysis. This run uses the
# full Biobank subject lists and design matrix, and writes into gt_dir.
inputs = {
    "MAXMEM": '2**34',
    "M_files": "/users/nichols/inf852/Biobank1_Mfiles_2021.txt",
    "OutputCovB": '0',
    "X": "/users/nichols/inf852/Biobank1_X_2021.csv",
    "Y_files": "/users/nichols/inf852/Biobank1_Yfiles_2021.txt",
    "analysis_mask": "/well/win/software/packages/fsl/5.0.11/data/standard/MNI152_T1_2mm_brain_mask.nii.gz",
    "contrasts": [
        {
            "c1": {
                "name": "Tcontrast1",
                "vector": '[1, 0, 0, 0, 0, 0]'
            }
        }
    ],
    "outdir": gt_dir,
    "sim": "1",
}

# Write the inputs to 'inputs.yml' inside the ground truth directory
with open(os.path.join(gt_dir, 'inputs.yml'), 'w') as outfile:
    yaml.dump(inputs, outfile, default_flow_style=False)


Run BLM analysis

In [None]:
# Run the ground truth analysis
blm(os.path.join(gt_dir, 'inputs.yml'))

# Marker file whose appearance and subsequent removal this cell treats as
# "analysis started" and "analysis finished" respectively
nb_file = os.path.join(gt_dir, 'nb.txt')

# Seconds to sleep between checks of the marker file. Sleeping keeps the
# wait from spinning a CPU core and hammering the filesystem; the interval is
# kept short so the marker's lifetime is unlikely to fall between two checks.
poll_seconds = 1.0

# Wait until 'nb.txt' exists in the ground truth directory
# NOTE(review): if blm() only returns after 'nb.txt' has already been removed,
# this loop never terminates - confirm that blm() returns while the analysis
# is still running.
while not os.path.exists(nb_file):
    time.sleep(poll_seconds)

# Wait until 'nb.txt' is removed from the ground truth directory
while os.path.exists(nb_file):
    time.sleep(poll_seconds)

# The analysis should now be complete. We can now define the ground truth mu file
mu_file = os.path.join(gt_dir, 'blm_vox_beta.nii')

# Load the mu file
# NOTE(review): `nib` is not imported explicitly in this notebook; presumably
# it is provided by the crtoolbox star imports - confirm.
mu = nib.load(mu_file).get_fdata()

# Keep index 0 along the first axis of the (4D) array and drop any singleton
# dimensions that remain.
# NOTE(review): this assumes the first axis of blm_vox_beta.nii indexes the
# regression parameters - confirm against BLM's output layout.
mu = np.squeeze(mu[0, :, :, :])

# Save mu back to blm_vox_beta.nii. This overwrites BLM's output in place and
# stores the data with an identity affine.
nib.save(nib.Nifti1Image(mu, np.eye(4)), mu_file)

## Run 1000 Random n=100 Analyses


In [None]:
# Set number of simulation instances (the section heading implies 1000 for
# the full run; 1 is presumably a quick-test setting)
n_sim = 1

# Set number of subjects drawn for each simulation instance
# NOTE(review): the section heading says n=100 but this value is 1000 -
# confirm which is intended.
n_sub = 1000

# Threshold c=1% BOLD signal change
c = 10

# Number of subjects available to sample from (indices 0..n_total-1 of the
# Biobank lists below)
n_total = 2021

# Seconds to sleep between checks of BLM's 'nb.txt' marker file. Sleeping
# keeps the wait from spinning a CPU core and hammering the filesystem; the
# interval is kept short so the marker's lifetime is unlikely to fall between
# two checks.
poll_seconds = 1.0

# The full subject lists and design matrix do not change between simulation
# instances, so they are read once here rather than on every iteration.
# Trailing newlines are stripped so a final line without one cannot be merged
# with the next path when subsets are written out.
with open("/users/nichols/inf852/Biobank1_Yfiles_2021.txt") as f:
    all_y_lines = [line.rstrip('\n') for line in f]

with open("/users/nichols/inf852/Biobank1_Mfiles_2021.txt") as f:
    all_m_lines = [line.rstrip('\n') for line in f]

X_all = pd.read_csv("/users/nichols/inf852/Biobank1_X_2021.csv", header=None)

# Loop over simulation instances
for i in range(n_sim):

    # Set the output directory for this simulation instance
    sim_dir = os.path.join(out_dir, 'sim_{:04d}'.format(i))

    # Create it if missing (exist_ok avoids a separate existence check)
    os.makedirs(sim_dir, exist_ok=True)

    # Randomly select n_sub distinct subject indices in [0, n_total)
    sub_idx = np.random.choice(n_total, n_sub, replace=False)

    # Construct the Y list file from the lines of Biobank1_Yfiles_2021.txt
    # corresponding to the selected subject indices
    with open(os.path.join(sim_dir, 'Yfiles.txt'), 'w') as f:
        f.writelines(all_y_lines[idx] + '\n' for idx in sub_idx)

    # Construct the M list file from the lines of Biobank1_Mfiles_2021.txt
    # corresponding to the selected subject indices
    with open(os.path.join(sim_dir, 'Mfiles.txt'), 'w') as f:
        f.writelines(all_m_lines[idx] + '\n' for idx in sub_idx)

    # Construct the X file from the rows of Biobank1_X_2021.csv corresponding
    # to the selected subject indices (same order as the Y and M lists)
    X = X_all.iloc[sub_idx, :]
    X.to_csv(os.path.join(sim_dir, 'X.csv'), header=False, index=False)

    # Construct the BLM inputs for this simulation instance. Same settings as
    # the ground truth run, but pointing at the subsampled files in sim_dir.
    inputs = {
        "MAXMEM": '2**34',
        "M_files": os.path.join(sim_dir, 'Mfiles.txt'),
        "OutputCovB": '0',
        "X": os.path.join(sim_dir, 'X.csv'),
        "Y_files": os.path.join(sim_dir, 'Yfiles.txt'),
        "analysis_mask": "/well/win/software/packages/fsl/5.0.11/data/standard/MNI152_T1_2mm_brain_mask.nii.gz",
        "contrasts": [
            {
                "c1": {
                    "name": "Tcontrast1",
                    "vector": '[1, 0, 0, 0, 0, 0]'
                }
            }
        ],
        "outdir": sim_dir,
        "sim": "1",
    }

    # Write the inputs to 'inputs.yml' inside the simulation directory
    with open(os.path.join(sim_dir, 'inputs.yml'), 'w') as outfile:
        yaml.dump(inputs, outfile, default_flow_style=False)

    # Run the simulation instance
    blm(os.path.join(sim_dir, 'inputs.yml'))

    # Marker file whose appearance and removal are treated as the start and
    # end of the analysis
    nb_file = os.path.join(sim_dir, 'nb.txt')

    # Wait until 'nb.txt' exists in the simulation instance directory
    # NOTE(review): if blm() only returns after 'nb.txt' has already been
    # removed, this loop never terminates - confirm blm() returns while the
    # analysis is still running.
    while not os.path.exists(nb_file):
        time.sleep(poll_seconds)

    # Wait until 'nb.txt' is removed from the simulation instance directory
    while os.path.exists(nb_file):
        time.sleep(poll_seconds)

    # The analysis should now be complete. Read this instance's parameter
    # estimates.
    # NOTE(review): the original read this file from gt_dir, which looks like
    # a copy-paste slip (residuals need the estimates fitted to this sample) -
    # confirm that sim_dir is what was intended.
    betahat_file = os.path.join(sim_dir, 'blm_vox_beta.nii')

    # Read the betahat file
    # NOTE(review): `nib` is not imported explicitly in this notebook;
    # presumably it is provided by the crtoolbox star imports - confirm.
    betahat = nib.load(betahat_file).get_fdata()

    # Design matrix for the selected subjects as a plain float array, one row
    # per subject in the same order as sub_idx
    design = X.to_numpy(dtype=float)
    n_par = design.shape[1]

    # The residual computation below contracts the last axis of betahat with
    # a subject's design row. BLM's axis layout is not visible here, so accept
    # the parameter axis either last or first and fail loudly otherwise.
    if betahat.shape[-1] != n_par:
        if betahat.shape[0] == n_par:
            betahat = np.moveaxis(betahat, 0, -1)
        else:
            raise ValueError(
                'Cannot match betahat of shape {} to a design matrix with {} '
                'columns'.format(betahat.shape, n_par)
            )

    # File names of the residual images written for this simulation instance
    res_fnames = []

    # Loop through Y files and construct residuals
    for j, idx in enumerate(sub_idx):

        # Name of the jth selected subject's Y file (the same path that was
        # written as the jth line of Yfiles.txt above)
        Y_file = all_y_lines[idx].strip()

        # Load the Y file
        Y = nib.load(Y_file).get_fdata()

        # Compute the residuals: observed image minus the fitted image
        # x_j' betahat, evaluated at every voxel
        res = Y - betahat @ design[j]

        # Residual file name
        res_fname = os.path.join(sim_dir, 'res_{:04d}.nii'.format(j))

        # Save the residual (stored with an identity affine)
        nib.save(nib.Nifti1Image(res, np.eye(4)), res_fname)

        # Append the residual to the list of residuals
        res_fnames.append(res_fname)
        
