# Pybids transformers and generating BIDS stats models

Adapted from the slides made by Jeanette Mumford:
https://docs.google.com/presentation/d/1Bsfx9K4jz-YveUA4JpmqK-s1LnFNqFaGemEDbgFBoOc/edit#slide=id.g1269976e58a_0_85

In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell is executed and edits to their source files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from os.path import join
from bids import BIDSLayout
from bids.tests import get_test_data_path

In [3]:
# Toy participants table with one row per subject.
# The identifier column is spelled `participant_id`, the column name BIDS
# uses for participant-level tables.
dataset = pd.DataFrame({
    "participant_id": ["sub-01", "sub-02", "sub-03", "sub-04"],
    "sex": ["M", "M", "F", "F"],
    "age": [25, 18, 22, 25]
})
dataset

Unnamed: 0,particiant_id,sex,age
0,sub-01,M,25
1,sub-02,M,18
2,sub-03,F,22
3,sub-04,F,25


In [4]:
# Toy events table for one run: three "word" events followed by three
# "pseudoword" events, all with the same duration, plus a made-up
# reaction-time column.
run = pd.DataFrame(
    dict(
        onset=[20, 37.5, 60, 180, 182.5, 230],
        duration=[2] * 6,
        trial_type=["word"] * 3 + ["pseudoword"] * 3,
        rt_pretend=[0.5, 0.6, 0.55, 0.5, 0.7, 0.8],
    )
)
run

Unnamed: 0,onset,duration,trial_type,rt_pretend
0,20.0,2,word,0.5
1,37.5,2,word,0.6
2,60.0,2,word,0.55
3,180.0,2,pseudoword,0.5
4,182.5,2,pseudoword,0.7
5,230.0,2,pseudoword,0.8


Add amplitude as it seems necessary

In [5]:
# Add a constant amplitude of 1 to each of the six events, then show the
# table as plain text.
run["amplitude"] = [1] * 6
print(run)

   onset  duration  trial_type  rt_pretend  amplitude
0   20.0         2        word        0.50          1
1   37.5         2        word        0.60          1
2   60.0         2        word        0.55          1
3  180.0         2  pseudoword        0.50          1
4  182.5         2  pseudoword        0.70          1
5  230.0         2  pseudoword        0.80          1


Trying to manually convert this to a collection...

But do we need all of this, or is there a more lightweight version?

**Also, how do you do this for dataset-level variables?**

In [6]:
from bids.variables import SparseRunVariable
from bids.variables.entities import RunInfo
from bids.variables.collections import BIDSRunVariableCollection, BIDSVariableCollection
import numpy as np

# Describe a single made-up run for subject 01. The image name is a
# placeholder; no such file is read here.
run_info = [
    RunInfo(
        entities={'subject': '01'},
        duration=600,
        tr=2,
        image='dummy.nii.gz',
        n_vols=300,
    )
]
print(run_info)

# Wrap the events table in a sparse run variable, then put that single
# variable into a run-level collection.
var = SparseRunVariable(
    name='var',
    data=run,
    run_info=run_info,
    source='events',
)
collection = BIDSRunVariableCollection([var])

print(collection)

[RunInfo(entities={'subject': '01'}, duration=600, tr=2, image='dummy.nii.gz', n_vols=300)]
<bids.variables.collections.BIDSRunVariableCollection object at 0x7f8add1142e0>


---

## Factor

```json
{"Instructions":
 [
     {"Name": "Factor",
      "Input": "sex"}
 ]
}
```

In [7]:
from bids.modeling.transformations.munge import Factor

In [8]:
# IPython help query: show the signature and docstring of Factor.
Factor?

Init signature: Factor(collection, variables, *args, **kwargs)
Docstring:      <no docstring>
File:           ~/github/pybids/bids/modeling/transformations/munge.py
Type:           ABCMeta
Subclasses:     


In [9]:
# Index the ds005 dataset found under the pybids test-data directory.
layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)
# Gather the 'dataset'-level variables, merged into a single collection.
c = layout.get_collections('dataset', merge=True)

# Factor changes the collection in place (its return value is not used):
# in the output below `sex` is replaced by the indicator columns
# `sex.0` and `sex.1`.
Factor(c, 'sex')

c.to_df().head()

Unnamed: 0,subject,age,sex.0,sex.1,suffix
0,1,28.0,1.0,0.0,participants
1,2,21.0,0.0,1.0,participants
2,3,27.0,0.0,1.0,participants
3,4,25.0,1.0,0.0,participants
4,5,20.0,0.0,1.0,participants


## Factor and product

```json
{"Instructions":
 [
     {"Name": "Factor",
      "Input": "sex"},
     {"Name": "Product",
      "Input": ["sex.1", "age"],
      "Output": "ageM"},
     {"Name": "Product",
      "Input": ["sex.0", "age"],
      "Output": "ageF"}
 ]
}
```

In [10]:
from bids.modeling.transformations.compute import Product

In [11]:
# IPython help query: show the signature and docstring of Product.
Product?

Init signature: Product(collection, variables, *args, **kwargs)
Docstring:      <no docstring>
File:           ~/github/pybids/bids/modeling/transformations/compute.py
Type:           ABCMeta
Subclasses:     


In [12]:
# Rebuild the collection from the dataset so this cell starts from the
# original variables rather than from columns added by earlier cells.
layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)
c = layout.get_collections('dataset', merge=True)

# Order matters: Factor creates the `sex.0` / `sex.1` indicator columns
# that the two Product calls take as input.
Factor(c, 'sex')
# Multiply each indicator by `age`. In the output below each product equals
# `age` where its indicator is 1 and is 0 elsewhere.
Product(c, ["sex.1", "age"], output="ageM")
Product(c, ["sex.0", "age"], output="ageF")

c.to_df().head()

Unnamed: 0,subject,age,ageF,ageM,sex.0,sex.1,suffix
0,1,28.0,28.0,0.0,1.0,0.0,participants
1,2,21.0,0.0,21.0,0.0,1.0,participants
2,3,27.0,0.0,27.0,0.0,1.0,participants
3,4,25.0,25.0,0.0,1.0,0.0,participants
4,5,20.0,0.0,20.0,0.0,1.0,participants


## Threshold

```json
{"Instructions":
 [
     {"Name": "Threshold",
      "Input": "age",
      "Threshold": 20,
      "Binarize": true,
      "Output": "age_gt_20"}
 ]
}
```

In [13]:
from bids.modeling.transformations.compute import Threshold

In [14]:
# IPython help query: show the signature and docstring of Threshold.
Threshold?

Init signature: Threshold(collection, variables, *args, **kwargs)
Docstring:     
Threshold and/or binarize a variable.

Parameters
----------
data :obj:`pandas.Series` or :obj:`pandas.DataFrame`
    The pandas structure to threshold.
threshold : float
    The value to binarize around (values above will
    be assigned 1, values below will be assigned 0).
binarize : bool
    If True, binarizes all non-zero values (i.e., every
    non-zero value will be set to 1).
above : bool
    Specifies which values to retain with respect to the
    cut-off. If True, all value above the threshold will be kept; if
    False, all values below the threshold will be kept. Defaults to
    True.
signed : bool
    Specifies whether to treat the threshold as signed
    (default) or unsigned. For example, when passing above=True and
    threshold

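If “Binarize” is false (the default), values below the threshold are set to zero and the remaining values are kept as they are. Add “Above”: false if you want to reverse the threshold, i.e. keep the values below the cut-off instead (“Above” defaults to true).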

In [15]:
# Rebuild the collection from the dataset so this cell starts from the
# original variables rather than from columns added by earlier cells.
layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)
c = layout.get_collections('dataset', merge=True)

# Because `output` is given, the result goes to a new `age_gt_20` column and
# `age` itself is left as it was (see the output below).
# NOTE(review): in the output below the subject with age == 20 also gets 1,
# so the cut-off looks inclusive (>=) despite the "gt" in the name - confirm.
Threshold(c, "age", threshold=20, binarize=True, output="age_gt_20")

c.to_df().head()

Unnamed: 0,subject,age,age_gt_20,sex,suffix
0,1,28,1,0,participants
1,2,21,1,1,participants
2,3,27,1,1,participants
3,4,25,1,0,participants
4,5,20,1,1,participants


## Scale

```json
{"Instructions":
 [
     {"Name": "Scale",
      "Input": "age",
      "Output": "age_centered_scaled"},
     {"Name": "Scale",
      "Input": "age",
      "Demean": true,
      "Rescale": false,
      "Output": "age_centered_not_scaled"}
 ]
}
```

In [16]:
from bids.modeling.transformations.compute import Scale

In [17]:
# IPython help query: show the signature and docstring of Scale.
Scale?

Init signature: Scale(collection, variables, *args, **kwargs)
Docstring:     
Scale a variable.

Parameters
----------
data : :obj:`pandas.Series` or :obj:`pandas.DataFrame`
    The variables to scale.
demean : bool
    If True, demean each column.
rescale : bool
    If True, divide variables by their standard deviation.
replace_na : str
    Whether/when to replace missing values with 0. If
    None, no replacement is performed. If 'before', missing values are
    replaced with 0's before scaling. If 'after', missing values are
    replaced with 0 after scaling.

Notes
-----
If a constant column is passed in, and replace_na is None or 'before', an
exception will be raised.
File:           ~/github/pybids/bids/modeling/transformations/compute.py
Type:           ABCMeta
Subclasses:     


In [18]:
# Rebuild the collection from the dataset so this cell starts from the
# original variables rather than from columns added by earlier cells.
layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)
c = layout.get_collections('dataset', merge=True)

# First call relies on Scale's defaults; second call demeans only.
# In the output below `age_centered_scaled` is `age_centered_not_scaled`
# divided by one constant, so the default call appears to demean and rescale.
Scale(c, "age", output="age_centered_scaled")
Scale(c, "age", demean=True, rescale=False, output="age_centered_not_scaled")

c.to_df().head()

Unnamed: 0,subject,age,age_centered_not_scaled,age_centered_scaled,sex,suffix
0,1,28.0,5.9375,2.073992,0.0,participants
1,2,21.0,-1.0625,-0.371135,1.0,participants
2,3,27.0,4.9375,1.724688,1.0,participants
3,4,25.0,2.9375,1.02608,0.0,participants
4,5,20.0,-2.0625,-0.720439,1.0,participants


## And / Or / Not

```json
{"Instructions":
 [
     {"Name": "Factor",
      "Input": "sex"},
     {"Name": "Threshold",
      "Input": "age",
      "Threshold": 20,
      "Binarize": true,
      "Output": "age_gt_20"},
     {"Name": "And",
      "Input": ["sex.1", "age_gt_20"],
      "Output": "men_older_than_20"}
 ]
}
```


In [19]:
from bids.modeling.transformations.compute import And_
from bids.modeling.transformations.compute import Or_
from bids.modeling.transformations.compute import Not

In [20]:
# IPython help query: show the signature and docstring of And_.
And_?

Init signature: And_(collection, variables, *args, **kwargs)
Docstring:     
Logical AND on two or more variables.

Parameters
----------
dfs : list of :obj:`pandas.DataFrame`
    variables to enter into the conjunction.
File:           ~/github/pybids/bids/modeling/transformations/compute.py
Type:           ABCMeta
Subclasses:     


In [21]:
# Rebuild the collection from the dataset so this cell starts from the
# original variables rather than from columns added by earlier cells.
layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)
c = layout.get_collections('dataset', merge=True)

# Create the two 0/1 columns that the conjunction takes as input.
Factor(c, 'sex')
Threshold(c, "age", threshold=20, binarize=True, output="age_gt_20")

# Logical AND of the two columns: in the output below the result is 1 only
# on rows where both `sex.1` and `age_gt_20` are 1.
And_(c, ["sex.1", "age_gt_20"], output="men_older_than_20")

c.to_df().head()

Unnamed: 0,subject,age,age_gt_20,men_older_than_20,sex.0,sex.1,suffix
0,1,28.0,1.0,0.0,1.0,0.0,participants
1,2,21.0,1.0,1.0,0.0,1.0,participants
2,3,27.0,1.0,1.0,0.0,1.0,participants
3,4,25.0,1.0,0.0,1.0,0.0,participants
4,5,20.0,1.0,1.0,0.0,1.0,participants


## Generating BIDS stats models

In [22]:
from bids.modeling import auto_model
import json

layout_path = join(get_test_data_path(), 'ds005')
layout = BIDSLayout(layout_path)

# The pybids test datasets contain no images, so a dummy scan_length
# has to be supplied for auto_model to run.
model = auto_model(layout, scan_length=600, one_vs_rest=True)

# Serialise the first generated model and write it out as JSON.
with open("model-ds005_smdl.json", "w") as smdl_file:
    smdl_file.write(json.dumps(model[0]))

In [23]:
# Print the JSON model file written by the previous cell (shell `cat`).
!cat model-ds005_smdl.json

{"Name": "ds005_mixedgamblestask", "Description": "Autogenerated model for the mixedgamblestask task from ds005", "Input": {"Task": "mixedgamblestask"}, "Nodes": [{"Level": "Run", "Name": "Run", "Transformations": [{"Name": "Factor", "Input": ["trial_type"]}, {"Name": "Convolve", "Input": ["trial_type.parametric gain"]}], "Model": {"X": ["trial_type.parametric gain"]}, "Contrasts": [{"Name": "run_parametric gain", "ConditionList": ["trial_type.parametric gain"], "Weights": [1.0], "Test": "t"}]}, {"Level": "Subject", "Name": "Subject", "Model": {"X": ["run_parametric gain"]}, "Contrasts": [{"Name": "subject_run_parametric gain", "ConditionList": ["run_parametric gain"], "Weights": [1], "Test": "FEMA"}]}, {"Level": "Dataset", "Name": "Dataset", "Model": {"X": ["subject_run_parametric gain"]}, "Contrasts": [{"Name": "dataset_subject_run_parametric gain", "ConditionList": ["subject_run_parametric gain"], "Weights": [1], "Test": "t"}]}]}