In [1]:
# First, import the necessary modules and functions
import os
from pprint import pprint

from repo2data.repo2data import Repo2Data

import nimare

# Install the data if running locally, or points to cached data if running on neurolibre
DATA_REQ_FILE = os.path.join("../binder/data_requirement.json")
FIG_DIR = os.path.abspath("../images")

# Download data
repo2data = Repo2Data(DATA_REQ_FILE)
data_path = repo2data.install()
data_path = os.path.join(data_path[0], "data")

---- repo2data starting ----
/home/data/nbc/misc-projects/Salo_NiMARE/conda_env/lib/python3.7/site-packages/repo2data
Config from file :
../binder/data_requirement.json
Destination:
./../data/nimare-paper

Info : ./../data/nimare-paper already downloaded


In [2]:
from nimare import io

sleuth_dset1 = nimare.io.convert_sleuth_to_dataset(
    os.path.join(data_path, "contrast-CannabisMinusControl_space-talairach_sleuth.txt")
)
sleuth_dset2 = nimare.io.convert_sleuth_to_dataset(
    os.path.join(data_path, "contrast-ControlMinusCannabis_space-talairach_sleuth.txt")
)
print(sleuth_dset1)
print(sleuth_dset2)

# Save the Datasets to files for future use
if not os.path.isfile(os.path.join(data_path, "sleuth_dset1.pkl.gz")):
    sleuth_dset1.save(os.path.join(data_path, "sleuth_dset1.pkl.gz"))

if not os.path.isfile(os.path.join(data_path, "sleuth_dset2.pkl.gz")):
    sleuth_dset2.save(os.path.join(data_path, "sleuth_dset2.pkl.gz"))

Dataset(41 experiments, space='ale_2mm')
Dataset(41 experiments, space='ale_2mm')


In [3]:
from nimare import extract

# Download the desired version of Neurosynth from GitHub.
files = extract.fetch_neurosynth(
    data_dir=data_path,
    version="7",
    source="abstract",
    vocab="terms",
    overwrite=False,
)
pprint(files)
neurosynth_db = files[0]

# Convert the files to a Dataset.
# This may take a while (~10 minutes)
neurosynth_dset = io.convert_neurosynth_to_dataset(
    coordinates_file=neurosynth_db["coordinates"],
    metadata_file=neurosynth_db["metadata"],
    annotations_files=neurosynth_db["features"],
)
print(neurosynth_dset)

# Save the Dataset for later use.
if not os.path.isfile(os.path.join(data_path, "neurosynth_dataset.pkl.gz")):
    neurosynth_dset.save(os.path.join(data_path, "neurosynth_dataset.pkl.gz"))

INFO:nimare.extract.utils:Dataset found in ./../data/nimare-paper/data/neurosynth



INFO:nimare.extract.extract:Searching for any feature files matching the following criteria: [('source-abstract', 'vocab-terms', 'data-neurosynth', 'version-7')]


Downloading data-neurosynth_version-7_coordinates.tsv.gz


File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_metadata.tsv.gz


File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz


File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_vocab-terms_vocabulary.txt


File exists and overwrite is False. Skipping.
[{'coordinates': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neurosynth/data-neurosynth_version-7_coordinates.tsv.gz',
  'features': [{'features': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neurosynth/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz',
                'vocabulary': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neurosynth/data-neurosynth_version-7_vocab-terms_vocabulary.txt'}],
  'metadata': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neurosynth/data-neurosynth_version-7_metadata.tsv.gz'}]




Dataset(14371 experiments, space='mni152_2mm')


In [4]:
neurosynth_dset_first_500 = neurosynth_dset.slice(neurosynth_dset.ids[:500])
print(neurosynth_dset)

if not os.path.isfile(os.path.join(data_path, "neurosynth_dataset_first500.pkl.gz")):
    neurosynth_dset_first_500.save(os.path.join(data_path, "neurosynth_dataset_first500.pkl.gz"))

Dataset(14371 experiments, space='mni152_2mm')


In [5]:
# Download the desired version of NeuroQuery from GitHub.
files = extract.fetch_neuroquery(
    data_dir=data_path,
    version="1",
    source="combined",
    vocab="neuroquery6308",
    type="tfidf",
    overwrite=False,
)
pprint(files)
neuroquery_db = files[0]

# Convert the files to a Dataset.
# This may take a while (~10 minutes)
neuroquery_dset = io.convert_neurosynth_to_dataset(
    coordinates_file=neuroquery_db["coordinates"],
    metadata_file=neuroquery_db["metadata"],
    annotations_files=neuroquery_db["features"],
)
print(neuroquery_dset)

# Save the Dataset for later use.
if not os.path.isfile(os.path.join(data_path, "neuroquery_dataset.pkl.gz")):
    neuroquery_dset.save(os.path.join(data_path, "neuroquery_dataset.pkl.gz"))

INFO:nimare.extract.utils:Dataset found in ./../data/nimare-paper/data/neuroquery



INFO:nimare.extract.extract:Searching for any feature files matching the following criteria: [('source-combined', 'vocab-neuroquery6308', 'type-tfidf', 'data-neuroquery', 'version-1')]


Downloading data-neuroquery_version-1_coordinates.tsv.gz


File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_metadata.tsv.gz


File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_vocab-neuroquery6308_source-combined_type-tfidf_features.npz


File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_vocab-neuroquery6308_vocabulary.txt


File exists and overwrite is False. Skipping.
[{'coordinates': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neuroquery/data-neuroquery_version-1_coordinates.tsv.gz',
  'features': [{'features': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neuroquery/data-neuroquery_version-1_vocab-neuroquery6308_source-combined_type-tfidf_features.npz',
                'vocabulary': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neuroquery/data-neuroquery_version-1_vocab-neuroquery6308_vocabulary.txt'}],
  'metadata': '/home/data/nbc/misc-projects/Salo_NiMARE/data/nimare-paper/data/neuroquery/data-neuroquery_version-1_metadata.tsv.gz'}]






Dataset(13459 experiments, space='mni152_2mm')
