In [1]:
import pandas as pd
import requests
import json
from urllib.parse import urlparse
from tqdm import tqdm

# Step 1: Get the actual latest version tag
def get_latest_snapshot(dataset_id: str):
    """Return the highest snapshot tag of an OpenNeuro dataset.

    Posts a GraphQL query listing every snapshot tag of ``dataset_id`` and
    returns the greatest one. Dotted numeric tags are compared component by
    component as integers, so ``"1.0.10"`` ranks above ``"1.0.9"`` (a plain
    string sort picks ``"1.0.9"``).

    Args:
        dataset_id: Dataset identifier passed as the GraphQL ``$id`` variable,
            e.g. ``"ds005264"``.

    Returns:
        The selected snapshot tag (``str``).

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        Exception: If the response carries no dataset, or the dataset has no
            snapshots.
    """
    graphql_url = "https://openneuro.org/crn/graphql"
    query = """
      query ($id: ID!) {
        dataset(id: $id) {
          snapshots {
            tag
          }
        }
      }
    """

    def _tag_key(tag):
        # Dotted all-numeric tags ("1.0.10") are compared as integer tuples.
        # Any other tag keeps the previous plain string ordering and ranks
        # below dotted numeric tags.
        # NOTE(review): assumes non-dotted tags are older than dotted ones - confirm.
        parts = tag.split(".")
        if len(parts) > 1 and all(p.isascii() and p.isdigit() for p in parts):
            return (1, tuple(int(p) for p in parts), tag)
        return (0, (), tag)

    # A timeout keeps a stalled connection from hanging the notebook forever.
    res = requests.post(
        graphql_url,
        json={"query": query, "variables": {"id": dataset_id}},
        timeout=30,
    )
    res.raise_for_status()
    payload = res.json()

    # GraphQL can report failures inside a successful HTTP response, in which
    # case "data" or "dataset" is null; fail with a readable message instead
    # of a TypeError on subscripting None.
    dataset = (payload.get("data") or {}).get("dataset")
    if dataset is None:
        raise Exception(f"Dataset {dataset_id} not found: {payload.get('errors')}")

    snaps = dataset.get("snapshots")
    if not snaps:
        raise Exception("No snapshots found.")
    return max((s["tag"] for s in snaps), key=_tag_key)

# Step 2: Fetch JSON file URLs from that version
def get_json_urls(dataset_id: str, version_tag: str):
    """Map each ``.json`` file of a dataset snapshot to its first download URL.

    Posts a GraphQL query for the ``files`` of the snapshot identified by
    ``dataset_id`` and ``version_tag`` and keeps the entries whose filename
    ends in ``.json``.

    Args:
        dataset_id: Dataset identifier passed as the GraphQL ``$id`` variable.
        version_tag: Snapshot tag passed as the GraphQL ``$tag`` variable.

    Returns:
        dict mapping filename -> first entry of that file's ``urls`` list.
        Files with an empty or missing ``urls`` list are skipped.

    Raises:
        Exception: If the HTTP status is not 200, or if the response contains
            no snapshot.
    """
    graphql_url = "https://openneuro.org/crn/graphql"
    query = """
      query ($id: ID!, $tag: String!) {
        snapshot(datasetId: $id, tag: $tag) {
          files {
            filename
            urls
          }
        }
      }
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    res = requests.post(
        graphql_url,
        json={"query": query, "variables": {"id": dataset_id, "tag": version_tag}},
        timeout=30,
    )
    if res.status_code != 200:
        raise Exception(f"GraphQL error fetching version {version_tag}: {res.status_code}")

    payload = res.json()
    # GraphQL can answer 200 with "errors" and a null snapshot (for example an
    # unknown tag); report that explicitly rather than failing on None.
    snapshot = (payload.get("data") or {}).get("snapshot")
    if snapshot is None:
        raise Exception(
            f"Snapshot {version_tag} of {dataset_id} not found: {payload.get('errors')}"
        )

    return {
        entry["filename"]: entry["urls"][0]
        for entry in (snapshot.get("files") or [])
        # Skip entries without any URL instead of raising IndexError on [0].
        if entry["filename"].endswith(".json") and entry.get("urls")
    }

# Utility: flatten nested JSON
def flatten_json(y):
    """Flatten nested dicts/lists into a single-level dict with dotted keys.

    Dict keys and list indices along the path to each leaf are joined with
    ``"."``; e.g. ``{"a": {"b": [1, 2]}}`` becomes ``{"a.b.0": 1, "a.b.1": 2}``.
    Key order follows the traversal order of the input.

    An empty dict or list nested under a key is kept as the value of that key
    rather than vanishing, so no key of the input is lost. An empty top-level
    container yields ``{}``; a top-level scalar is stored under the key ``""``.

    Args:
        y: A decoded JSON value (dict, list or scalar).

    Returns:
        dict mapping dotted path -> leaf value.
    """
    out = {}

    def _walk(node, path):
        if isinstance(node, dict):
            children = node.items()
        elif isinstance(node, list):
            children = enumerate(node)
        else:
            out[".".join(path)] = node
            return

        if not node and path:
            # Empty container below the root: record it as a leaf so the key
            # still shows up in the flattened result.
            out[".".join(path)] = node
            return

        for key, child in children:
            _walk(child, path + (str(key),))

    _walk(y, ())
    return out

# Main loader function
def jsons_to_dataframe(openneuro_url: str):
    """Download every JSON file of an OpenNeuro dataset into one DataFrame.

    The dataset id is the path segment following ``datasets`` in
    ``openneuro_url`` (if there is no ``datasets`` segment, the second path
    segment is used). If the id is followed by ``versions/<tag>`` that tag is
    used; otherwise the tag is obtained from ``get_latest_snapshot``. Each
    JSON file listed by ``get_json_urls`` is downloaded, flattened with
    ``flatten_json`` and stored as one row.

    Args:
        openneuro_url: Dataset URL such as
            ``https://openneuro.org/datasets/ds005264`` or
            ``https://openneuro.org/datasets/ds005264/versions/1.0.0``.

    Returns:
        pandas.DataFrame indexed by filename (index name ``"__file__"``) with
        one column per flattened JSON key. If the snapshot lists no JSON
        files, an empty frame with that index name is returned.

    Raises:
        ValueError: If no dataset id can be extracted from the URL.
        requests.HTTPError: If a file download answers with an error status.
    """
    # Drop empty segments so doubled or trailing slashes do not shift positions.
    parts = [seg for seg in urlparse(openneuro_url).path.split("/") if seg]

    # Anchor on the "datasets" segment rather than a fixed position: a URL
    # given without a scheme keeps the host inside the path, which would
    # otherwise make "datasets" itself look like the id.
    anchor = parts.index("datasets") if "datasets" in parts else 0
    if len(parts) <= anchor + 1:
        raise ValueError(f"Could not find a dataset id in URL: {openneuro_url!r}")
    dataset_id = parts[anchor + 1]
    tail = parts[anchor + 2:]
    version = tail[1] if len(tail) > 1 and tail[0] == "versions" else None

    if version is None:
        version = get_latest_snapshot(dataset_id)
        print(f"No version specified — using latest: {version}")
    else:
        print(f"Using specified version: {version}")

    json_urls = get_json_urls(dataset_id, version)
    print(f"Found {len(json_urls)} JSON files for version {version}")

    if not json_urls:
        # With no records there is no "__file__" column, so set_index would
        # raise KeyError; return an empty frame of the same layout instead.
        return pd.DataFrame(index=pd.Index([], name="__file__"))

    records = []
    for fname, url in tqdm(json_urls.items(), desc="Downloading JSONs"):
        # A timeout keeps one stalled download from hanging the whole loop.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        flat = flatten_json(r.json())
        flat["__file__"] = fname
        records.append(flat)

    return pd.DataFrame(records).set_index("__file__")

In [2]:

# Example use: this URL has no "/versions/<tag>" segment, so the snapshot tag
# is looked up via get_latest_snapshot before the JSON files are downloaded.
df = jsons_to_dataframe("https://openneuro.org/datasets/ds005264")
# Preview the first rows: one row per JSON file, one column per flattened key.
df.head()


No version specified — using latest: 1.0.0
Found 2 JSON files for version 1.0.0


Downloading JSONs: 100%|██████████| 2/2 [00:00<00:00,  2.73it/s]


Unnamed: 0_level_0,Name,BIDSVersion,License,Authors.0,Authors.1,Authors.2,Authors.3,Authors.4,Authors.5,Authors.6,...,Handed.Description,Handed.Levels.R,Handed.Levels.L,Gender.Description,Gender.Levels.M,Gender.Levels.F,Gender.Levels.NB,Group.Description,Group.Levels.control,Group.Levels.pd
__file__,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
dataset_description.json,SoCal Kinesia and Incentivization for Parkinso...,1.8.0,CC0,Neil M. Dundon,Elizabeth Rizor,Joanne Stasiak,Jingyi Wang,Kiana Sabugo,Christina Villaneuva,Parker Barandon,...,,,,,,,,,,
participants.json,,,,,,,,,,,...,Handedness of the participant,Right,Left,Gender of the participant,Male,Female,Nonbinary,Experimental group of the participant,Healthy control group,Parkinson's Disease group
