In [1]:
import os
from pathlib import Path

from bio_mcp.cache.load import load_biotools, load_galaxy_singularity
from bio_mcp.mcp.server import search_containers, describe_container

## Loading snapshots

Galaxy containers from CVMFS and tool metadata from sources such as bio.tools are both indexed by lowercased tool name, so entries from the two snapshots can be matched on that key.

In [2]:
# Directory holding the snapshot files. Defaults to the original location;
# set the BIO_MCP_DATA_DIR environment variable to use a different checkout
# without editing the notebook.
DATA_DIR = Path(os.environ.get("BIO_MCP_DATA_DIR", "/home/ubuntu/bio-mcp/data"))

# Tool metadata snapshot (YAML).
bt = load_biotools(DATA_DIR / "scrnaseq_biotools.yaml")
# Galaxy Singularity container snapshot (JSON).
gx = load_galaxy_singularity(DATA_DIR / "scrnaseq_galaxy_cvmfs.json")

In [3]:
# Look up the container entries recorded under the lowercased tool name
# "cellbender" and display them (the cell's last expression is rendered).
g = gx.get("cellbender")
g

[{'entry_name': 'cellbender:0.3.0--pyhdfd78af_0',
  'tool_name': 'cellbender',
  'tag': '0.3.0--pyhdfd78af_0',
  'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.0--pyhdfd78af_0',
  'size_bytes': 719122432,
  'mtime': 1703918753.0},
 {'entry_name': 'cellbender:0.3.2--pyhdfd78af_0',
  'tool_name': 'cellbender',
  'tag': '0.3.2--pyhdfd78af_0',
  'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.2--pyhdfd78af_0',
  'size_bytes': 781340672,
  'mtime': 1723531319.0}]

In [4]:
# Display the loaded tool-metadata mapping in full.
bt

{'cellbender': {'biocontainers': None,
  'biotools': 'CellBender',
  'bunya': ['0.3.0'],
  'description': 'a deep generative model for unsupervised removal of background noise from scRNA-seq datasets.\n\nCellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data.\n\nWelcome to CellBender’s documentation! — CellBender documentation.\n\nFree document hosting provided by Read the Docs.\n\nStephen J Fleming, John C Marioni, and Mehrtash Babadi. CellBender remove-background: a deep generative model for unsupervised removal of background noise from scRNA-seq datasets. bioRxiv 791699; doi: https://doi.org/10.1101/791699.',
  'edam-inputs': [],
  'edam-operations': ['Quantification',
   'Data retrieval',
   'Expression analysis'],
  'edam-outputs': [],
  'edam-topics': ['Gene expression', 'RNA', 'RNA-Seq'],
  'galaxy': None,
  'homepage': 'https://github.com/broadinstitute/CellBender',
  'id': 'cellbender',
  'license':

In [5]:
# List the tool names under which container entries are indexed.
gx.keys()

dict_keys(['fastqc', 'multiqc', 'r-seurat', 'r-seurat-scripts', 'scanpy-scripts', 'scanpy', 'seurat-scripts', 'cellbender', 'r-seurat-data', 'r-seurat-disk', 'scanpy-cli'])

## `search_containers()`

In [None]:
# "bcftools" is not among the keys of gx, so no match is expected.
search_containers("bcftools", gx)  # Should return []

[]

In [4]:
# "seurat" is not itself a key of gx; several keys contain it
# (e.g. "r-seurat", "seurat-scripts").
search_containers("seurat", gx)

[[{'entry_name': 'r-seurat:2.3.4--r341h2d50403_0',
   'tool_name': 'r-seurat',
   'tag': '2.3.4--r341h2d50403_0',
   'path': '/cvmfs/singularity.galaxyproject.org/all/r-seurat:2.3.4--r341h2d50403_0',
   'size_bytes': 338092032,
   'mtime': 1566654088.0},
  {'entry_name': 'r-seurat:1.4.0.16--r341h2d50403_1',
   'tool_name': 'r-seurat',
   'tag': '1.4.0.16--r341h2d50403_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/r-seurat:1.4.0.16--r341h2d50403_1',
   'size_bytes': 316043264,
   'mtime': 1566653712.0},
  {'entry_name': 'r-seurat:2.3.4--r351h9d2a408_1',
   'tool_name': 'r-seurat',
   'tag': '2.3.4--r351h9d2a408_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/r-seurat:2.3.4--r351h9d2a408_1',
   'size_bytes': 245907456,
   'mtime': 1566654342.0},
  {'entry_name': 'r-seurat:2.3.4--r341h9d2a408_1',
   'tool_name': 'r-seurat',
   'tag': '2.3.4--r341h9d2a408_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/r-seurat:2.3.4--r341h9d2a408_1',
   'size_bytes': 231788544

In [5]:
# "cellranger" is not a key of gx; the call still returns the entries of the
# similarly named "cellbender".
search_containers("cellranger", gx)  # near match

[[{'entry_name': 'cellbender:0.3.0--pyhdfd78af_0',
   'tool_name': 'cellbender',
   'tag': '0.3.0--pyhdfd78af_0',
   'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.0--pyhdfd78af_0',
   'size_bytes': 719122432,
   'mtime': 1703918753.0},
  {'entry_name': 'cellbender:0.3.2--pyhdfd78af_0',
   'tool_name': 'cellbender',
   'tag': '0.3.2--pyhdfd78af_0',
   'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.2--pyhdfd78af_0',
   'size_bytes': 781340672,
   'mtime': 1723531319.0}]]

In [6]:
# "multiqc" is an exact key of gx.
search_containers("multiqc", gx)

[[{'entry_name': 'multiqc:1.3--py35_1',
   'tool_name': 'multiqc',
   'tag': '1.3--py35_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.3--py35_1',
   'size_bytes': 453115904,
   'mtime': 1567308610.0},
  {'entry_name': 'multiqc:1.0--py36_1',
   'tool_name': 'multiqc',
   'tag': '1.0--py36_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.0--py36_1',
   'size_bytes': 330293248,
   'mtime': 1567309386.0},
  {'entry_name': 'multiqc:1.0--py35_1',
   'tool_name': 'multiqc',
   'tag': '1.0--py35_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.0--py35_1',
   'size_bytes': 329994240,
   'mtime': 1567302753.0},
  {'entry_name': 'multiqc:1.0--py36_4',
   'tool_name': 'multiqc',
   'tag': '1.0--py36_4',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.0--py36_4',
   'size_bytes': 404537344,
   'mtime': 1566114554.0},
  {'entry_name': 'multiqc:1.0--py27_4',
   'tool_name': 'multiqc',
   'tag': '1.0--py27_4',
   'path': '/cvmfs/s

In [8]:
# A list of names can be passed as the query; the result holds one list of
# entries per name, in query order.
search_containers(["cellbender", "multiqc"], gx)

[[{'entry_name': 'cellbender:0.3.0--pyhdfd78af_0',
   'tool_name': 'cellbender',
   'tag': '0.3.0--pyhdfd78af_0',
   'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.0--pyhdfd78af_0',
   'size_bytes': 719122432,
   'mtime': 1703918753.0},
  {'entry_name': 'cellbender:0.3.2--pyhdfd78af_0',
   'tool_name': 'cellbender',
   'tag': '0.3.2--pyhdfd78af_0',
   'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.2--pyhdfd78af_0',
   'size_bytes': 781340672,
   'mtime': 1723531319.0}],
 [{'entry_name': 'multiqc:1.3--py35_1',
   'tool_name': 'multiqc',
   'tag': '1.3--py35_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.3--py35_1',
   'size_bytes': 453115904,
   'mtime': 1567308610.0},
  {'entry_name': 'multiqc:1.0--py36_1',
   'tool_name': 'multiqc',
   'tag': '1.0--py36_1',
   'path': '/cvmfs/singularity.galaxyproject.org/all/multiqc:1.0--py36_1',
   'size_bytes': 330293248,
   'mtime': 1567309386.0},
  {'entry_name': 'multiqc:1.0--py35_1',
   't

## `describe_container()`

In [11]:
# Combine both snapshots for one tool: the result pairs the container
# 'versions' with the tool 'metadata' under a 'container' name.
describe_container(gx, bt, "cellbender")

[{'container': 'cellbender',
  'versions': [{'entry_name': 'cellbender:0.3.0--pyhdfd78af_0',
    'tool_name': 'cellbender',
    'tag': '0.3.0--pyhdfd78af_0',
    'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.0--pyhdfd78af_0',
    'size_bytes': 719122432,
    'mtime': 1703918753.0},
   {'entry_name': 'cellbender:0.3.2--pyhdfd78af_0',
    'tool_name': 'cellbender',
    'tag': '0.3.2--pyhdfd78af_0',
    'path': '/cvmfs/singularity.galaxyproject.org/all/cellbender:0.3.2--pyhdfd78af_0',
    'size_bytes': 781340672,
    'mtime': 1723531319.0}],
  'metadata': {'biocontainers': None,
   'biotools': 'CellBender',
   'bunya': ['0.3.0'],
   'description': 'a deep generative model for unsupervised removal of background noise from scRNA-seq datasets.\n\nCellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data.\n\nWelcome to CellBender’s documentation! — CellBender documentation.\n\nFree document hosting 

In [12]:
# Echo every tool name of interest, one per line.
for tool in ("cellbender",):
    print(tool)

cellbender
