In [1]:
#@title Input file(s) and parameters

## Upload the files

#@markdown * Open the 🗀 Files panel to the left
#@markdown * Upload your query fasta file(s) in the root directory.
#@markdown * Optionally, adjust padding (residues added to trimmed domain sequence) if needed:
pad_n = 30 # @param {"type":"integer"}
pad_c = 30 # @param {"type":"integer"}
#@markdown * Adjust the minimum score (the default might result in false positives if the sequences are not necessarily rhodopsins):
min_score = 15 # @param {"type":"integer"}
#@markdown * ▹ Run the workflow

import os

# Publish the form values as environment variables so that the %%bash cell
# running the workflow can read them as $PAD_N, $PAD_C and $MIN_SCORE.
os.environ.update(
    PAD_N=str(pad_n),
    PAD_C=str(pad_c),
    MIN_SCORE=str(min_score),
)


In [2]:
#@title Checking the input

from fnmatch import fnmatchcase
from pathlib import Path

# Collect the query files: regular files in the working directory whose name
# matches *.fa* regardless of case (.fa, .fasta, .FAA, ...).
# The name is lower-cased before matching instead of passing
# case_sensitive=False to Path.glob(), because that keyword only exists in
# Python >= 3.12 and raises TypeError on older kernels.
# Directories are skipped: they cannot be used as fasta input.
fasta_files = sorted(
    path for path in Path('.').iterdir()
    if path.is_file() and fnmatchcase(path.name.lower(), '*.fa*')
)

# Stop "Run all" here with a clear error rather than letting the workflow
# cell run on nothing.
if not fasta_files:
    raise FileNotFoundError("No fasta files found in the folder")

In [3]:
#@title Install dependencies

%%bash

# Installs micromamba and zstd (if missing) and unpacks the pre-built
# opsintools conda environment into $ENV_DIR.
# Any failing step aborts the cell with a non-zero status, so the notebook
# stops here instead of continuing with a broken environment.
set -euo pipefail

ENV_DIR=/content/mmb/envs/env

# The micromamba download runs in the background while zstd is installed;
# its PID is kept so that its exit status can be checked below.
micromamba_pid=
if [ ! -s /usr/bin/micromamba ]; then
    echo "Downloading micromamba"
    url=https://micro.mamba.pm/api/micromamba/linux-64/latest
    wget -qO- "$url" | tar -C /usr/ -xj bin/micromamba &
    micromamba_pid=$!
fi
if ! command -v zstd > /dev/null; then
    echo "Installing zstd"
    apt install -y zstd &> /dev/null || { echo "Failed to install zstd" >&2; exit 1; }
fi
# `wait PID` returns the status of the background pipeline (a bare `wait`
# always succeeds), so a failed download is caught by `set -e`.
if [ -n "$micromamba_pid" ]; then
    wait "$micromamba_pid" || { echo "Failed to download micromamba" >&2; exit 1; }
fi
if [ ! -d "$ENV_DIR" ]; then
    echo "Setting up the environmemt"
    url=https://github.com/BejaLab/opsintools/releases/download/v0.4-beta/opsintools-v0.4-beta_conda_env_linux-64.tar.zst
    # Extract into a staging directory and move it into place only once the
    # download succeeded. Otherwise a failed or interrupted download would
    # leave $ENV_DIR behind and every re-run would skip the setup.
    staging_dir="$ENV_DIR.partial"
    rm -rf "$staging_dir"
    mkdir -p "$staging_dir"
    wget -qO- "$url" | tar -I "zstd -T4" -xf - -C "$staging_dir"
    mv "$staging_dir" "$ENV_DIR"
    # conda-unpack is run from the final location, as before; if it fails the
    # environment is removed so that a re-run starts from scratch.
    (cd "$ENV_DIR" && ./bin/conda-unpack) || { rm -rf "$ENV_DIR"; exit 1; }
fi

Downloading micromamba
Installing zstd
Setting up the environmemt


In [4]:
#@title Download reference data

import os, subprocess
import shutil
from pathlib import Path

ROOT = "/content/mmb"
ENV = "env"
DATA_DIR = "data"

os.environ['MAMBA_ROOT_PREFIX'] = ROOT

# Put the micromamba directories in front of PATH.
# * No leading separator: an empty PATH entry means "current directory",
#   which is where users upload their files.
# * Entries that are already present are dropped from the inherited part so
#   that re-running the cell does not keep growing PATH.
tool_dirs = [f"{ROOT}/bin", f"{ROOT}/condabin", "/content/bin/"]
inherited_path = os.environ.get('PATH', '')
inherited_dirs = inherited_path.split(os.pathsep) if inherited_path else []
os.environ['PATH'] = os.pathsep.join(
    tool_dirs + [entry for entry in inherited_dirs if entry not in tool_dirs]
)

os.environ['CONDA_PREFIX'] = ROOT
os.environ['CONDA_SHLVL'] = '1'
os.environ['CONDA_DEFAULT_ENV'] = 'base'
os.environ['CONDA_PROMPT_MODIFIER'] = 'base'

# The download is skipped when the data directory already exists, so a
# directory left behind by a failed or interrupted download must be removed;
# otherwise every re-run would treat the incomplete data as complete.
if not os.path.exists(DATA_DIR):
    print("Downloading the data")
    try:
        # Argument list instead of a shell string: no shell parsing involved.
        subprocess.run(
            ["micromamba", "run", "-n", ENV, "opsindata", "-d", DATA_DIR],
            check=True,
        )
    except BaseException:  # includes KeyboardInterrupt (cell interrupted)
        shutil.rmtree(DATA_DIR, ignore_errors=True)
        raise


Downloading the data


In [5]:
%%bash

# Runs opsinmaphmm on every fasta file in the working directory and writes
# the results to output/<file name>. The cell exits with a non-zero status
# if the prerequisites are missing or if any of the files fails.

shopt -s nullglob
shopt -s nocaseglob

# PAD_N, PAD_C and MIN_SCORE are exported by the parameters cell; abort with
# a clear message instead of passing empty values if that cell was not run.
: "${PAD_N:?PAD_N is not set - run the parameters cell first}"
: "${PAD_C:?PAD_C is not set - run the parameters cell first}"
: "${MIN_SCORE:?MIN_SCORE is not set - run the parameters cell first}"

# With nullglob an unmatched pattern expands to nothing, which would silently
# drop the argument of -d; check the expansion explicitly.
data_dirs=(data/*/*)
if [ ${#data_dirs[@]} -eq 0 ]; then
    echo "No reference data found under data/ - run the download cell first" >&2
    exit 1
fi

fasta_files=(*.fa*)
if [ ${#fasta_files[@]} -eq 0 ]; then
    echo "No fasta files found in the folder" >&2
    exit 1
fi

failed=()
for fasta_file in "${fasta_files[@]}"; do
    # The pattern can also match directories; only regular files are inputs.
    [ -f "$fasta_file" ] || continue
    prefix=$(basename -- "$fasta_file")
    if ! micromamba run -n env opsinmaphmm -i "$fasta_file" -t 2 -o "output/$prefix" -d "${data_dirs[@]}" --pad-n "$PAD_N" --pad-c "$PAD_C" --min-score "$MIN_SCORE"; then
        # Keep going with the remaining files, but remember the failure:
        # the status of the cell would otherwise be that of the last file only.
        failed+=("$fasta_file")
    fi
done

if [ ${#failed[@]} -gt 0 ]; then
    printf 'opsinmaphmm failed for: %s\n' "${failed[@]}" >&2
    exit 1
fi

[38;20m2026-02-19 17:26:01 - INFO - Checking the input[0m
[38;20m2026-02-19 17:26:01 - INFO - Doing the searches[0m
[38;20m2026-02-19 17:26:06 - INFO - hmmsearch finished[0m
[38;20m2026-02-19 17:26:07 - INFO - Finished[0m


In [6]:
#@title Output

from google.colab import files
import shutil
from pprint import pprint

# Pack the whole output directory into a zip archive in the working
# directory and hand it to the browser as a download.
archive_base = 'opsinmaphmm_output'
shutil.make_archive(archive_base, 'zip', 'output')
files.download(f"{archive_base}.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>