In [None]:
#@title Input file(s) and parameters

#@markdown ### To clean up existing PDB files:

#@markdown * Open the 🗀 Files panel to the left
#@markdown * Upload your pdb file(s) in the root directory

#@markdown ### To fetch and clean protein structures from PDB:

#@markdown Comma-separated list of PDB accessions:
accessions = "" # @param {"type":"string"}

#@markdown ### Settings:

#@markdown Comma-separated list of chains:
chains = "" # @param {"type":"string"}
#@markdown Comma-separated list of **non-covalent** ligands to **retain** (all removed by default):
retain_ligands = "" # @param {"type":"string"}
#@markdown Comma-separated list of **covalent** ligands to remove (none removed by default):
remove_ligands = "" # @param {"type":"string"}
#@markdown Re-map LYR atoms (split into LYS and RET):
remap_lyr = True # @param {"type":"boolean"}
#@markdown Remove hydrogen atoms
remove_h = True # @param {"type":"boolean"}
#@markdown Remove water molecules
remove_w = True # @param {"type":"boolean"}
#@markdown Remove alternative conformations
remove_alt = True # @param {"type":"boolean"}


import os, re

# The form values are handed to the later %%bash cells through environment
# variables, because those cells cannot see Python variables directly.

# List-like settings: upper-cased with every whitespace character removed,
# so "1abc, 2xyz" becomes "1ABC,2XYZ".
_list_settings = {
    'ACCESSIONS': accessions,
    'CHAINS': chains,
    'RETAIN_LIGANDS': retain_ligands,
    'REMOVE_LIGANDS': remove_ligands,
}
for _name, _value in _list_settings.items():
    os.environ[_name] = re.sub(r"\s", "", _value.upper())

# Boolean settings are exported as the strings "True" / "False".
_flag_settings = {
    'REMAP_LYR': remap_lyr,
    'REMOVE_H': remove_h,
    'REMOVE_W': remove_w,
    'REMOVE_ALT': remove_alt,
    # The original cell read an undefined `split_lyr` variable here, which
    # raised NameError. SPLIT_LYR is kept as an alias of REMAP_LYR in case
    # something outside this notebook still reads it.
    'SPLIT_LYR': remap_lyr,
}
for _name, _value in _flag_settings.items():
    os.environ[_name] = str(_value)


In [None]:
#@title Checking the input

from pathlib import Path

# Collect entries in the working directory whose name ends in ".pdb",
# ignoring case. This avoids Path.glob(case_sensitive=...), which is only
# available on Python 3.12+ and raises TypeError on older runtimes.
pdb_files = [path for path in Path('.').iterdir() if path.name.lower().endswith('.pdb')]

# Fail early when there is nothing to process. A whitespace-only accession
# field counts as empty, since whitespace is stripped before it is exported.
if not pdb_files and not accessions.strip():
    raise FileNotFoundError("No PDB files found in the folder and no accession provided")

In [None]:
#@title Install dependencies

%%bash

# Make a pipeline fail when any stage fails (e.g. wget in "wget | tar"),
# so that a broken download is not mistaken for a successful unpack.
set -o pipefail

# Directory the pre-built conda environment is unpacked into.
ENV_DIR=/content/mmb/envs/env

# Fetch the micromamba binary in the background while zstd is installed.
if [ ! -s /usr/bin/micromamba ]; then
    echo "Downloading micromamba"
    url=https://micro.mamba.pm/api/micromamba/linux-64/latest
    wget -qO- "$url" | tar -C /usr/ -xj bin/micromamba &
fi
if [ -z "$(command -v zstd)" ]; then
    echo "Installing zstd"
    apt install -y zstd &> /dev/null
fi
# Wait for the background micromamba download, then verify it arrived.
wait
if [ ! -s /usr/bin/micromamba ]; then
    echo "Failed to download micromamba" >&2
    exit 1
fi

if [ ! -d "$ENV_DIR" ]; then
    echo "Setting up the environment"
    mkdir -p "$ENV_DIR"
    url=https://github.com/BejaLab/opsintools/releases/download/v0.4-beta/opsintools-v0.4-beta_conda_env_linux-64.tar.zst
    # Download, unpack and relocate the environment as one unit. On failure
    # remove the partial directory, otherwise the "-d" check above would
    # skip the setup on the next run and leave a broken environment.
    if ! { wget -qO- "$url" | tar -I "zstd -T4" -xf - -C "$ENV_DIR" \
           && (cd "$ENV_DIR" && ./bin/conda-unpack); }; then
        rm -rf "$ENV_DIR"
        echo "Failed to set up the environment" >&2
        exit 1
    fi
fi

In [None]:
#@title Download reference data

import os, shutil, subprocess
from pathlib import Path

# Root prefix of the micromamba installation and the environment name.
ROOT = "/content/mmb"
ENV = "env"

os.environ['MAMBA_ROOT_PREFIX'] = ROOT

# Prepend the tool directories to PATH. The original string started with ":"
# and contained "::"; empty PATH entries mean "current directory", so the
# working directory ended up on PATH. Entries already present are not added
# again, which keeps PATH stable when the cell is re-run.
_tool_dirs = [f"{ROOT}/bin", f"{ROOT}/condabin", "/content/bin/"]
_existing_dirs = [
    entry for entry in os.environ.get('PATH', '').split(os.pathsep)
    if entry and entry not in _tool_dirs
]
os.environ['PATH'] = os.pathsep.join(_tool_dirs + _existing_dirs)

os.environ['CONDA_PREFIX'] = ROOT
os.environ['CONDA_SHLVL'] = '1'
os.environ['CONDA_DEFAULT_ENV'] = 'base'
os.environ['CONDA_PROMPT_MODIFIER'] = 'base'

# Download the reference data once; an existing "data" directory is reused.
if not os.path.exists("data"):
    print("Downloading the data")
    try:
        # Argument list instead of a shell string: no shell parsing involved.
        subprocess.run(
            ["micromamba", "run", "-n", ENV, "opsindata", "-d", "data"],
            check=True,
        )
    except subprocess.CalledProcessError:
        # "data" did not exist before this call, so anything there now is a
        # partial download. Remove it so that a re-run retries the download.
        shutil.rmtree("data", ignore_errors=True)
        raise


In [None]:
#@title Run the pipeline

%%bash

# Build the option list shared by every opsinpdb call from the environment
# variables exported by the parameters cell.
args=()
if [ -n "$CHAINS" ]; then
  args+=(-c "$CHAINS")
fi
if [ -n "$RETAIN_LIGANDS" ]; then
  args+=(--ligands "$RETAIN_LIGANDS")
fi
if [ -n "$REMOVE_LIGANDS" ]; then
  args+=(--ligands-remove "$REMOVE_LIGANDS")
fi

# Each boolean setting is on by default; a flag is passed only when the
# user switched it off. One flag per setting: the original added --L for
# REMOVE_W and tested REMOVE_H a second time to add --W.
# NOTE(review): the flag spellings (--L/--H/--W/--A) are kept from the
# original cell; confirm them against the opsinpdb help output.
if [ "$REMAP_LYR" == False ]; then
  args+=(--L)
fi
if [ "$REMOVE_H" == False ]; then
  args+=(--H)
fi
if [ "$REMOVE_W" == False ]; then
  args+=(--W)
fi
if [ "$REMOVE_ALT" == False ]; then
  args+=(--A)
fi

mkdir -p output

# Match *.pdb case-insensitively; with nullglob an unmatched pattern expands
# to nothing instead of the literal string "*.pdb".
shopt -s nullglob
shopt -s nocaseglob
pdb_files=(*.pdb)
for pdb_file in "${pdb_files[@]}"; do
    prefix=$(basename -- "$pdb_file")
    echo opsinpdb -i "$pdb_file" -o "output/$prefix" "${args[@]}"
    micromamba run -n env opsinpdb -i "$pdb_file" -o "output/$prefix" "${args[@]}"
done

# Fetch and clean every requested accession (comma-separated list).
IFS=, read -r -a accessions <<< "$ACCESSIONS"
for acc in "${accessions[@]}"; do
    # Skip empty entries produced by stray commas such as "A,,B" or "A,".
    if [ -z "$acc" ]; then
        continue
    fi
    echo opsinpdb -a "$acc" -o "output/$acc.pdb" "${args[@]}"
    micromamba run -n env opsinpdb -a "$acc" -o "output/$acc.pdb" "${args[@]}"
done


In [None]:
#@title Output

import shutil
from pprint import pprint

from google.colab import files

# Pack the contents of the "output" directory into opsinpdb_output.zip in
# the working directory, then hand the archive to the browser for download.
shutil.make_archive(base_name='opsinpdb_output', format='zip', root_dir='output')
files.download("opsinpdb_output.zip")
