In [1]:
#export
"""
This is for functions that are actually biology-related
"""
from k1lib.bioinfo.cli.init import settings, BaseCli
import k1lib.bioinfo.cli as cli
import os
from typing import Iterator
# Names exported by this module; the lookup tables defined further down (ntAa, _shortAa, _medAa, _longAa) are not listed here
__all__ = ["go",
           "transcribe", "translate", "medAa", "longAa"]

In [2]:
#export
def go(term:int):
    """Looks up a GO term and prints its entry from the gene ontology obo file.

The obo file is ``settings["oboFile"]`` when that is truthy, otherwise ``go.obo``
in the current directory. If ``settings["oboFile"]`` is None and ``go.obo`` does
not exist, the user is asked whether to download it; any answer that does not
start with "y" prints "Aborted" and returns None.

:param term: numeric part of the GO id; left-padded with zeros to 7 characters
:return: if ``settings["lookupImgs"]`` is truthy, an object whose ``_repr_html_``
    gives an ``<img>`` tag pointing at amigo.geneontology.org's visualization of
    the term; otherwise None"""
    if settings["oboFile"] is None and not os.path.exists("go.obo"):
        reply = input(
            "No gene ontology obo file specified! You can:\n"
            "- Specify the file using `settings['oboFile']='/some/folder/go.obo'`\n"
            "- Download this automatically to file `go.obo`\n"
            "\n"
            "You want to download this automatically? (y/n) ")
        if not reply.lower().startswith("y"):
            print("Aborted")
            return
        url = "http://current.geneontology.org/ontology/go.obo"
        print(f"Downloading from {url}...      ", end="")
        # presumably saves into the current directory as go.obo - confirm against cli.wget
        cli.wget(url)
        print("Finished!")
    oboPath = settings["oboFile"] or "go.obo"
    goId = f"{term}".rjust(7, "0")
    # the 0, 10 arguments are handed to cli.grep as-is; presumably lines of context
    # before/after each match - confirm against cli.grep
    cli.cat(oboPath) | cli.grep(f"id: GO:{goId}", 0, 10) > cli.stdout
    print(f"https://www.ebi.ac.uk/QuickGO/GTerm?id=GO:{goId}")
    if not settings["lookupImgs"]:
        return
    class Repr:
        def _repr_html_(self):
            return f"""<img src="http://amigo.geneontology.org/visualize?mode=amigo&term_data_type=string&format=png&inline=false&term_data=GO%3A{goId}" />"""
    return Repr()

In [3]:
#export
class transcribe(BaseCli):
    """Transcribes (DNA -> RNA) incoming rows.

Every row is lower-cased and each "t" in it is replaced with "u". A bare
string piped in is treated as a single row."""
    def __ror__(self, it:Iterator[str]):
        rows = (it,) if isinstance(it, str) else it
        for dna in rows:
            rna = dna.lower().replace("t", "u")
            yield rna

In [4]:
#export
# Codon (RNA, upper case) -> one-letter amino acid code, "*" marking a stop codon.
# Codons are enumerated with every position cycling through U, C, A, G (third
# position fastest); each string row below covers the 16 codons sharing a first base.
ntAa = dict(zip(
    (first + second + third
     for first in "UCAG" for second in "UCAG" for third in "UCAG"),
    "FFLLSSSSYY**CC*W"    # U..
    "LLLLPPPPHHQQRRRR"    # C..
    "IIIMTTTTNNKKSSRR"    # A..
    "VVVVAAAADDEEGGGG"))  # G..
# Identity mapping over the one-letter codes that ntAa can produce
_shortAa = {code: code for code in ntAa.values()}
# (one-letter code, three-letter name, full name). "U" and "?" are extra entries
# that no codon in ntAa maps to.
_aaNames = [
    ("F", "Phe", "Phenylalanine"),
    ("L", "Leu", "Leucine"),
    ("I", "Ile", "Isoleucine"),
    ("M", "Met", "Methionine"),
    ("V", "Val", "Valine"),
    ("S", "Ser", "Serine"),
    ("P", "Pro", "Proline"),
    ("T", "Thr", "Threonine"),
    ("A", "Ala", "Alanine"),
    ("Y", "Tyr", "Tyrosine"),
    ("*", "Stop", "Stop"),
    ("H", "His", "Histidine"),
    ("Q", "Gln", "Glutamine"),
    ("N", "Asn", "Asparagine"),
    ("K", "Lys", "Lysine"),
    ("D", "Asp", "AsparticAcid"),
    ("E", "Glu", "GlutamicAcid"),
    ("C", "Cys", "Cysteine"),
    ("W", "Trp", "Tryptophan"),
    ("R", "Arg", "Arginine"),
    ("G", "Gly", "Glycine"),
    ("U", "Sec", "Selenocysteine"),
    ("?", "?", "?"),
]
_medAa = {short: med for short, med, _ in _aaNames}
_longAa = {short: full for short, _, full in _aaNames}

In [5]:
#export
class translate(BaseCli):
    def __init__(self, length:int=0):
        """Translates incoming rows.

:param length: 0 for short (L), 1 for med (Leu), 2 for long (Leucine)"""
        # short codes are concatenated directly, longer names are space separated
        self.delim = "" if length == 0 else " "
        self.dict = [_shortAa, _medAa, _longAa][length]
    def __ror__(self, it:Iterator[str]):
        """Yields one translated string per incoming row.

Rows are first passed through :class:`transcribe`, spaces are removed, and the
result is read three characters at a time. Codons not found in ``ntAa`` become
"?", and 1-2 leftover characters at the end of a row are ignored. Amino acid
names are joined with the delimiter, so there is no trailing delimiter.

:param it: a single string or an iterator of strings"""
        if isinstance(it, str): it = [it]
        for line in it | transcribe():
            line = line.replace(" ", "")
            usable = len(line) - len(line) % 3 # drop the incomplete codon at the end, if any
            # transcribe() emits lower case while ntAa keys are upper case
            codons = (line[i:i+3].upper() for i in range(0, usable, 3))
            # join (instead of appending delim after each item) avoids a dangling
            # delimiter and repeated string concatenation
            yield self.delim.join(
                self.dict[ntAa[codon]] if codon in ntAa else "?" for codon in codons)
class medAa(BaseCli):
    """Converts short aa sequence to medium one.

Each character of a row is looked up in ``_medAa`` and the resulting names are
joined with single spaces. A bare string piped in is treated as a single row."""
    def __ror__(self, it:Iterator[str]):
        rows = (it,) if isinstance(it, str) else it
        for seq in rows:
            names = [_medAa[letter] for letter in seq]
            yield " ".join(names)
class longAa(BaseCli):
    """Converts short aa sequence to long one.

Each character of a row is looked up in ``_longAa`` and the resulting names are
joined with single spaces. A bare string piped in is treated as a single row."""
    def __ror__(self, it:Iterator[str]):
        rows = (it,) if isinstance(it, str) else it
        for seq in rows:
            names = [_longAa[letter] for letter in seq]
            yield " ".join(names)

In [1]:
!../../../export.py bioinfo/cli/bio

Current dir: /home/kelvin/repos/labs/k1lib, ../../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 0.1.6
Uninstalling k1lib-0.1.6:
  Successfully uninstalled k1lib-0.1.6
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/k1lib
copying k1lib/_learner.py -> build/lib/k1lib
copying k1lib/data.py -> build/lib/k1lib
copying k1lib/selector.py -> build/lib/k1lib
copying k1lib/imports.py -> build/lib/k1lib
copying k1lib