In [None]:
# Check which Biopython release is installed; the bare expression on the last
# line is shown as the cell output.
import Bio
Bio.__version__

In [None]:
from Bio import SeqIO
from Bio.KEGG.REST import *
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas
from Bio.Graphics.ColorSpiral import ColorSpiral

from IPython.display import Image, HTML

import random

This notebook works through the Biopython KGML introduction notebook: https://nbviewer.jupyter.org/github/widdowquinn/notebooks/blob/master/Biopython_KGML_intro.ipynb

In [None]:
# A bit of code that will help us display the PDF output
def PDF(filename):
    """Return an IPython ``HTML`` object that embeds ``filename`` in an iframe.

    The name is HTML-escaped and placed in a quoted ``src`` attribute, so that
    file names containing spaces, quotes or markup characters still produce a
    well-formed tag.

    Args:
        filename: Path or URL of the file to embed (converted with ``str``).

    Returns:
        The ``HTML`` display object wrapping a 700x350 iframe.
    """
    from html import escape  # local import keeps this helper self-contained

    return HTML('<iframe src="%s" width=700 height=350></iframe>'
                % escape(str(filename), quote=True))

# A bit of helper code to shorten long text
def head(text, lines=10):
    """Print the first ``lines`` lines of ``text``.

    A ``[...]`` marker is printed after the excerpt only when part of the text
    was actually left out, so short texts are shown unchanged.

    Args:
        text: The string to preview; it is split on ``'\\n'``.
        lines: Maximum number of lines to print (default 10).
    """
    all_lines = text.split('\n')
    shown = all_lines[:lines]
    if len(shown) < len(all_lines):
        # Something was cut off: flag it for the reader.
        shown.append('[...]')
    print('\n'.join(shown))

In [None]:
# Kyoto Encyclopedia of Genes and Genomes (KEGG): print the text that
# kegg_info returns for the "kegg" identifier.
print(kegg_info("kegg").read())

In [None]:
# Same query for "sey", the organism code used for the pathway lookups below.
print(kegg_info("sey").read())

In [None]:
# Fetch the pathway listing for organism code 'sey' and keep the first two
# tab-separated fields of every row (used below as pathway ID and pathway name).
# NOTE(review): this cell used to be labelled "Escherichia coli K-12 MG1655",
# but the notebook uses 'eco' for E. coli further down -- confirm which
# organism 'sey' denotes.
listing_rows = kegg_list('pathway', 'sey').read().strip().split("\n")
sey_pathways = [
    [fields[0], fields[1]]
    for fields in (row.split('\t') for row in listing_rows)
]

In [None]:
# Fetch the record of the first listed pathway; the raw text is shown as the
# cell output.
kegg_get(sey_pathways[0][0]).read()

In [None]:
# Number of pathways in the listing.
len(sey_pathways)

In [None]:
# Names of the pathways that mention pyruvate. The comparison is
# case-insensitive so that capitalised titles (e.g. a name starting with
# "Pyruvate") are found as well as lowercase occurrences.
[entry[1] for entry in sey_pathways if 'pyruvate' in entry[1].lower()]

In [None]:
def _gene_symbols_from_flatfile(flat_text):
    """Return the unique gene symbols found in the GENE section of a record.

    Each line is split at column 12: the left part is the section keyword
    (blank on continuation lines, which then belong to the previous section)
    and the right part is the entry. Within the GENE section the text before
    the first ';' is split on whitespace and its second token is taken as the
    gene symbol.

    Entries that do not provide two tokens are skipped instead of raising on
    tuple unpacking.

    NOTE: for entries without a ';' the second token may be a word of the
    description rather than a real gene symbol; it is still recorded.

    Args:
        flat_text: Text of one pathway record as returned by ``kegg_get``.

    Returns:
        List of symbols in order of first appearance, without duplicates.
    """
    symbols = []
    seen = set()  # O(1) duplicate check; `symbols` keeps the order
    current_section = None
    for line in flat_text.rstrip().split("\n"):
        section = line[:12].strip()
        if section:
            current_section = section
        if current_section != 'GENE':
            continue
        tokens = line[12:].split(";", 1)[0].split()
        if len(tokens) < 2:
            continue
        gene_symbol = tokens[1]
        if gene_symbol not in seen:
            seen.add(gene_symbol)
            symbols.append(gene_symbol)
    return symbols


# Build one GMT-style row per pathway: [pathway ID, pathway name, symbol, ...].
gmt_symbol = []
for pathway in sey_pathways:
    print(pathway[0])  # progress indicator: one request is made per pathway
    path_file = kegg_get(pathway[0]).read()
    genes = _gene_symbols_from_flatfile(path_file)
    gmt_symbol.append([pathway[0], pathway[1]] + genes)

In [None]:
import pandas as pd
# Disabled: `gmt` is not defined in the visible part of this notebook.
#gdf = pd.DataFrame(gmt)
# After the transpose there is one column per pathway: row 0 holds the pathway
# ID, row 1 the pathway name and the remaining rows the gene symbols. Columns
# of pathways with fewer genes are padded with missing values.
gdf_symbol = pd.DataFrame(gmt_symbol).T

In [None]:
# Gene entries of the first pathway column: rows 0 and 1 hold the pathway ID
# and name, so genes start at row 2; missing-value padding is dropped.
# "hypothetical" and "alcohol" are presumably description words picked up for
# genes without a symbol, so they are filtered out. Filtering (unlike
# list.remove) does not raise when a token is absent from the fetched data.
NON_SYMBOL_TOKENS = {"hypothetical", "alcohol"}
test_pathway = [
    gene for gene in gdf_symbol.iloc[2:, 0].dropna().values
    if gene not in NON_SYMBOL_TOKENS
]
test_pathway

In [None]:
# Export the gene-symbol table as tab-separated text.
# `gdf` is never assigned in the visible notebook (its only assignment is
# commented out), so the former `gdf.to_csv("./15-09-sey.gmt", sep="\t")` call
# raised NameError on a fresh kernel and has been dropped.
gdf_symbol.to_csv("./15-09-sey_symbol.gmt", sep="\t")

In [None]:
# Preview five random rows of the gene-symbol table. `gdf` is not defined in
# the visible notebook; `gdf_symbol` is the table that is actually built.
gdf_symbol.sample(5)

In [None]:
# Print the listing for the two identifiers joined with '+' (C01290 and G00092).
print(kegg_list('C01290+G00092').read())

In [None]:
# Pathway identifier noted for reference, kept as a string so the cell runs:
# the bare text path:sey00640 is parsed as an annotation of `path` and raises
# NameError when `sey00640` is evaluated.
"path:sey00640"

In [None]:
# Show every line of the sey00190 record that mentions 'L24009'.
record_text = kegg_get("path:sey00190").read()
matching_rows = [row for row in record_text.split('\n') if 'L24009' in row]
for row in matching_rows:
    print(row)

In [None]:
# Display inline the image returned for pathway sey00190.
Image(kegg_get("sey00190", "image").read())

In [None]:
# What are these???
# NOTE(review): the prefixes presumably select different renderings of map
# 00061 ("ko" = orthology reference, "eco" = organism-specific, "rn" =
# reaction reference) -- confirm against the KEGG documentation.
# Render reference fatty-acid biosynthesis
#Image(kegg_get("ko00061", "image").read()) # KEGG orthologs
Image(kegg_get("eco00061", "image").read()) # E. coli version
#Image(kegg_get("rn00061", "image").read())

In [None]:
# Parse the KGML description of pathway sey00190 and print the resulting
# object's text summary.
pathway = KGML_parser.read(kegg_get("sey00190", "kgml"))
print(pathway)

In [None]:
# Print the name of every ortholog graphic; for graphics whose name contains
# 'K00330' the background colour is printed just before the name.
for ortholog in pathway.orthologs:
    for graphic in ortholog.graphics:
        label = graphic.name
        if 'K00330' in label:
            print(graphic.bgcolor)
        print(label)

In [None]:
# Dump the main attributes of the first graphic of the second ortholog entry,
# one value per line.
element = pathway.orthologs[1].graphics[0]
attr_names = ('name', 'x', 'y', 'coords', 'type', 'width', 'height',
              'fgcolor', 'bgcolor', 'bounds', 'centre')
attrs = [getattr(element, attr_name) for attr_name in attr_names]
for attr in attrs:
    print(attr)

In [None]:
# Copy the ortholog entries into a list and print the name of each of their
# graphics.
orthologs = list(pathway.orthologs)
for ortholog in orthologs:
    for graphic in ortholog.graphics:
        print(graphic.name)

In [None]:
# for orth in pathway.orthologs:
#     element = orth.graphics[0]
#     attrs = [element.name, element.x, element.y, element.coords, element.type, 
#          element.width, element.height, element.fgcolor, element.bgcolor, 
#          element.bounds, element.centre]
#     print ('\n'.join([str(attr) for attr in attrs]))

In [None]:
# Helper function to convert colour as RGB tuple to hex string
def rgb_to_hex(rgb):
    """Convert a colour given as floats in the range 0-1 to a hex string.

    Every channel is scaled by 255, truncated to an integer, clamped to
    0-255 and written as exactly two upper-case hex digits. Zero-padding
    matters: without it ``(1, 0, 0)`` would become ``'#FF00'`` instead of
    ``'#FF0000'``.

    Args:
        rgb: Iterable of channel values, nominally between 0 and 1.

    Returns:
        A string of the form ``'#RRGGBB'`` (two digits per supplied channel).
    """
    channels = [min(255, max(0, int(255 * val))) for val in rgb]
    return '#' + ''.join('%02X' % channel for channel in channels)

In [None]:
# Load the sey00020 pathway map and build a canvas for it
pathway = KGML_parser.read(kegg_get("sey00020", "kgml"))
canvas = KGMLCanvas(pathway, import_imagemap=True)


# Define arbitrary colours, one per ortholog entry
colorspiral = ColorSpiral()
colorlist = colorspiral.get_colors(len(pathway.orthologs))

# Change the colours of ortholog elements and tidy their labels
for color, element in zip(colorlist, pathway.orthologs):
    hex_color = rgb_to_hex(color)  # identical for all graphics of this element
    for graphic in element.graphics:
        graphic.bgcolor = hex_color
        # Remove only a trailing "..." from the label. str.strip('...') treats
        # its argument as a set of characters and would strip every '.' from
        # both ends of the name.
        label = graphic.name
        graphic.name = label[:-3] if label.endswith('...') else label

In [None]:
# Draw the current pathway to a PDF file and embed that file in the cell output.
pdf_name = "fab_map_new_colours.pdf"
canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw(pdf_name)
PDF(pdf_name)

In [None]:
# Load the sey00520 pathway map
pathway = KGML_parser.read(kegg_get("sey00520", "kgml"))

# Inspect the name, type and width of every ortholog graphic.
# Assigning random widths is currently disabled:
#     graphic.width = random.randrange(1, 10, 1)
for element in pathway.orthologs:
    for graphic in element.graphics:
        for field in ("name", "type", "width"):
            print(getattr(graphic, field))