In [None]:
# %load ../snippets/basic_settings.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path


# Seaborn context goes first: the explicit rcParams below are applied after it.
sns.set_context("notebook", font_scale=1.1)

# pandas display: show up to 100 rows/columns, floats with thousands
# separators and two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
pd.set_option('display.float_format', '{:,.2f}'.format)

# matplotlib defaults shared by every figure in this notebook.
plt.rcParams.update({
    "figure.figsize": (16, 12),
    'savefig.dpi': 200,
    'figure.autolayout': False,
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'font.size': 16,
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    'legend.fontsize': 14,
    'text.usetex': False,  # True activates latex output in fonts!
    'font.family': "serif",
    'font.serif': "cm",
})

In [None]:
# Load the annotated pathway-enrichment results; the first CSV column becomes the index.
data = pd.read_csv("../../../data/10_21_pathway_enrichment/final_results_annotated.csv", index_col=0)

In [None]:
# Work on an independent copy of the rows that carry a KEGG pathway annotation.
has_pathway = data['KEGG_Pathway'].notna()
df = data[has_pathway].copy()

# Subsets whose annotation string mentions ko00020 (TCA cycle) or
# ko00010 (glycolysis), respectively.
pathway_annotation = df['KEGG_Pathway']
tcadf = df[pathway_annotation.str.contains('ko00020')]
glycdf = df[pathway_annotation.str.contains('ko00010')]

In [None]:
# List the columns available in the annotated subset.
df.columns

In [None]:
# Median of the 'CI' column per day for the ko00020 subset.
tcadf.groupby('day')['CI'].median()

In [None]:
# Inspect the rows annotated with ko00020.
tcadf

In [None]:
# Overlay per-gene z-scores (swarm) and their mean with a 95% CI (line) for
# the two pathway subsets on one explicit Axes, so that the figure carries a
# legend and labels and can be read on its own.
fig, ax = plt.subplots()

# (legend label, data, colour, extra swarmplot keyword arguments)
pathway_layers = [
    ('TCA cycle (ko00020)', tcadf, 'black', {}),
    ('Glycolysis (ko00010)', glycdf, 'blue', {'alpha': 0.3}),
]

for label, frame, colour, swarm_kws in pathway_layers:
    sns.lineplot(data=frame, x='day', y='z-score', color=colour,
                 estimator="mean", ci=95, label=label, ax=ax)
    sns.swarmplot(data=frame, x='day', y='z-score', color=colour, ax=ax, **swarm_kws)

ax.set(xlabel='day', ylabel='z-score',
       title='Per-gene z-scores over time: TCA cycle vs. glycolysis')
ax.legend();

In [None]:
# Collect every distinct pathway id containing 'ko' from the comma-separated
# annotation strings; other tokens are dropped.
kegg_pathways = {
    pathway_id
    for annotation in df.KEGG_Pathway.unique()
    for pathway_id in annotation.split(',')
    if 'ko' in pathway_id
}

In [None]:
# Show the set of collected pathway ids.
kegg_pathways

In [None]:
# Map each gene to its KEGG pathway annotation string.
# NOTE(review): if a gene occurs with several distinct annotations, only one
# survives in the dict (presumably the last one) - confirm this is acceptable.
gene_pathway_pairs = df[['gene', 'KEGG_Pathway']].drop_duplicates()
gene2path = gene_pathway_pairs.set_index('gene')['KEGG_Pathway'].to_dict()

In [None]:
# Invert gene -> annotation into pathway -> [genes].
# Every known pathway gets an entry (possibly empty). Each gene's annotation
# is split once and its ids are matched exactly, instead of substring-testing
# every pathway against every gene's annotation string.
path2gene = {path: [] for path in kegg_pathways}
for gene, annotation in gene2path.items():
    # dict.fromkeys drops repeated ids within one annotation but keeps order,
    # so a gene is listed at most once per pathway.
    for pathway_id in dict.fromkeys(annotation.split(',')):
        if pathway_id in path2gene:
            path2gene[pathway_id].append(gene)
            

In [None]:
# Long-format table with one row per (pathway, gene) pair. Building it from
# records also works when path2gene is empty.
ko2gene_df = pd.DataFrame(
    [(path, gene) for path, genes in path2gene.items() for gene in genes],
    columns=['KEGG_Pathway', 'gene'],
)

# Keep only pathways represented by more than 4 genes. Filtering the
# value_counts Series directly avoids depending on the column names that
# reset_index() produces, which differ between pandas versions.
genes_per_pathway = ko2gene_df['KEGG_Pathway'].value_counts()
to_keep = genes_per_pathway[genes_per_pathway > 4].index.values
ko2gene_df = ko2gene_df[ko2gene_df['KEGG_Pathway'].isin(to_keep)]

# Attach the per-day measurements of every gene (left join keeps all pairs).
ko2gene_df = ko2gene_df.merge(df[['gene', 'day', 'z-score', 'CI']], how='left', on='gene')

In [None]:
# Median z-score for each (pathway, day) combination; the result still has
# two-level column labels after reset_index().
test = (
    ko2gene_df
    .groupby(['KEGG_Pathway', 'day'])[['z-score']]
    .agg(['median'])
    .reset_index()
)

In [None]:
# Replace the column labels left by the aggregation with three flat names.
test.columns = ['KEGG_Pathway', 'day', 'z-score']

In [None]:
# Bubble plot: one row per pathway, one column per day; marker size and
# colour both encode the median z-score. The figure is very tall so that each
# pathway gets its own row.
plt.figure(figsize=(4, 45))
sns.scatterplot(
    data=test,
    x="day",
    y="KEGG_Pathway",
    hue='z-score',
    size="z-score",
    sizes=(1, 500),
    palette='Blues_r',
    legend=False,
)

In [None]:
# Wide pathway x day matrix of median z-scores; missing combinations are filled with 0.
test2 = test.pivot(index='KEGG_Pathway', columns='day').fillna(0)

In [None]:
# Cluster pathways (rows) only; the day columns keep their order, and every pathway gets a tick label.
sns.clustermap(test2,  col_cluster=False, figsize=(3, 30), yticklabels=True)

In [None]:
# Genes assigned to pathway ko00190.
path2gene['ko00190']

In [None]:
from Bio import SeqIO
from Bio.KEGG.REST import *
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas
from Bio.Graphics.ColorSpiral import ColorSpiral

from IPython.display import Image, HTML

import random

# Helper for displaying generated PDF output inline in the notebook
def PDF(filename, width=700, height=350):
    """Return an HTML object embedding `filename` in an iframe.

    Args:
        filename: Path or URL of the PDF, as a string or path-like object.
        width: Value of the iframe's ``width`` attribute (default 700).
        height: Value of the iframe's ``height`` attribute (default 350).

    Returns:
        An ``IPython.display.HTML`` object; it renders when it is the last
        expression of a cell.
    """
    from html import escape  # local import keeps the helper self-contained

    # Quote and escape the source so names containing spaces or characters
    # such as '>' or '&' do not break the markup.
    return HTML(
        '<iframe src="%s" width=%s height=%s></iframe>'
        % (escape(str(filename), quote=True), width, height)
    )

# Helper that prints only the beginning of a long text
def head(text, lines=10):
    """Print the first `lines` lines of `text`.

    A trailing ``[...]`` marker is printed only when the text has more lines
    than were shown, so the marker always means that output was cut.

    Args:
        text: String to preview; lines are separated by ``'\\n'``.
        lines: Maximum number of lines to print (default 10).
    """
    rows = text.split('\n')
    shown = rows[:lines]
    if len(rows) > lines:
        shown.append('[...]')
    print('\n'.join(shown))

In [None]:
# Download the KGML description of ko01130 from KEGG, parse it, and render
# it (with the KEGG image map as background) to a PDF shown inline.
kgml_handle = kegg_get("ko01130", "kgml")
pathway = KGML_parser.read(kgml_handle)

map_pdf = "fab_map_with_image.pdf"
canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw(map_pdf)
PDF(map_pdf)

In [None]:
# Name of the first graphics element of every ortholog entry in the pathway,
# with leading/trailing dots removed.
lps_kos = [orth.graphics[0].name.strip('...') for orth in pathway.orthologs]

In [None]:
# Per-gene KO table. Keep only the first ';'-separated KEGG_ko entry and drop
# its 'ko:' prefix. The vectorised .str methods pass missing values through
# instead of raising, and remove the literal prefix rather than a character set.
test = df[['gene', 'KEGG_ko', 'z-score', 'day']].copy()
test['KEGG_ko'] = (
    test['KEGG_ko']
    .str.split(';').str[0]
    .str.replace(r'^ko:', '', regex=True)
)

# Measurements from day 'd1' only.
td1 = test[test.day == 'd1']

In [None]:
# The z-score -> colour mapping is defined in this cell so that it runs on a
# fresh kernel: diverging 'coolwarm' scale centred on 0, spanning -9 .. 2.
from matplotlib import colors

vmin, vmax = -9, 2
cmap = plt.cm.coolwarm
divnorm = colors.TwoSlopeNorm(vmin=vmin, vcenter=0., vmax=vmax)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=divnorm)

# Median day-1 z-score per KO, restricted to KOs present on the pathway map.
# Selecting 'z-score' explicitly keeps the string columns out of the median.
td1col = td1[td1.KEGG_ko.isin(lps_kos)].groupby('KEGG_ko')[['z-score']].median()

# One hex colour per KO, in the same order as the rows of td1col.
ncolor = [colors.to_hex(sm.to_rgba(z)) for z in td1col['z-score'].values]
td1col['col'] = ncolor

# KO id -> hex colour lookup used to recolour the pathway graphics.
coldict = td1col['col'].to_dict()

In [None]:
# Recolour every ortholog graphic: KOs with a computed colour get it, all
# others get a neutral near-white background.
for element in pathway.orthologs:
    for graphic in element.graphics:
        # lps_kos (and therefore the coldict keys) were built from names with
        # the surrounding dots stripped, so normalise the name the same way
        # before the lookup; otherwise names like 'K00001...' never match.
        ko_id = (graphic.name or '').strip('.')
        graphic.bgcolor = coldict.get(ko_id, '#f7f6ff')

In [None]:
# Render the pathway again, now with the updated background colours, and
# display the resulting PDF inline.
recoloured_pdf = "fab_map_new_colours.pdf"
canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw(recoloured_pdf)
PDF(recoloured_pdf)

In [None]:
# Diverging colour scale for z-scores: 'coolwarm' centred on 0, spanning -9 .. 2
from matplotlib import colors

vmin, vmax = -9, 2
cmap = plt.cm.coolwarm
divnorm = colors.TwoSlopeNorm(vmin=vmin, vcenter=0., vmax=vmax)
sm = plt.cm.ScalarMappable(norm=divnorm, cmap=cmap)

In [None]:
# Spot-check the first hex colour in the computed list.
ncolor[0]

In [None]:
# Show the ortholog names collected from the pathway map.
lps_kos

In [None]:
# Genes assigned to pathway ko01130 (the pathway drawn above).
path2gene['ko01130']