In [None]:
%load_ext watermark
%watermark -a Filippo_Valle -p pandas,numpy,graph_tool,cloudpickle -m -v -g

In [None]:
import logging
import os
import shutil
import sys

import graph_tool.all as gt
import pandas as pd
import numpy as np
import cloudpickle as pickle

sys.path.append("../trisbm")
from trisbm import trisbm
# Notebook-wide logger. logging.getLogger returns the same object for the same
# name on every call, so the handler is attached only once: otherwise every
# re-run of this cell would add another StreamHandler and each message would
# be printed once per handler.
logger = logging.getLogger("cnv_linked")
logger.setLevel(logging.DEBUG)
if logger.handlers:
    # Cell was already run in this kernel: reuse the existing handler.
    hdl = logger.handlers[0]
else:
    hdl = logging.StreamHandler()
    hdl.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    logger.addHandler(hdl)

In [None]:
# Restore the previously fitted model from disk.
# `pickle` here is cloudpickle (see imports); unpickling can execute arbitrary
# code, so only load files that come from a trusted source.
with open("brca/tetrasbm/trisbm/trisbm.pkl", "rb") as model_file:
    model = pickle.load(model_file)

In [None]:
# Map each node name to its position in the concatenation
# documents -> words -> every keyword array.
# NOTE(review): these positions are later used as graph vertex indices, which
# assumes the graph was built in this same order - confirm against trisbm.
all_nodes = np.concatenate((model.documents, model.words, *model.keywords))
vertexmap = {name: index for index, name in enumerate(all_nodes)}

In [None]:
def get_regulatory_edge(words=None, cnv_keywords=None, vertex_index=None, prefix_len=2):
    """Yield one edge per (word, keyword) pair whose names refer to the same gene.

    A keyword matches a word when the keyword name with its first
    ``prefix_len`` characters removed equals the word name.
    NOTE(review): this presumes entries of ``model.keywords[1]`` are named
    ``<2-character prefix><gene name>`` - confirm against the data.

    Parameters
    ----------
    words : iterable of str, optional
        Gene names. Defaults to ``model.words``.
    cnv_keywords : iterable of str, optional
        Prefixed keyword names. Defaults to ``model.keywords[1]``.
    vertex_index : mapping, optional
        Node name -> vertex index. Defaults to the notebook-level ``vertexmap``.
    prefix_len : int, optional
        Number of leading characters stripped from a keyword name before
        comparing it with a word. Defaults to 2.

    Yields
    ------
    tuple
        ``(word_vertex, keyword_vertex, 1.)``, ordered by word and, within a
        word, by keyword order.

    Raises
    ------
    KeyError
        If a matched name is missing from ``vertex_index`` (raised lazily,
        while iterating).
    """
    if words is None:
        words = model.words
    if cnv_keywords is None:
        cnv_keywords = model.keywords[1]
    if vertex_index is None:
        vertex_index = vertexmap

    # Index the keywords by their stripped name once, instead of rescanning
    # (and re-slicing) every keyword for every word.
    keywords_by_gene = {}
    for gene_cnv in cnv_keywords:
        keywords_by_gene.setdefault(gene_cnv[prefix_len:], []).append(gene_cnv)

    for gene in words:
        for gene_cnv in keywords_by_gene.get(gene, ()):
            yield (vertex_index[gene], vertex_index[gene_cnv], 1.)
# Generator object: nothing is computed until it is iterated, and it can be
# iterated only once.
regulatory_edges = get_regulatory_edge()

In [None]:
# Print the graph before the regulatory edges are added (for comparison with
# the printout made after add_edge_list).
print(model.g)
# Snapshot the unmodified graph to disk: a later cell reloads this file to get
# the graph back without the edges added in between.
model.g.save("tmp.xml.gz") # must be stored so the added edges can be removed later

In [None]:
# Add the gene -> keyword edges to the model graph in place.
# Each tuple is (source, target, 1.); with eprops=[count] the third element is
# taken as the value of the "count" edge property (see add_edge_list docs).
# regulatory_edges is a generator, so it is consumed here: re-running this cell
# without re-creating it passes an already-exhausted iterator.
model.g.add_edge_list(regulatory_edges, eprops=[model.g.ep["count"]])

In [None]:
# Print the graph again, now after add_edge_list, to compare with the earlier printout.
print(model.g)

In [None]:
# Copy level 1 of the fitted hierarchy onto the edited graph, extending the
# block lists returned by get_bs() with four single-element zero arrays.
# NOTE(review): `bs=` and `sampling=` look like arguments of a nested state's
# copy(), while `levels[1]` is a single level - confirm this is the intended
# object and that g=model.g is valid for it.
state = model.state.levels[1].copy(g=model.g, bs=model.state.get_bs() + [np.zeros(1)] * 4 , sampling = True)

# Entropy of the copied state before any sweep; must be taken before the loop.
S1 = state.entropy()

# 200 rounds of multiflip MCMC with niter=50 each. beta is passed as np.inf
# (presumably the zero-temperature / greedy setting - see graph-tool docs).
# verbose=True plus the per-round debug log produce a lot of output.
for i in range(200):
    logger.debug(i)
    state.multiflip_mcmc_sweep(niter=50, beta=np.inf, verbose=True)
    
# Compare: the original model's MDL, the entropy before the sweeps, and after.
print(model.get_mdl(), S1, state.entropy())

In [None]:
# One RGB triple (0-255) per vertex kind 0..9.
# Palette source: https://medialab.github.io/iwanthue/
colors = [  [174,80,209],
            [108,192,70],
            [207, 170, 60],
            [131,120,197],
            [126,138,65],
            [201,90,138],
            [87,172,125],
            [213,73,57],
            [85,175,209],
            [193,120,81]]

# Per-vertex colour, registered on the graph under the name "color".
colmap = model.g.new_vertex_property("vector<double>")
model.g.vertex_properties["color"] = colmap

vertex_kind = model.g.vertex_properties['kind']
# Colour shared by every vertex whose kind is 10 or above.
fallback_color = np.array([187, 129, 164])/255.

for v in model.g.vertices():
    k = vertex_kind[v]
    # Components are rescaled from 0-255 to 0-1 before being stored.
    colmap[v] = np.array(colors[k])/255. if k < 10 else fallback_color

In [None]:
# Render the hierarchy to cnv_linked.png: edge width follows the "count" edge
# property, vertex outline and fill both use the per-kind colours in colmap,
# and at most 15000 edges are drawn (subsample_edges).
# NOTE(review): this draws model.state, not the `state` refitted above (which
# is only installed into the model in a later cell) - confirm that is intended.
model.state.draw(
     subsample_edges = 15000,
     hedge_pen_width=8, 
     hvertex_size=25,
     edge_pen_width = model.g.ep["count"],
     vertex_color=colmap,
     vertex_fill_color=colmap,
    output="cnv_linked.png"
)

In [None]:
# Install the refitted state as level 1 of the model's hierarchy.
model.state.levels[1] = state
# Swap the edited graph for the snapshot saved before add_edge_list, i.e. the
# graph without the regulatory edges.
# NOTE(review): `state` was built with g=model.g (the edited graph object);
# confirm it stays consistent once model.g points to the reloaded graph.
model.g = gt.load_graph("tmp.xml.gz")

In [None]:
import os

In [None]:
# Write the model outputs into a fresh brca/cnv_linked/trisbm directory.
run_dir = os.path.join("brca", "cnv_linked")
output_dir = os.path.join(run_dir, "trisbm")

# Start from an empty run directory. Unlike a shell `rm -rf` through
# os.system, a failure here raises instead of being silently ignored.
if os.path.isdir(run_dir):
    shutil.rmtree(run_dir)
os.makedirs(output_dir, exist_ok=True)

# save_data / dump_model are run from inside the output directory, as before
# (presumably they write relative to the current directory - TODO confirm).
# try/finally guarantees the kernel returns to the starting directory even if
# saving fails, so later relative paths and re-runs of this cell keep working.
start_dir = os.getcwd()
os.chdir(output_dir)
try:
    model.save_data()
    model.dump_model()
finally:
    os.chdir(start_dir)