In [None]:
%load_ext watermark
%watermark -a Filippo_Valle -p pandas,numpy,graph_tool,cloudpickle -m -v -g

In [None]:
import graph_tool.all as gt
import pandas as pd
import numpy as np
import cloudpickle as pickle
import sys
sys.path.append("../trisbm")
from trisbm import trisbm

In [None]:
# Load the pickled model object (`pickle` here is cloudpickle, see the imports).
# NOTE: unpickling executes arbitrary code, so only load model files from a trusted source.
with open("brca/trisbm/model.pkl", "rb") as file:
    model = pickle.load(file)

In [None]:
# Regulatory network table to use; the path is resolved relative to "../regulatory/"
# by the read_csv call in the next cell.
# Alternative source, kept for reference:
#network_file = "mirdip/HUMAN.mirDIP_top90k.Translated.tsv"
network_file = "tarbase/HUMAN.TarBase.Translated.tsv"

In [None]:
# Regulatory table: tab separated, no header row. Column 0 is matched against
# model.keywords and the "miRBase ID" column, column 1 against "Approved symbol".
df_net = pd.read_csv("../regulatory/"+network_file, sep="\t", header=None)
# Rewrite the "miR" infix as "mir" in every name of column 0.
df_net[0] = df_net[0].map(lambda name: name.replace("miR", "mir"))

# Lookup tables: miRNA identifiers (local file) and symbol -> Ensembl id (HGNC download).
df_conversion = pd.read_csv("miRNA.txt")
df_gene_conversion = pd.read_csv("https://www.genenames.org/cgi-bin/download/custom?col=gd_app_sym&col=gd_pub_ensembl_id&status=Approved&status=Entry%20Withdrawn&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit", sep="\t").dropna(how="any", axis=0)

# Keep only rows whose miRNA is a model keyword with a miRBase ID and whose
# target has an approved symbol (rows with missing values were dropped above).
is_model_keyword = df_net[0].isin(model.keywords)
has_mirbase_id = df_net[0].isin(df_conversion["miRBase ID"])
has_approved_symbol = df_net[1].isin(df_gene_conversion["Approved symbol"])
df_net = df_net[is_model_keyword & has_mirbase_id & has_approved_symbol]

# Index by symbol so later cells can look up the Ensembl id with .at[symbol, ...].
df_gene_conversion = df_gene_conversion.set_index("Approved symbol")

In [None]:
# Map every node name to its position in the documents -> words -> keywords ordering.
# These positions are used as vertex indices when the regulatory edges are built
# (assumes the graph's vertices follow the same ordering -- TODO confirm).
all_nodes = np.concatenate((model.documents, model.words, model.keywords))
vertexmap = {node: position for position, node in enumerate(all_nodes)}

In [None]:
def get_regulatory_edge(network=None, gene_ids=None, keywords=None, words=None, vertex_index=None):
    """Lazily yield ``(source, target, weight)`` tuples for miRNA -> gene edges.

    Every argument is optional and falls back to the corresponding notebook
    global, so calling ``get_regulatory_edge()`` with no arguments behaves as
    before.

    Parameters
    ----------
    network : DataFrame, optional
        Two-column table of (miRNA name, target gene symbol). Defaults to ``df_net``.
    gene_ids : DataFrame, optional
        Table indexed by gene symbol with an "Ensembl gene ID" column.
        Defaults to ``df_gene_conversion``.
    keywords : iterable of str, optional
        Names allowed as edge sources. Defaults to ``model.keywords``.
    words : iterable of str, optional
        Gene ids allowed as edge targets. Defaults to ``model.words``.
    vertex_index : mapping, optional
        Node name -> vertex index. Defaults to ``vertexmap``.

    Yields
    ------
    tuple
        ``(vertex_index[mirna], vertex_index[gene], 1.)`` for every row whose
        miRNA is in ``keywords`` and whose translated gene is in ``words``.

    Raises
    ------
    KeyError
        If a target symbol is missing from ``gene_ids`` or a name is missing
        from ``vertex_index``.
    """
    network = df_net if network is None else network
    gene_ids = df_gene_conversion if gene_ids is None else gene_ids
    vertex_index = vertexmap if vertex_index is None else vertex_index
    # Build the membership sets once: testing `in` on an array or list is a
    # linear scan, which the original repeated twice for every edge.
    keyword_set = set(model.keywords if keywords is None else keywords)
    word_set = set(model.words if words is None else words)

    for mirna, target in network.values:
        # Translate the target's gene symbol into its Ensembl id.
        gene = gene_ids.at[target, "Ensembl gene ID"]
        if mirna in keyword_set and gene in word_set:
            yield (vertex_index[mirna], vertex_index[gene], 1.)

# Single-use generator: nothing above runs until the edges are iterated.
regulatory_edges = get_regulatory_edge()

In [None]:
# Show the graph summary before the regulatory edges are added.
print(model.g)
# Snapshot the graph to disk now: it is reloaded in a later cell to get back
# a copy without the edges added below.
model.g.save("tmp.xml.gz") #need to be stored to remove edges later

In [None]:
# Add the regulatory edges to the graph; the third element of each tuple is passed
# through eprops as the value for the "count" edge property.
# NOTE: regulatory_edges is a generator, so once it has been consumed re-running
# this cell adds nothing unless the generator is recreated first.
model.g.add_edge_list(regulatory_edges, eprops=[model.g.ep["count"]])

In [None]:
# Show the graph summary again, now that the regulatory edges have been added.
print(model.g)

In [None]:
# Refit settings: number of outer rounds and sweeps performed per round.
N_ROUNDS = 100
SWEEPS_PER_ROUND = 10

# Copy the fitted state onto the current graph, appending four extra
# single-entry zero arrays to the list returned by get_bs().
extended_bs = model.state.get_bs() + [np.zeros(1)] * 4
state = model.state.copy(g=model.g, bs=extended_bs, sampling=True)

# Entropy of the copied state before any sweep, kept for the comparison below.
S1 = state.entropy()

for sweep_round in range(N_ROUNDS):
    state.multiflip_mcmc_sweep(niter=SWEEPS_PER_ROUND, beta=np.inf)

# Model MDL, entropy before the sweeps, entropy after the sweeps.
print(model.get_mdl(), S1, state.entropy())

In [None]:
# Per-vertex RGB colour (components scaled to [0, 1]), registered on the graph as "color".
colmap = model.g.new_vertex_property("vector<double>")
model.g.vertex_properties["color"] = colmap

#https://medialab.github.io/iwanthue/
colors = [  [174,80,209],
            [108,192,70],
            [207, 170, 60],
            [131,120,197],
            [126,138,65],
            [201,90,138],
            [87,172,125],
            [213,73,57],
            [85,175,209],
            [193,120,81]]

# Normalise the palette once instead of once per vertex.
palette = [np.array(rgb)/255. for rgb in colors]
# Colour for any vertex whose "kind" is 10 or more.
fallback_color = np.array([187, 129, 164])/255.

vertex_kind = model.g.vertex_properties['kind']
for v in model.g.vertices():
    k = vertex_kind[v]
    colmap[v] = palette[k] if k < 10 else fallback_color

In [None]:
# Render the state to "tarbase.png": vertices are outlined and filled with the colours
# from `colmap`, and the "count" edge property is passed as the edge pen width.
# NOTE(review): at this point model.state is still the state loaded from the pickle;
# the refit `state` is only assigned to the model in the next cell -- confirm which
# of the two is meant to be drawn.
model.state.draw(
     subsample_edges = 15000,
     hedge_pen_width=8, 
     hvertex_size=25,
     edge_pen_width = model.g.ep["count"],
     vertex_color=colmap,
     vertex_fill_color=colmap,
    output="tarbase.png"
)

In [None]:
# Make the refit state the model's state ...
model.state = state
# ... and replace the model's graph with the snapshot saved before the regulatory
# edges were added (see the earlier model.g.save("tmp.xml.gz") call).
model.g = gt.load_graph("tmp.xml.gz")

In [None]:
import os

In [None]:
# necessary to be compatible with older versions of trisbm
model.nbranches = 1 # not stored in older versions
model.keywords = [model.keywords] # new format
model.groups = {} # should not be cached

In [None]:
import shutil  # local import: only this export cell needs it

# Export the refit model into a fresh brca/tarbase/trisbm directory.
start_dir = os.getcwd()
tarbase_dir = os.path.join("brca", "tarbase")
output_dir = os.path.join(tarbase_dir, "trisbm")

# Start from an empty output tree (stands in for `rm -rf tarbase && mkdir -p ...`),
# without going through a shell.
shutil.rmtree(tarbase_dir, ignore_errors=True)
os.makedirs(output_dir, exist_ok=True)

# The model is saved from inside the output directory, as in the original cell
# (presumably save_data/dump_model write relative to the working directory --
# TODO confirm against trisbm).
os.chdir(output_dir)
try:
    model.save_data()
    model.dump_model()
finally:
    # Always return to the starting directory, even if saving fails, so that
    # later cells do not run with an unexpected working directory.
    os.chdir(start_dir)

In [None]:
# NOTE(review): the export cell above ends back in the directory it started from,
# so this call moves the working directory two levels further up -- confirm that
# this is intended and not a leftover.
os.chdir("../../")