# What do the models and their interaction graphs look like?

------------
## Table of contents

* **"Loading" of the ensemble of models**
* [**Creation of an interaction graph summarizing the diversity of the ensemble of models.**](#graph)

------------

In [1]:
import mpbn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Paths of the 1000 model files: models/bn0.bnet ... models/bn999.bnet
modelname = [f"models/bn{n}.bnet" for n in range(1000)]

# One MPBooleanNetwork per file, in the same order as `modelname`.
solutions = [mpbn.MPBooleanNetwork(path) for path in modelname]

`clauses_per_model` : list of models, each model being a dictionary
* keys = **model's nodes** 
* value = a set (-> **the clauses constituting the node's function in the model**)

In [3]:
# For each model, store the clauses of every node.
# clauses_per_model[k] is a dict {node: clauses} describing the k-th model of `solutions`.
clauses_per_model = []
for model in solutions:
    clauses_of_a_node = {}  # node -> its clauses (or its constant value) in this model
    for node in model.keys():
        function = model[node]
        # Constant functions are kept as they are; any other function is
        # converted to its DNF structure (the clauses ruling the node).
        is_constant = function == True or function == False
        clauses_of_a_node[node] = (
            function
            if is_constant
            else mpbn.minibn.struct_of_dnf(model.ba, function)
        )
    # Every node of the model is now linked to its clauses: save the model.
    clauses_per_model.append(clauses_of_a_node)

`influences` : dictionary
* keys = **model's nodes** 
* value = dictionary  
     + keys = **its inhibitors and activators**
     + value = **number of models having this inhibitor/activator in the node's function**

`constantes` : dictionary
* keys = **model's nodes with at least one constant function in a model (constant function : 1 or 0)**
* value = dictionary
     + key = **value of the constant function (TRUE for 1, FALSE for 0)**
     + value = **number of models having 1 or 0 as function for the node**

In [4]:
# influences[target][source][sign] = number of models in which `source` appears
#                                    with that sign in the function of `target`.
# constantes[node][value]          = number of models in which the function of
#                                    `node` is the constant `value`.
influences = dict()
constantes = dict()
for model in clauses_per_model:
    for node, influenceurs_set in model.items():
        # Register every node, even one that is constant in all models,
        # so that `influences` lists all the nodes of the ensemble.
        sources_of_node = influences.setdefault(node, dict())

        if influenceurs_set != True and influenceurs_set != False:
            # A literal is counted at most once per model. The de-duplication
            # key is the (source, sign) pair: keying on the source alone would
            # silently drop the second sign of a regulator that appears both
            # positively and negatively in the same function.
            seen_literals = set()
            for conjonctions in influenceurs_set:
                for clause in conjonctions:
                    source, sign = clause[0], clause[1]
                    if (source, sign) in seen_literals:
                        continue
                    seen_literals.add((source, sign))
                    sign_counts = sources_of_node.setdefault(source, dict())
                    sign_counts[sign] = sign_counts.get(sign, 0) + 1
        else:
            # Constant function: count how many models use this constant.
            constant_counts = constantes.setdefault(node, dict())
            constant_counts[influenceurs_set] = constant_counts.get(influenceurs_set, 0) + 1

## Creation of the influence graph, with: <a class="anchor" id="graph"></a>

* on the arcs, the number of models (among the 1000) that have the influence
* box-shaped nodes (instead of ovals) for nodes with a constant function in at least one model
* on each node, the number of different functions possible on the 1000 models

In [5]:
import graphviz

In [6]:
from typing import List, Dict


def get_arcs_size(l_input: List, min_intensity: int = 1, max_intensity: int = 10) -> Dict[int, str]:
    """Associate each value of ``l_input`` with an arc size.

    The work is delegated to ``get_arc_size_rec``, which fills the mapping
    in place over the range ``[min_intensity, max_intensity]``.

    Args:
        l_input: values to map (the notebook passes a sorted list of
            occurrence counts).
        min_intensity: smallest size that can be assigned.
        max_intensity: largest size that can be assigned.

    Returns:
        A dict mapping each value of ``l_input`` to its size, as a string.
    """
    sizes: Dict[int, str] = {}
    get_arc_size_rec(l_input, sizes, min_intensity, max_intensity)
    return sizes
       
def get_arc_size_rec(l_rec: List, res: Dict[int, str], inf: int, sup: int, left: bool = False):
    """Recursively assign a size in ``[inf, sup]`` to every value of ``l_rec``.

    The list is cut at its widest gap between consecutive values (the first
    one in case of ties). The lower part is mapped onto the lower half of the
    size range and the upper part onto the upper half. The recursion stops
    when no positive gap is left or when the range holds a single size.

    Args:
        l_rec: values to map; consecutive differences are used, so the list
            is expected to be in increasing order.
        res: mapping filled in place, value -> size (as a string).
        inf: lower bound of the size range.
        sup: upper bound of the size range.
        left: True when ``l_rec`` is the lower part of a cut; such a group
            receives ``inf`` when the recursion stops, otherwise ``sup``.
    """
    gaps = [after - before for before, after in zip(l_rec, l_rec[1:])]
    widest_gap = max(gaps, default=0)

    # Nothing left to split, or a single size available: the whole group
    # shares one size.
    if widest_gap <= 0 or inf == sup:
        size = str(inf) if left else str(sup)
        for value in l_rec:
            res[value] = size
        return

    cut = gaps.index(widest_gap) + 1  # first index of the upper part
    middle = (inf + sup) // 2
    get_arc_size_rec(l_rec[:cut], res, inf, middle, True)
    get_arc_size_rec(l_rec[cut:], res, middle + 1, sup)

In [7]:
# occurrences[k] = list of (source, target) arcs present in exactly k models
# (one entry per signed influence).
occurrences = dict()
for target, sources in influences.items():
    for source, infl in sources.items():
        for occurrence in infl.values():
            occurrences.setdefault(occurrence, list()).append((source, target))

# Pen width associated with each occurrence count.
max_intensity = 8
intensity = get_arcs_size(sorted(occurrences.keys()), max_intensity=max_intensity)
# Arcs present in every model of the ensemble are drawn extra thick. The
# ensemble size is read from the data instead of being hard-coded.
intensity[len(clauses_per_model)] = str(max_intensity + 5)

In [None]:
# Quick exploration: how many arcs are present in exactly X models,
# listed by increasing X.
for occurrence in sorted(occurrences):
    arcs = occurrences[occurrence]
    print(f"{len(arcs):2d} arcs présents dans {occurrence:3d} modèles")

In [8]:
# Directed graph that will show, on each arc, the number of models having it.
models_per_edge = graphviz.Digraph(
    name='models_per_edge',
    comment='Display the number of models having each edge in its influence graph.',
    graph_attr={'ratio': '1.1'},
)

In [9]:
# fonctions_differentes[node]                 = set of the distinct functions of `node` over the ensemble
# fonctions_differentes_avec_occurrence[node] = {function: number of models using it for `node`}
fonctions_differentes = dict()
fonctions_differentes_avec_occurrence = dict()

for node in influences.keys():
    distinct_functions = fonctions_differentes[node] = set()
    function_counts = fonctions_differentes_avec_occurrence[node] = dict()
    for model in clauses_per_model:
        function = model[node]
        distinct_functions.add(function)
        function_counts[function] = function_counts.get(function, 0) + 1

In [10]:
# Quick exploration: nodes whose function is the same in every model of the
# ensemble, i.e. nodes with exactly one distinct function.
noeuds_communs_a_tout_modele = [
    node
    for node, functions in fonctions_differentes.items()
    if len(functions) == 1
]

print(f"Nodes common to all models :\n{noeuds_communs_a_tout_modele}")

Nodes common to all models :
['ATF7', 'EBF1', 'FOXO3', 'FOXP2', 'GATA1', 'GFI1B', 'KLF1', 'NRF1', 'RELA', 'STAT6', 'TBP', 'ZEB2']


#### Nodes are colored and shaped according to their variability through the ensemble of models:
* bright yellow if identical function in all models
* pale yellow if 2 possible functions
* beige if 3 possible functions

#### Arcs are colored according to the activating/inhibiting influence
* green for an activation, red for an inhibition (darker shade when the arc is present in all the models)

In [11]:
# Graph: creation of non-constant nodes:

# Nodes that are constant in at least one model are drawn by a separate loop;
# here only the other nodes are declared, as ovals whose fill color, outline
# and font size depend on the number of distinct functions.
for node in fonctions_differentes:
    if node in constantes:
        continue

    n_functions = len(fonctions_differentes[node])
    if n_functions == 1:
        attrs = dict(fillcolor="darkgoldenrod2", style="rounded,filled,bold", fontcolor="black", fontname="arial bold", fontsize="50pt")
    elif n_functions == 2:
        attrs = dict(fillcolor="lightgoldenrod1", style="rounded,filled", fontsize="40pt")
    elif n_functions == 3:
        attrs = dict(fillcolor="cornsilk", style="rounded,filled", fontsize="35pt")
    elif n_functions < 10:
        attrs = dict(fillcolor="white", style="rounded,filled", fontsize="30pt")
    else:
        attrs = dict(fillcolor="white", style="rounded,filled,dotted", fontsize="30pt")

    models_per_edge.node(node, label=f"{node}\n{n_functions} func", shape="oval", **attrs)

In [12]:
# Graph: creation of constant nodes:

# Nodes having a constant function in at least one model are drawn as boxes.
# Each node is declared exactly once, and its label lists every constant value
# with the number of models using it. Declaring the node once per constant
# value would make the later declaration overwrite the label of the earlier
# one for nodes that are TRUE in some models and FALSE in others.
for constante, influence in constantes.items():
    n_functions = len(fonctions_differentes[constante])
    constant_lines = "\n".join(f"{signe}: {occurrence}" for signe, occurrence in influence.items())
    label = f"{constante}\n{n_functions} func\n{constant_lines}"

    # Fill color, outline and font size depend on the number of distinct functions.
    if n_functions == 1:
        attrs = dict(fillcolor="darkgoldenrod2", style="rounded,filled,bold", fontcolor="black", fontname="arial bold", fontsize="45pt")
    elif n_functions == 2:
        attrs = dict(fillcolor="lightgoldenrod1", style="rounded,filled", fontsize="35pt")
    elif n_functions == 3:
        attrs = dict(fillcolor="cornsilk", style="rounded,filled", fontsize="30pt")
    elif n_functions < 10:
        attrs = dict(fillcolor="white", style="rounded,filled", fontsize="25pt")
    else:
        attrs = dict(fillcolor="white", style="rounded,filled,dotted", fontsize="25pt")

    models_per_edge.node(constante, label=label, shape="box", **attrs)

In [13]:
# Graph: creation of the arcs, colored according to the activating/inhibiting influence:

occurrences_liste = list()  # every occurrence count, in drawing order

# Arcs present in every model of the ensemble get a darker color. `.get`
# avoids a KeyError when no arc is shared by all the models, and the ensemble
# size is read from the data instead of being hard-coded.
arcs_in_all_models = set(occurrences.get(len(clauses_per_model), ()))

for target, sources in influences.items():
    for source, infl in sources.items():
        for influence, occurrence in infl.items():
            occurrences_liste.append(occurrence)
            in_all_models = (source, target) in arcs_in_all_models

            if influence == True:
                # Activation: green, regular arrowhead.
                color = "darkgreen" if in_all_models else "darkolivegreen3"
                arrow_attrs = dict(arrowsize="2")
            else:
                # Inhibition: red, "tee" arrowhead.
                color = "firebrick" if in_all_models else "lightcoral"
                arrow_attrs = dict(arrowhead="tee", arrowsize="3")

            # The pen width comes from `intensity`; an alternative that does
            # not use it would be penwidth=str(occurrence/100).
            models_per_edge.edge(
                source,
                target,
                label=str(occurrence),
                penwidth=intensity[occurrence],
                color=color,
                fontcolor=color,
                fontname="arial bold",
                fontsize="30pt",
                **arrow_attrs,
            )

**Saving the graph:**

In [14]:
# Render the graph to a PDF file. With this filename and directory the result
# is 'data/analysis/IGstat.pdf' (see the cell output), not "models_per_edge.gv.pdf".
# view=True asks graphviz to open the rendered file once it is written.
models_per_edge.render(filename="IGstat", directory='data/analysis/', view=True)

'data/analysis/IGstat.pdf'