# Séléction de modèle 

In [1]:
import matplotlib
%matplotlib nbagg
from matplotlib import pyplot

In [2]:
from statiskit import (linalg,
                       core,
                       pgm)

In [3]:
import math
import os

In [4]:
%reload_ext rpy2.ipython

In [5]:
if not 'K' in os.environ:
    os.environ['K'] = str(5)
K = int(os.environ.get('K'))
simulation = !jupyter nbconvert --ExecutePreprocessor.timeout=3600 --to notebook --execute sampling.ipynb --output sampling.ipynb
graph0 = pgm.read_gml(os.path.join('results', 'graph.gml'))
data = core.read_csv(os.path.join('results', 'data.csv'))
data

Unnamed: 0,V0,V1,V2,V3,V4
0,$2.03$,$1.81$,$-0.01$,$0.34$,$-0.31$
1,$-0.96$,$0.94$,$-0.87$,$1.04$,$-0.17$
2,$0.71$,$-0.72$,$0.13$,$0.41$,$0.85$
3,$-1.06$,$0.16$,$-1.45$,$0.54$,$-0.96$
4,$-0.22$,$0.31$,$-0.73$,$0.31$,$-0.37$


In [13]:
import hashlib
identifier = hashlib.md5(str(graph0)).hexdigest()


In [6]:
if not 'LASSO' in os.environ:
    os.environ['LASSO'] = "LINEAIR"
LASSO = os.environ.get('LASSO')
lasso = !jupyter nbconvert --ExecutePreprocessor.timeout=3600 --to notebook --execute LASSO.ipynb --output LASSO.ipynb
lasso

 '[NbConvertApp] Converting notebook LASSO.ipynb to notebook',
 '[NbConvertApp] Executing notebook with kernel: python2',
 '[NbConvertApp] Writing 81454 bytes to LASSO.ipynb']

In [7]:
graphs = []
for filepath in os.listdir('.'):
    if filepath.startswith('lasso_'):
        adjmat = linalg.read_csv(filepath)
        graphs.append(pgm.UndirectedGraph(adjmat))
graphs = sorted(graphs, key = lambda graph: graph.nb_edges)


In [8]:
LLHs = []
BICs = []
AICs = []

%time
for graph in graphs:
    try:
        mle = pgm.graphical_gaussian_estimation(algo='scd',
                                                data=data,
                                                graph=graph)
        dist = mle.estimated.copy()

        LLHs.append(2 * dist.loglikelihood(data))
        
    except Exception as e:
        print e
        LLHs.append(float("nan"))
        BICs.append(LLHs[-1])
        AICs.append(LLHs[-1])
    else:
        BICs.append(LLHs[-1] - dist.nb_parameters * math.log(len(data.events)))
        AICs.append(LLHs[-1] - dist.nb_parameters * 2)



CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 16 µs


In [9]:
os.environ['CRITER'] = "bic"
criter = os.environ.get('CRITER')
criter

'bic'

In [10]:
if criter == "bic":
    model = BICs.index(max(BICs))
elif criter == "aic":
    model = AICs.index(max(AICs))
model

15

In [11]:
def TP(g0, g1):
    tp = 0
    for r in range(g0.nb_vertices):
        for c in range(g0.nb_vertices):
            if g0.has_edge(r, c) and g1.has_edge(r, c):
                tp += 1
    return tp

def TN(g0, g1):
    tn = 0
    for r in range(g0.nb_vertices):
        for c in range(g0.nb_vertices):
            if not g0.has_edge(r, c) and not g1.has_edge(r, c):
                tn += 1
    return tn

def FP(g0, g1):
    fp = 0
    for r in range(g0.nb_vertices):
        for c in range(g0.nb_vertices):
            if  g0.has_edge(r, c) and not g1.has_edge(r, c):
                fp += 1
    return fp

def FN(g0, g1):
    fn = 0
    for r in range(g0.nb_vertices):
        for c in range(g0.nb_vertices):
            if not g0.has_edge(r, c) and g1.has_edge(r, c):
                fn += 1
    return fn
tp, tn, fp, fn = TP(graph0, graph), TN(graph0, graph), FP(graph0, graph), FN(graph0, graph)
(tp, tn, fp, fn)

(0, 5, 0, 20)

In [12]:
TPs = []
TNs = []
FPs = []
FNs = []
for graph in graphs:
    TPs.append(TP(graph0, graph))
    TNs.append(TN(graph0, graph))
    FPs.append(FP(graph0, graph))
    FNs.append(FN(graph0, graph))


In [14]:
import math
with open(os.path.join('results', 'model.csv'), 'a') as filehandler:
    filehandler.write(','.join([str(criter), str(TPs[model]), str(TNs[model]), str(FPs[model]), str(FNs[model])]) + '\n')