In [None]:
import os
import sys
import json
import codecs
import pickle
import warnings
import math
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")

In [None]:
import DesignPoint
import HierarchyMiner
import HierarchiesQF
import pysubgroup as ps
import QualityComputer
import ScaleProbaComputer
import Snapshots
import SubgroupMiner
import Target
import TargetWRAcc
import Utils
import WRAccQF

In [None]:
# The data folder is 'data/data-vt/' under the parent of the current working directory.
parentDirectory = os.path.dirname(os.getcwd())
dataDirectory = os.path.join(parentDirectory, 'data/data-vt/')
# Read the metadata table, using its first column as the index.
metadataPath = dataDirectory + 'metadata-vt.csv'
metadata = pd.read_csv(metadataPath, index_col = [0])

## 1. SI with update

In [None]:
# Load the pickled miners used by the "SI with update" run.
# NOTE(review): pickle.load can execute arbitrary code; only load 'minerSD-vt' from a trusted source.
# The context manager closes the file handle, which the bare open() call left dangling.
with open(dataDirectory + 'minerSD-vt', 'rb') as minerFile:
    minerSD = pickle.load(minerFile)
# Map each node id of the first miner to its position in allSdNodes.
dictSdNodesCorresp = {
    sdNode.node.id: position
    for position, sdNode in enumerate(minerSD[0].allSdNodes)
}
subgroupMiner = SubgroupMiner.SubgroupMiner(minerSD, dictSdNodesCorresp)

In [None]:
%%time
# Mine nPatterns patterns one at a time; after each pattern the miner values are
# updated through subgroupMiner.updateSdNodesValues before the next search starts.
nPatterns = 5
# One result frame per iteration, concatenated once after the loop.
# (DataFrame.append, used previously, was removed in pandas 2.0.)
patternFrames = []
for cpt in range(nPatterns):
    print("Pattern : " + str(cpt + 1))
    # Quality function and target are rebuilt on every iteration from subgroupMiner.
    qf = HierarchiesQF.HierarchiesQF(subgroupMiner, typeDL = 'uniform', alphaSg = 0.8, betaSelectors = 0.2, gammaDl = 1)
    target = Target.Target(subgroupMiner, typeDL = 'uniform', gammaDl = 1)
    task = ps.SubgroupDiscoveryTask(metadata, target, ps.create_selectors(metadata, ignore=['maxJVM']), result_set_size = 1, depth = 1, qf = qf)
    result = ps.BeamSearch(beam_width = 10).execute(task)
    res = result.to_dataframe(statistics_to_show=["indiceListSnapshots", "lenSubgroup", "patternIds", "patternNames"])
    patternFrames.append(res)
    # Both columns hold bracketed, comma-separated strings; parse the row labelled 0 into ints.
    indicesListMiners = [int(x) for x in res['indiceListSnapshots'].str.strip('[]').str.split(',')[0]]
    antichainIds = [int(x) for x in res['patternIds'].str.strip('[]').str.split(',')[0]]
    subgroupMiner.updateSdNodesValues(antichainIds, indicesListMiners)

# pd.concat raises on an empty list, so fall back to an empty frame when nPatterns is 0.
if patternFrames:
    df_si_update = pd.concat(patternFrames, ignore_index = True)
else:
    df_si_update = pd.DataFrame()

In [None]:
# Display the patterns collected by the "SI with update" loop.
df_si_update

In [None]:
# Row position (in df_si_update) of the pattern passed to Utils.plotOnePattern.
i = 0
Utils.plotOnePattern(df_si_update.iloc[i], minerSD)

In [None]:
# Called with every row position of df_si_update, labelled 'SI with update'.
Utils.plotOneAvgNormalizedCounters(df_si_update, np.arange(len(df_si_update)), minerSD, 'SI with update')

In [None]:
# Called with every row position of df_si_update, labelled 'SI with update'.
Utils.plotOneContrastMeasure(df_si_update, np.arange(len(df_si_update)), minerSD, 'SI with update')

In [None]:
# Called with every row position of df_si_update.
Utils.redundVersion1(df_si_update, np.arange(len(df_si_update)), minerSD)

In [None]:
# Called with every row position of df_si_update.
Utils.redundVersion2(df_si_update, np.arange(len(df_si_update)), minerSD)

## 2. SI without update

In [None]:
# Load a second, independent copy of the pickled miners for the run without update.
# NOTE(review): pickle.load can execute arbitrary code; only load 'minerSD-vt' from a trusted source.
# The context manager closes the file handle, which the bare open() call left dangling.
with open(dataDirectory + 'minerSD-vt', 'rb') as minerFile:
    minerSDCopy = pickle.load(minerFile)
# Map each node id of the first miner to its position in allSdNodes.
dictSdNodesCorresp = {
    sdNode.node.id: position
    for position, sdNode in enumerate(minerSDCopy[0].allSdNodes)
}
subgroupMinerCopy = SubgroupMiner.SubgroupMiner(minerSDCopy, dictSdNodesCorresp)

In [None]:
%%time
# Single beam search on the freshly loaded copy: 50 results, no value update in between.
qf = HierarchiesQF.HierarchiesQF(
    subgroupMinerCopy,
    typeDL = 'log',
    alphaSg = 0.4,
    betaSelectors = 0.2,
    gammaDl = 1,
)
target = Target.Target(subgroupMinerCopy, typeDL = 'log', gammaDl = 1)
# Selectors are built from every metadata column except 'maxJVM'.
selectorsSi = ps.create_selectors(metadata, ignore = ['maxJVM'])
task = ps.SubgroupDiscoveryTask(
    metadata,
    target,
    selectorsSi,
    result_set_size = 50,
    depth = 1,
    qf = qf,
)
beamSearchSi = ps.BeamSearch(beam_width = 50)
result = beamSearchSi.execute(task)
statisticsSi = ["indiceListSnapshots", "lenSubgroup", "patternIds", "patternNames"]
df_si = result.to_dataframe(statistics_to_show = statisticsSi)

In [None]:
# One line per antichain: its position, the length of its first field, then fields 2, 3 and 4.
for cpt, elt in enumerate(qf.antichains):
    antichainSummary = (len(elt[0]), elt[2], elt[3], elt[4])
    print(cpt, ':', *antichainSummary)

In [None]:
# Inspect the antichain stored at position 117 (hand-picked index).
elt = qf.antichains[117]
antichainFields = [len(elt[0])] + [elt[k] for k in (1, 2, 3, 4)]
print(*antichainFields)

In [None]:
# NOTE(review): resultOnePattern is neither defined nor imported in the visible cells, so this
# raises NameError on a fresh kernel unless it is defined elsewhere — confirm where it comes
# from (possibly one of the imported project modules).
resultOnePattern(elt[0], elt[1], elt[2], minerSDCopy)

In [None]:
# Colour and line width shared by every element of the boxplots drawn below.
colorSi = '#c23616'
lineWidth = 2.5
# Five separate dicts with identical content, one per boxplot element.
boxpropsSi, cappropsSi, whiskerpropsSi, flierpropsSi, medianpropsSi = (
    {'linewidth': lineWidth, 'color': colorSi} for _ in range(5)
)

In [None]:
import seaborn as sns
# plt is used below but was only imported in a later cell; importing it here
# lets this cell run on a fresh kernel (Restart & Run All).
from matplotlib import pyplot as plt

# Id of the node whose xHat values feed the first box.
nodeIdTop2 = 73
# xHat of that node in every miner listed in elt[0] (elt is the antichain selected earlier).
l1 = [
    sdnode.xHat
    for idMiner in elt[0]
    for sdnode in minerSDCopy[idMiner].allSdNodes
    if sdnode.node.id == nodeIdTop2
]
# Hard-coded values for the second box.
# NOTE(review): the provenance of these numbers is not visible in this notebook — confirm.
l2 = [371, 388, 690, 629, 640, 637, 631, 615, 662, 621, 612, 584, 610, 611, 608, 609, 528, 590, 71, 551, 544, 548, 546, 567, 490, 350, 339, 583, 331, 327, 540, 512, 544, 431, 538, 546, 542, 563, 585, 581, 596, 414, 527, 414, 398, 419, 429, 400, 359, 384, 383, 390, 370, 345, 339, 370, 352, 376, 314, 354, 301, 698, 686, 635, 701, 594, 603, 591, 618, 677, 728, 713, 749, 709, 711, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
plt.figure(figsize=(4,4))

# Keyword arguments common to both boxes.
boxplotStyleSi = dict(
    widths = [0.5],
    showmeans = False,
    boxprops = boxpropsSi,
    capprops = cappropsSi,
    whiskerprops = whiskerpropsSi,
    flierprops = flierpropsSi,
    medianprops = medianpropsSi,
)
plt.boxplot(l1, labels = ['antichain-Top2'], positions = [1], **boxplotStyleSi)
plt.boxplot(l2, labels = ['antichain-Top3'], positions = [2], **boxplotStyleSi)
# Hand-placed markers; only the first pair carries legend labels.
plt.plot(1,100,'x', label = 'quantile', color = 'green', markersize=10, markeredgewidth = 3)
plt.plot(1,2,'+', label = 'xBar', color = 'blue', markersize=10, markeredgewidth = 3)
plt.plot(2,344,'x', color = 'green', markersize=10, markeredgewidth = 3)
plt.plot(2,1,'+', color = 'blue', markersize=10, markeredgewidth = 3)
plt.xticks(fontsize= 12)
plt.yticks(fontsize= 12)
plt.legend(fontsize = 14)
plt.savefig('boxplots', bbox_inches='tight')

In [None]:
from matplotlib import pyplot as plt
# Stacked horizontal bars for two named nodes: the 'xBars' segment first, then the
# 'xHats' segment starting where the first one ends. All values are hard-coded.
fig, ax = plt.subplots(figsize = (4,2))
xBars = [18,36]
avgs = [94,224]
names = ['fr.infologic.stocks.fichierbase.modele.Produit', 'fr.infologic.outils.persistance']

# The bars are passed to barh in reversed order.
namesReversed = list(reversed(names))
xBarsReversed = list(reversed(xBars))
avgsReversed = list(reversed(avgs))
ax.barh(namesReversed, xBarsReversed, label='xBars', color = '#e74c3c', height = 0.3)
ax.barh(namesReversed, avgsReversed, left = xBarsReversed, label = 'xHats', color = '#1abc9c', height = 0.3)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
ax.set_title('"serverCode = L-r-02401": 9 snapshots', fontsize = 14)
plt.legend()
plt.savefig('top2-vt', bbox_inches='tight')

In [None]:
# Display the patterns returned by the single-pass search (no update).
df_si

In [None]:
# Row position (in df_si) of the pattern passed to Utils.plotOnePattern.
# NOTE(review): df_si was mined from minerSDCopy, but minerSD is passed here — confirm
# which miner list is intended.
i = 2
Utils.plotOnePattern(df_si.iloc[i], minerSD)

In [None]:
# Hand-picked subset of df_si passed to the Utils calls below (presumably row positions).
# NOTE(review): 12 appears twice in this list — confirm whether another index was intended.
indexes_si = [0, 1, 7, 9, 12, 15, 12, 18, 19]

In [None]:
# Called with every row position of df_si, labelled 'SI'.
Utils.plotOneAvgNormalizedCounters(df_si, np.arange(len(df_si)), minerSD, 'SI')

In [None]:
# Called with the hand-picked indexes_si subset rather than every row of df_si.
Utils.plotOneAvgNormalizedCounters(df_si, indexes_si, minerSD, 'SI')

In [None]:
# Called with every row position of df_si, labelled 'SI'.
Utils.plotOneContrastMeasure(df_si, np.arange(len(df_si)), minerSD, 'SI')

In [None]:
# Called with the hand-picked indexes_si subset rather than every row of df_si.
Utils.plotOneContrastMeasure(df_si, indexes_si, minerSD, 'SI')

In [None]:
# Called with every row position of df_si.
Utils.redundVersion1(df_si, np.arange(len(df_si)), minerSD)

In [None]:
# Called with the hand-picked indexes_si subset rather than every row of df_si.
Utils.redundVersion1(df_si, indexes_si, minerSD)

In [None]:
# Called with every row position of df_si.
Utils.redundVersion2(df_si, np.arange(len(df_si)), minerSD)

In [None]:
# Called with the hand-picked indexes_si subset rather than every row of df_si.
Utils.redundVersion2(df_si, indexes_si, minerSD)

## 3. Customized WRAcc

In [None]:
# Node count, taken from the first miner's allSdNodes.
firstMinerSdNodes = minerSD[0].allSdNodes
n_nodes = len(firstMinerSdNodes)
# Starts empty; entries are keyed by node position.
dict_nodes = dict()

In [None]:
# For each node position, store its id, name and xBar (read from the first miner)
# together with the list of its xHat values across all miners.
for i in range(n_nodes):
    referenceSdNode = minerSD[0].allSdNodes[i]
    dict_nodes[i] = {
        'id': referenceSdNode.node.id,
        'name': referenceSdNode.node.name,
        'xBar': referenceSdNode.xBar,
        'xHats': [minerSD[j].allSdNodes[i].xHat for j in range(len(minerSD))],
    }

In [None]:
%%time
# Beam search with the WRAcc quality function: 30 results, beam width 30.
# NOTE(review): the meaning of the second argument (4) is not visible here — confirm.
qf = WRAccQF.WRAccQF(dict_nodes, 4)
target = TargetWRAcc.TargetWRAcc(dict_nodes, 4)
# NOTE(review): unlike the SI runs, no column is ignored when building selectors — confirm.
selectorsWracc = ps.create_selectors(metadata)
task = ps.SubgroupDiscoveryTask(
    metadata,
    target,
    selectorsWracc,
    result_set_size = 30,
    depth = 1,
    qf = qf,
)
beamSearchWracc = ps.BeamSearch(beam_width = 30)
result = beamSearchWracc.execute(task)
statisticsWracc = ["indiceListSnapshots", "lenSubgroup", "patternIds", "patternNames"]
df_wracc = result.to_dataframe(statistics_to_show = statisticsWracc)
df_wracc

In [None]:
# Row position (in df_wracc) of the pattern passed to Utils.plotOnePattern.
i = 4
Utils.plotOnePattern(df_wracc.iloc[i], minerSD)

In [None]:
# Hand-picked subset of df_wracc passed to the Utils calls below (presumably row positions).
indexes_wracc = [0, 3, 8, 10, 13, 16, 19, 22, 25, 28]

In [None]:
# Called with every row position of df_wracc, labelled 'WRAcc'.
Utils.plotOneAvgNormalizedCounters(df_wracc, np.arange(len(df_wracc)), minerSD, 'WRAcc')

In [None]:
# Called with the hand-picked indexes_wracc subset rather than every row of df_wracc.
Utils.plotOneAvgNormalizedCounters(df_wracc, indexes_wracc, minerSD, 'WRAcc')

In [None]:
# Called with every row position of df_wracc, labelled 'WRAcc'.
Utils.plotOneContrastMeasure(df_wracc, np.arange(len(df_wracc)), minerSD, 'WRAcc')

In [None]:
# Called with the hand-picked indexes_wracc subset rather than every row of df_wracc.
Utils.plotOneContrastMeasure(df_wracc, indexes_wracc, minerSD, 'WRAcc')

In [None]:
# Called with every row position of df_wracc.
Utils.redundVersion1(df_wracc, np.arange(len(df_wracc)), minerSD)

In [None]:
# Called with the hand-picked indexes_wracc subset rather than every row of df_wracc.
Utils.redundVersion1(df_wracc, indexes_wracc, minerSD)

In [None]:
# Called with every row position of df_wracc.
Utils.redundVersion2(df_wracc, np.arange(len(df_wracc)), minerSD)

In [None]:
# Called with the hand-picked indexes_wracc subset rather than every row of df_wracc.
Utils.redundVersion2(df_wracc, indexes_wracc, minerSD)

## 4. Putting all together

### 4.1 Avg counters

In [None]:
# Five (frame, indices) pairs: SI with update, SI (first 20 / hand-picked), WRAcc (first 20 / hand-picked).
# NOTE(review): the 5 and 20 are hard-coded rather than derived from the frames — confirm.
Utils.plotAllAvgsNormalizedCounters(
    [df_si_update, df_si, df_si, df_wracc, df_wracc],
    [np.arange(5), np.arange(20), indexes_si, np.arange(20), indexes_wracc],
    minerSD
)

### 4.2 Contrast

In [None]:
# Five (frame, indices) pairs: SI with update, SI (first 20 / hand-picked), WRAcc (first 20 / hand-picked).
# NOTE(review): the 5 and 20 are hard-coded rather than derived from the frames — confirm.
Utils.plotAllContrastMeasure(
    [df_si_update, df_si, df_si, df_wracc, df_wracc],
    [np.arange(5), np.arange(20), indexes_si, np.arange(20), indexes_wracc],
    minerSD
)

### 4.3 Redundancy

In [None]:
# Five (frame, indices) pairs: SI with update, SI (first 20 / hand-picked), WRAcc (first 20 / hand-picked).
# NOTE(review): the 5 and 20 are hard-coded rather than derived from the frames — confirm.
Utils.plotAllRedundV2(
    [df_si_update, df_si, df_si, df_wracc, df_wracc],
    [np.arange(5), np.arange(20), indexes_si, np.arange(20), indexes_wracc],
    minerSD
)