In [1]:
import numpy as np
import pandas as pd
from os import path
from random import choice, shuffle

from ga4stpg.graph import Graph, ReaderORLibrary
from ga4stpg.graph.steiner import (prunning_mst, shortest_path,
                            shortest_path_origin_prim,
                            prunning_kruskal_mst,
                            shortest_path_with_origin)

from ga4stpg.graph.util import (is_steiner_tree, 
                                has_cycle)


In [2]:
from ga4stpg.tree.generate import GenerateBasedPrimRST
from ga4stpg.tree.evaluation import EvaluateTreeGraph
from ga4stpg.tree.pxpartition import PartitionCrossoverSteinerTree as PXST
from ga4stpg.tree.mutate import Prunning

In [3]:
# Problem instance: file name plus the dataset folder, given relative to
# the directory this notebook is run from.
input_file = 'steinc5.txt'
folder_datasets = path.join('..', '..', 'ppgi-stpg-gpx', 'datasets', 'ORLibrary')
filename = path.join(folder_datasets, input_file)

# Show the resolved path, then fail right here if it does not exist
# rather than somewhere inside the parser.
print(filename)
assert path.exists(filename)

# Parse the instance file; `stpg` is the object every operator in the
# following cells is constructed from.
reader = ReaderORLibrary()
stpg = reader.parser(filename)

..\..\ppgi-stpg-gpx\datasets\ORLibrary\steinc5.txt


In [4]:
# Operators used by the experiment, all bound to the same parsed instance.
# `evaluator(tree)` yields the cost, `generator()` produces a parent,
# `crossover(a, b)` returns two offspring.
evaluator = EvaluateTreeGraph(stpg)
generator = GenerateBasedPrimRST(stpg)
crossover = PXST(stpg)
# Built here, but only referenced from commented-out lines in the sampling cell.
prunner = Prunning(stpg)

In [5]:
%%time

data = list()
data2 = list()
print(stpg.nro_nodes)

for _ in range(50):
    
    # aa = prunner(generator())
    aa = generator()
    is_aa_st, response = is_steiner_tree(aa, stpg)
    aa_cost, _ = evaluator(aa)
    response['origem'] = 'a'
    response['is_st'] = is_aa_st
    data2.append(response)

    
    # bb = prunner(generator())
    bb = generator()
    is_bb_st, response = is_steiner_tree(bb, stpg)
    bb_cost, _ = evaluator(bb)
    response['origem'] = 'b'
    response['is_st'] = is_bb_st
    data2.append(response)
    
    child_c, child_d = crossover(aa, bb)
    
    is_cc_st, response = is_steiner_tree(child_c, stpg)
    cc_cost, _ = evaluator(child_c)
    response['origem'] = 'c'
    response['is_st'] = is_cc_st
    data2.append(response)

    is_dd_st, response = is_steiner_tree(child_d, stpg)
    dd_cost, _ = evaluator(child_d)
    response['origem'] = 'd'
    response['is_st'] = is_dd_st
    data2.append(response)
    
    data.append([is_aa_st, is_bb_st, is_cc_st, is_dd_st, aa_cost, bb_cost, cc_cost, dd_cost])

500
Wall time: 4.83 s


In [6]:
%%time

col = ['is_aa_st', 
    'is_bb_st', 
    'is_cc_st' ,
    'is_dd_st',
    'a_cost', 
    'b_cost', 
    'c_cost',
    'd_cost'
    ]

frame = pd.DataFrame(data, columns=col)

frame['less_equal_both_c'] = (frame['c_cost'] <= frame['a_cost']) & (frame['c_cost'] <= frame['b_cost'])
frame['less_equal_both_d'] = (frame['d_cost'] <= frame['a_cost']) & (frame['d_cost'] <= frame['b_cost'])

frame['less_both_c'] = (frame['c_cost'] < frame['a_cost']) & (frame['c_cost'] < frame['b_cost'])
frame['less_both_d'] = (frame['d_cost'] < frame['a_cost']) & (frame['d_cost'] < frame['b_cost'])

# frame['less_equal_a'] = (frame['c_cost'] <= frame['a_cost'])
# frame['less_equal_b'] = (frame['c_cost'] <= frame['b_cost'])

frame['diff_a_c'] = frame['a_cost'] - frame['c_cost']
frame['diff_b_c'] = frame['b_cost'] - frame['c_cost']

frame['diff_a_d'] = frame['a_cost'] - frame['d_cost']
frame['diff_b_d'] = frame['b_cost'] - frame['d_cost']

frame.head()

Wall time: 359 ms


Unnamed: 0,is_aa_st,is_bb_st,is_cc_st,is_dd_st,a_cost,b_cost,c_cost,d_cost,less_equal_both_c,less_equal_both_d,less_both_c,less_both_d,diff_a_c,diff_b_c,diff_a_d,diff_b_d
0,False,False,False,False,2728,2700,2578,2578,True,True,True,True,150,122,150,122
1,False,False,False,False,2740,2709,2729,2684,False,True,False,True,11,-20,56,25
2,False,False,False,False,2754,2780,2738,2778,True,False,True,False,16,42,-24,2
3,False,False,False,False,2754,2759,2688,2683,True,True,True,True,66,71,71,76
4,False,False,False,False,2786,2729,2777,2708,False,True,False,True,9,-48,78,21


In [7]:

# Summarise the Steiner-tree validity flag of parents (a, b) and offspring (c, d).
# NOTE(review): an earlier remark here said every solution was classified as a
# Steiner tree; the output recorded with this cell shows the opposite (the most
# frequent value is False for all 50 rows of each column). Re-check after any re-run.
frame[['is_aa_st', 'is_bb_st', 'is_cc_st', 'is_dd_st']].describe()

Unnamed: 0,is_aa_st,is_bb_st,is_cc_st,is_dd_st
count,50,50,50,50
unique,1,1,1,1
top,False,False,False,False
freq,50,50,50,50


In [8]:
# Cost distribution of the parents (a, b) and of the offspring (c, d).
frame.loc[:, ['a_cost', 'b_cost', 'c_cost', 'd_cost']].describe()

Unnamed: 0,a_cost,b_cost,c_cost,d_cost
count,50.0,50.0,50.0,50.0
mean,2746.52,2743.4,2679.74,2679.8
std,27.957139,25.89697,63.973786,57.78567
min,2687.0,2675.0,2576.0,2576.0
25%,2730.25,2728.25,2622.25,2624.25
50%,2743.0,2742.5,2683.5,2689.5
75%,2759.5,2761.0,2726.5,2724.0
max,2803.0,2797.0,2788.0,2797.0


In [9]:
# Parent cost minus cost of offspring c; positive values mean c is cheaper.
frame.loc[:, ['diff_a_c', 'diff_b_c']].describe()

Unnamed: 0,diff_a_c,diff_b_c
count,50.0,50.0
mean,38.24,42.74
std,44.814611,73.784521
min,0.0,-85.0
25%,8.25,-9.25
50%,18.0,40.0
75%,46.5,72.0
max,159.0,220.0


In [10]:
# Parent cost minus cost of offspring d; positive values mean d is cheaper.
frame.loc[:, ['diff_a_d', 'diff_b_d']].describe()

Unnamed: 0,diff_a_d,diff_b_d
count,50.0,50.0
mean,34.88,39.38
std,61.619517,47.450894
min,-94.0,0.0
25%,-21.25,8.25
50%,39.5,20.0
75%,79.5,60.75
max,165.0,168.0


In [9]:
# One row per validated tree: the individual checks reported by
# `is_steiner_tree`, plus the 'origem' tag and the overall 'is_st' flag.
frame2 = pd.DataFrame(data=data2)

frame2.head()

Unnamed: 0,has_cycle,all_terminals_in,all_leaves_are_terminals,all_edges_are_reliable,graph_is_connected,origem,is_st
0,False,True,False,True,True,a,False
1,False,True,False,True,True,b,False
2,False,True,False,True,True,c,False
3,False,True,False,True,True,d,False
4,False,True,False,True,True,a,False


In [11]:
# Restrict to the trees that failed validation and summarise each check,
# to see which one is responsible for the failures.
frame2.loc[~frame2['is_st']].describe()

Unnamed: 0,has_cycle,all_terminals_in,all_leaves_are_terminals,all_edges_are_reliable,graph_is_connected,origem,is_st
count,200,200,200,200,200,200,200
unique,1,1,1,1,1,4,1
top,False,True,False,True,True,d,False
freq,200,200,200,200,200,50,200
