In [1]:
import numpy as np 
import pandas as pd
from os import path
from random import choice, shuffle

from ga4stpg.graph import Graph, ReaderORLibrary
from ga4stpg.graph.steiner import (prunning_mst, shortest_path,
                            shortest_path_origin_prim,
                            prunning_kruskal_mst,
                            shortest_path_with_origin)

from ga4stpg.graph.util import (is_steiner_tree, 
                                has_cycle,
                                gg_total_weight)
from ga4stpg.graph.algorithms import kruskal, prim

In [2]:
# input
dataset_file = 'steinc5.txt'

# output
csv_output = 'resultado.csv'
graphs_output = 'grafos.pickle'

In [3]:
file = path.join('..', 'datasets','ORLibrary', dataset_file)

assert path.exists(file)

reader = ReaderORLibrary()

stpg = reader.parser(file)

In [4]:
from ga4stpg.tree.prunningcrossover import PrunningMSTCrossover
from ga4stpg.tree.evaluation import EvaluateTreeGraph

In [5]:
crossover = PrunningMSTCrossover(stpg)
evaluator = EvaluateTreeGraph(stpg)

In [6]:
vertices = list(stpg.graph.vertices)

shuffle(vertices)

print(vertices[:10],' ...')

v = vertices.pop()

aa, aa_cost = shortest_path_with_origin(stpg.graph, v, stpg.terminals)

r, test = is_steiner_tree(aa, stpg)

(r, test)

[165, 359, 450, 370, 479, 179, 454, 118, 480, 147]  ...


(True,
 {'has_cycle': False,
  'all_terminals_in': True,
  'all_leaves_are_terminals': True,
  'all_edges_are_reliable': True,
  'graph_is_connected': True})

In [7]:
%%time

data = list()

for _ in range(200):
    v = vertices.pop()
    aa, aa_cost = prunning_mst(stpg.graph, v, stpg.terminals)
    is_aa_st, _ = is_steiner_tree(aa, stpg)
    
    u = vertices.pop()
    bb, bb_cost = prunning_mst(stpg.graph, u, stpg.terminals)
    is_bb_st, _ = is_steiner_tree(bb, stpg)
    
    child = crossover(aa, bb)
    cc_cost, _  = evaluator(child)
    is_cc_st, _ = is_steiner_tree(child, stpg)
    
    data.append([v, u, is_aa_st, is_bb_st, is_cc_st, aa_cost, bb_cost, cc_cost])

Wall time: 11.4 s


In [8]:
columns_name = ['start_a', 
                'start_b', 
                'is_aa_st', 
                'is_bb_st', 
                'is_cc_st' ,
                'a_cost', 
                'b_cost', 
                'c_cost']
                
frame = pd.DataFrame(data, columns=columns_name)

In [9]:
frame['less_both'] = (frame['c_cost'] < frame['a_cost']) & (frame['c_cost'] < frame['b_cost'])

In [10]:
frame['less_equal_both'] = (frame['c_cost'] <= frame['a_cost']) & (frame['c_cost'] <= frame['b_cost'])

In [11]:
frame['less_a'] = (frame['c_cost'] <= frame['a_cost'])

In [12]:
frame['less_b'] = (frame['c_cost'] <= frame['b_cost'])

In [13]:
frame.head()

Unnamed: 0,start_a,start_b,is_aa_st,is_bb_st,is_cc_st,a_cost,b_cost,c_cost,less_both,less_equal_both,less_a,less_b
0,257,175,True,True,True,1637,1638,1639,False,False,False,False
1,427,110,True,True,True,1637,1637,1637,False,True,True,True
2,362,391,True,True,True,1636,1638,1634,True,True,True,True
3,325,442,True,True,True,1638,1638,1634,True,True,True,True
4,244,206,True,True,True,1634,1636,1634,False,True,True,True


In [14]:
frame[['is_aa_st', 'is_bb_st', 'is_cc_st']].describe()

Unnamed: 0,is_aa_st,is_bb_st,is_cc_st
count,200,200,200
unique,1,1,1
top,True,True,True
freq,200,200,200


In [15]:
frame[['a_cost', 'b_cost','c_cost']].min()

a_cost    1630
b_cost    1630
c_cost    1630
dtype: int64

In [16]:
frame[['a_cost', 'b_cost','c_cost']].max()

a_cost    1647
b_cost    1648
c_cost    1640
dtype: int64

In [17]:
frame[['less_both', 'less_a', 'less_b', 'less_equal_both']].describe()

Unnamed: 0,less_both,less_a,less_b,less_equal_both
count,200,200,200,200
unique,2,2,2,2
top,False,True,True,True
freq,177,152,151,106
