-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_beam.py
80 lines (59 loc) · 4.35 KB
/
prepare_beam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import pandas as pd
import desc_based_selection as dbs
import cover_based_selection as cbs
def collect_beam_and_candidate_result_set(candidate_result_set=None, cq_satisfied=None, qm=None, beam_search_params=None, data_size=None, wcs_params=None):
    """Build the next beam and the updated result set from the candidates.

    Dispatches to one of two procedures depending on ``wcs_params['run']``:
    the redundancy-aware procedure (description-based followed by cover-based
    selection) or the simple top-``w`` / top-``q`` procedure.

    Args:
        candidate_result_set: Nested list (list of lists of candidates)
            holding the result set so far.
        cq_satisfied: Candidates to choose from. Each candidate is indexed as
            ``candidate['qualities'][qm]`` by the selection procedures.
            ``None`` is treated like an empty list.
        qm: Key of the quality measure used for ranking.
        beam_search_params: Mapping read for the keys ``'w'`` and ``'q'``.
        data_size: Forwarded to the cover-based selection.
        wcs_params: Mapping read for the key ``'run'`` and forwarded to the
            redundancy-aware procedure.

    Returns:
        A tuple ``(candidate_result_set, candidate_queue, n_redun_descs)``.
        ``n_redun_descs`` is ``None`` unless the redundancy-aware procedure
        ran. When there are no candidates, ``candidate_result_set`` is
        returned unchanged together with an empty queue.
    """
    # No candidates (or none passed at all): nothing to select from, so the
    # result set stays as it is and the beam is empty.
    if cq_satisfied is None or len(cq_satisfied) == 0:
        return candidate_result_set, [], None

    if wcs_params['run']:
        # Follow the procedure using 1 quality measure: description-based
        # and cover-based selection are applied to prevent redundancy issues.
        candidate_result_set, candidate_queue, n_redun_descs = prepare_beam_and_candidate_result_set_redundancy_techniques(
            candidate_result_set=candidate_result_set,
            cq_satisfied=cq_satisfied,
            qm=qm,
            beam_search_params=beam_search_params,
            data_size=data_size,
            wcs_params=wcs_params,
        )
        return candidate_result_set, candidate_queue, n_redun_descs

    candidate_result_set, candidate_queue = prepare_beam_and_candidate_result_set_simple(
        candidate_result_set=candidate_result_set,
        cq_satisfied=cq_satisfied,
        qm=qm,
        beam_search_params=beam_search_params,
    )
    # The simple procedure does not count redundant descriptions.
    return candidate_result_set, candidate_queue, None
def prepare_beam_and_candidate_result_set_simple(candidate_result_set=None, cq_satisfied=None, qm=None, beam_search_params=None):
    """Pick the top-``w`` beam and the top-``q`` result set by quality.

    Args:
        candidate_result_set: Nested list (list of lists of candidates)
            holding the result set so far. ``None`` is treated as empty.
            The list is not modified.
        cq_satisfied: Iterable of candidates; each is indexed as
            ``candidate['qualities'][qm]``.
        qm: Key of the quality measure used for ranking (descending).
        beam_search_params: Mapping with ``'w'`` (beam width) and ``'q'``
            (result set size).

    Returns:
        A tuple ``([rs_candidates], candidate_queue)``: the new result set
        wrapped in a single-element list (same nested shape as the input)
        and the new beam.
    """
    def quality(candidate):
        return candidate['qualities'][qm]

    # Beam: the w best candidates of this level.
    ranked = sorted(cq_satisfied, key=quality, reverse=True)
    candidate_queue = ranked[:beam_search_params['w']]

    # Result set: pool the previous result set with the new beam, then keep
    # the q best. The pool is a fresh list so the caller's
    # candidate_result_set is left untouched.
    previous = [] if candidate_result_set is None else candidate_result_set
    pooled = [item for sublist in previous for item in sublist]
    pooled.extend(candidate_queue)
    # list.sort is stable, so ties keep "previous results first" order.
    pooled.sort(key=quality, reverse=True)
    rs_candidates = pooled[:beam_search_params['q']]

    return [rs_candidates], candidate_queue
def prepare_beam_and_candidate_result_set_redundancy_techniques(candidate_result_set=None, cq_satisfied=None, qm=None, beam_search_params=None, data_size=None, wcs_params=None):
    """Select the beam with description-based, then cover-based selection.

    Args:
        candidate_result_set: Nested list (list of lists of candidates)
            holding the result set so far. ``None`` is treated as empty.
            The list is not modified.
        cq_satisfied: Iterable of candidates; each is indexed as
            ``candidate['qualities'][qm]``.
        qm: Key of the quality measure used for ranking (descending).
        beam_search_params: Mapping with ``'w'`` (beam width) and ``'q'``
            (number of beam entries added to the result set per level).
        data_size: Forwarded to ``cbs.select_using_weighted_coverage``.
        wcs_params: Mapping read for ``'stop_number_description_selection'``
            and forwarded to ``cbs.select_using_weighted_coverage``.

    Returns:
        A tuple ``([rs_candidates], candidate_queue, n_redun_descs)``: the
        flattened result set wrapped in a single-element list, the new beam,
        and the second value returned by
        ``dbs.remove_redundant_descriptions``.
    """
    # Sort cq_satisfied on quality value, best first.
    cq_sorted = sorted(cq_satisfied, key=lambda i: i['qualities'][qm], reverse=True)

    # Apply description-based selection.
    # Author's notes: difficult to know when to stop; only from 2nd level
    # and onwards.
    print('description-based selection')
    candidates, n_redun_descs = dbs.remove_redundant_descriptions(
        descs=cq_sorted,
        stop_number=wcs_params['stop_number_description_selection'],
        qm=qm,
        beam_search_params=beam_search_params,
    )

    # Apply cover-based selection to obtain the beam of width w.
    print('cover-based selection')
    candidate_queue = cbs.select_using_weighted_coverage(
        candidates=candidates,
        stop_number=beam_search_params['w'],
        qm=qm,
        data_size=data_size,
        wcs_params=wcs_params,
    )

    # The same selection procedure applies to the result set, but only at the
    # end of the entire beam search. At every level we just take the first q
    # entries of the beam (q has to be smaller than w).
    selected_for_result_list = candidate_queue[0:beam_search_params['q']]

    # Flatten the previous result set into a fresh list and add this level's
    # picks, leaving the caller's candidate_result_set untouched.
    previous = [] if candidate_result_set is None else candidate_result_set
    rs_candidates = [item for sublist in previous for item in sublist]
    rs_candidates.extend(selected_for_result_list)

    return [rs_candidates], candidate_queue, n_redun_descs