In [1]:
from presentation_utils import *
from DoD import view_4c_analysis_baseline as v4c
import glob
import pprint

In [2]:
%%javascript
// Set the output-area auto-scroll threshold to 9999.
// NOTE(review): presumably this keeps the long outputs of later cells fully
// expanded instead of collapsing them into a scroll box, and relies on the
// global `IPython` object of the notebook front-end — confirm it is available
// in the front-end this notebook is run with.
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

# Config

In [3]:
# --- Input -------------------------------------------------------------
# Folder that holds the candidate views (passed to 4C and used for globbing).
dir_path = "./presentation_demo_views/"

# --- 4C ----------------------------------------------------------------
# Upper bound on the number of columns in a candidate key.
candidate_key_size = 2

# --- Presentation ------------------------------------------------------
# How many sample rows are shown to the user.
sample_size = 5
# Exploration / exploitation: the next pair to present is picked among the
# top_k candidates.
top_k = 10
# Epsilon for the epsilon-greedy choice.
epsilon = 0.1
# Passed to the presentation step; presumably a cap on the number of user
# interactions — TODO confirm against presentation_utils.
max_num_interactions = 100

# Run 4C

In [4]:
# Run 4C
print("Running 4C...")

# v4c.main returns five values. The first four are listed below; the fifth
# (all_pair_contr_compl) is consumed later by the pre-processing cell.
(
    compatible_groups,
    contained_groups,
    complementary_groups,
    contradictory_groups,
    all_pair_contr_compl,
) = v4c.main(dir_path, candidate_key_size)


def _pair_line(key_columns, first_path, second_path):
    """Render one view pair as "<list of key columns>: <first> - <second>"."""
    return "".join((str(list(key_columns)), ": ", first_path, " - ", second_path))


# Compatible and contained groups are printed one group per line.
for heading, groups in (
    ("Compatible groups:", compatible_groups),
    ("Contained groups:", contained_groups),
):
    print(heading)
    for group in groups:
        print(group)

# Contradictory entries unpack to (path1, key, key values, path2); the key
# values are not shown here.
print("Contradictory groups:")
for path1, candidate_key_tuple, key_value_tuples, path2 in contradictory_groups:
    print(_pair_line(candidate_key_tuple, path1, path2))

# Complementary entries unpack to (path1, path2, key, <unused>, <unused>).
print("Complementary groups:")
for path1, path2, candidate_key_tuple, _, _ in complementary_groups:
    print(_pair_line(candidate_key_tuple, path1, path2))

Running 4C...
Found 11 valid tables
View candidates classify into 1 groups based on schema

Num elements with schema -10669850065081178778 is: 11


100%|██████████| 33/33 [00:01<00:00, 18.35it/s]

Compatible groups:
['./presentation_demo_views/view_0', './presentation_demo_views/view_10']
Contained groups:
['./presentation_demo_views/view_9', './presentation_demo_views/view_7', './presentation_demo_views/view_1', './presentation_demo_views/view_3']
['./presentation_demo_views/view_8', './presentation_demo_views/view_6', './presentation_demo_views/view_0', './presentation_demo_views/view_2']
['./presentation_demo_views/view_7', './presentation_demo_views/view_1', './presentation_demo_views/view_3']
['./presentation_demo_views/view_6', './presentation_demo_views/view_0', './presentation_demo_views/view_2']
['./presentation_demo_views/view_4', './presentation_demo_views/view_0']
['./presentation_demo_views/view_5', './presentation_demo_views/view_1']
Contradictory groups:
['Building Name']: ./presentation_demo_views/view_9 - ./presentation_demo_views/view_8
['Building Name']: ./presentation_demo_views/view_7 - ./presentation_demo_views/view_8
['Building Name']: ./presentation_demo_




# Pruning 4C views
## Remove identical views and keep the contained view with the largest cardinality

In [5]:
# Stage 0: every file in the views directory whose name starts with "view_".
all_view_files = glob.glob(dir_path + "/view_*")
print("Number of views: ", len(all_view_files))

# Stage 1: prune using the compatible groups reported by 4C.
deduplicated_view_files = prune_compatible_views(all_view_files, compatible_groups)
print("After pruning compatible views: ", len(deduplicated_view_files))

# Stage 2: prune using the contained groups reported by 4C. The result keeps
# the name `view_files` because the following cells read it.
view_files = prune_contained_views(deduplicated_view_files, contained_groups)
print("After pruning contained views: ", len(view_files))

Number of views:  11
After pruning compatible views:  10
After pruning contained views:  6


# Pre-processing and generating sample rows to present

In [6]:
print("Pre-processing...")

# preprocess() takes the pruned views, the contradictory/complementary pair
# information from 4C and the sample size, and returns the three inputs that
# the presentation step needs.
(
    contr_or_compl_view_pairs,
    non_contr_or_compl_views,
    row_to_path_dict,
) = preprocess(view_files, all_pair_contr_compl, sample_size)

  6%|▌         | 2/33 [00:00<00:02, 13.86it/s]

Pre-processing...


100%|██████████| 33/33 [00:00<00:00, 45.43it/s]


# Actual presentation

In [7]:
%gui asyncio
# The line magic above turns on asyncio event-loop integration for the kernel,
# which the asynchronous widgets used by the presentation need.
# Using asynchronous widgets. Requires ipykernel 4.7 or later
# pip install ipython ipykernel --upgrade

# async: start the interactive presentation without blocking this cell.
# `task` is awaited in the next cell, where task.result() is unpacked into
# (final_view_scores, num_interactions).
task = present_async(view_files, contr_or_compl_view_pairs, non_contr_or_compl_views, row_to_path_dict,
                     top_k, epsilon, max_num_interactions, sample_size)

# sync alternative (kept for reference): takes the same arguments and is
# written to return the same two values directly instead of a task.
# final_view_scores, num_interactions = present(view_files, contr_or_compl_view_pairs, non_contr_or_compl_views, row_to_path_dict,
#                                               top_k, epsilon, max_num_interactions, sample_size)

Output()

# Final view scores

In [8]:
# async
await task
final_view_scores, num_interactions = task.result()

print("Final view scores:")
pprint.pprint(final_view_scores)

print("Number of interactions = " + str(num_interactions))

Final view scores:
[('./presentation_demo_views/view_7', 14),
 ('./presentation_demo_views/view_9', 10),
 ('./presentation_demo_views/view_5', 4),
 ('./presentation_demo_views/view_8', 3),
 ('./presentation_demo_views/view_4', 2),
 ('./presentation_demo_views/view_6', 1)]
Number of interactions = 8
