In [1]:
import os

from knowledgerepr import fieldnetwork
from modelstore.elasticstore import StoreHandler
from algebra import API
from qbe_module.column_selection import ColumnSelection
from qbe_module.query_by_example import ExampleColumn, QueryByExample
from qbe_module.materializer import Materializer


In [2]:
# Location of the serialized field-network graph. Defaults to the original
# hard-coded directory; set the AURUM_GRAPH_PATH environment variable to point
# at another copy without editing the notebook.
# NOTE(review): the default ends with a trailing slash -- confirm whether
# deserialize_network requires one before overriding with a bare directory.
graph_path = os.environ.get('AURUM_GRAPH_PATH', '/home/cc/advw_graph_02/')
store_client = StoreHandler()
network = fieldnetwork.deserialize_network(graph_path)
# Shared API handle: the later cells pass `aurum_api` to ColumnSelection and
# QueryByExample.
aurum_api = API(network=network, store_client=store_client)
# NOTE(review): `Demo` is not imported in any visible cell, so this line raises
# NameError on a fresh kernel unless it is imported/defined elsewhere -- confirm
# and add the import to the first cell.
demo = Demo(aurum_api)

In [3]:
# Show the interactive query-by-example UI. In the recorded run this rendered
# "Add Column" / "Remove Column" buttons, an output area and a "Confirm" button.
# NOTE(review): none of the later cells read anything back from `demo`; they
# build their examples in code instead.
demo.qbe_interface()

HBox(children=(Button(description='Add Column', style=ButtonStyle()), Button(description='Remove Column', styl…

Output()

Button(description='Confirm', style=ButtonStyle())

In [4]:
# Look up columns whose values match the example currency codes, with no
# constraint on the attribute name (attr is the empty string).
column_selection = ColumnSelection(aurum_api)
# `column_retreival` (sic) is the method name as spelled by the project API.
candidate_columns = column_selection.column_retreival(attr="", examples=['USD', 'CNY'])
# One line per candidate: table, attribute, and its examples_set.
# NOTE(review): in the recorded output examples_set holds a subset of the
# requested examples (e.g. {'CNY'} for Password.csv PasswordSalt), so it
# presumably lists the examples that column matched -- confirm.
for col in candidate_columns.values():
    print(col.tbl_name, col.attr_name, col.examples_set)

CurrencyRate.csv ToCurrencyCode {'CNY', 'USD'}
Password.csv PasswordSalt {'CNY'}
Currency.csv CurrencyCode {'CNY', 'USD'}
CountryRegionCurrency.csv CurrencyCode {'CNY', 'USD'}
CurrencyRate.csv FromCurrencyCode {'USD'}
Password.csv PasswordHash {'USD'}


In [5]:
# One ExampleColumn per desired output column; each is described only by sample
# values (attr is left empty): country names, currency codes, currency names.
example_columns = [
    ExampleColumn(attr='', examples=values)
    for values in (
        ['United States', 'China'],
        ['USD', 'CNY'],
        ['US Dollar', 'Yuan Renminbi'],
    )
]
qbe = QueryByExample(aurum_api)
candidate_list = qbe.find_candidate_columns(example_columns)

# Dump the candidates group by group; the '00000' line marks the start of each
# group in candidate_list.
for candidate in candidate_list:
    print('00000')
    for x in candidate:
        print(x.tbl_name, x.attr_name)

00000
CountryRegion.csv Name
00000
CurrencyRate.csv ToCurrencyCode
Password.csv PasswordSalt
Currency.csv CurrencyCode
CountryRegionCurrency.csv CurrencyCode
CurrencyRate.csv FromCurrencyCode
Password.csv PasswordHash
00000
Currency.csv Name


In [6]:
# Ask the QBE engine for join graphs over the candidate column groups found above.
# NOTE(review): the recorded output contains tuples and defaultdict dumps that
# are not produced by the print below, so they presumably come from debug
# prints inside the library call -- confirm.
join_graphs = qbe.find_join_graphs_between_candidate_columns(candidate_list)
print(join_graphs)

((0, 1), (0, 2))
{(0, 1): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0e10>, <ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0b70>]}), (1, 0): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0e10>, <ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0b70>]}), (0, 2): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.JoinPath object at 0x7faf7e76cf28>]}), (2, 0): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.JoinPath object at 0x7faf7e76cf28>]})}
[(1, 0), (0, 2)]
[(1, 0), (0, 2)]
[(1, 0), (0, 2)]
((0, 1), (1, 2))
{(0, 1): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0e10>, <ver_utils.join_path_search.JoinPath object at 0x7faf7e5c0b70>]}), (1, 0): defaultdict(<class 'list'>, {'CountryRegion.csv': [<ver_utils.join_path_search.Join

In [7]:
# Pretty-print every join graph: a "=" rule opens each graph, and each of its
# join paths is followed by a "-" rule.
for join_graph in join_graphs:
    print("==============")
    for join_path in join_graph.paths:
        # Single print call; sep="\n" yields the same two lines as two prints.
        print(join_path.to_str(), "---------------", sep="\n")

CountryRegion.csv.CountryRegionCode JOIN CountryRegionCurrency.csv.CountryRegionCode
---------------
CountryRegionCurrency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode
---------------
CountryRegion.csv.CountryRegionCode JOIN CountryRegionCurrency.csv.CountryRegionCode
CountryRegionCurrency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode
---------------
Currency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode
---------------
CountryRegion.csv.CountryRegionCode JOIN CountryRegionCurrency.csv.CountryRegionCode
CountryRegionCurrency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode
---------------
Currency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode
---------------


In [9]:
# Directory the Materializer reads tables from. Defaults to the original
# hard-coded location; set the AURUM_TABLE_PATH environment variable to
# override it without editing the notebook.
materializer = Materializer(
    table_path=os.environ.get('AURUM_TABLE_PATH', '/home/cc/adventureWork/'),
    sample_size=1000,  # NOTE(review): presumably caps the rows sampled per table -- confirm
)
# Materialize only the first join graph. As the cell's last expression, the
# return value is displayed (a table with columns Name, CurrencyCode, Name.1
# in the recorded run).
materializer.materialize_join_graph(join_graphs[0])

{(0, 1): <ver_utils.join_path_search.JoinPath object at 0x7faf7e593198>, (1, 2): <ver_utils.join_path_search.JoinPath object at 0x7faf7e5937b8>}
defaultdict(<class 'set'>, {'CountryRegion.csv': {'Name'}, 'CountryRegionCurrency.csv': {'CurrencyCode'}, 'Currency.csv': {'Name'}})


Unnamed: 0,Name,CurrencyCode,Name.1
0,Emirati Dirham,AED,Emirati Dirham
1,Argentine Peso,ARS,Argentine Peso
2,Shilling,ATS,Shilling
3,EURO,EUR,EURO
4,EURO,EUR,EURO
...,...,...,...
104,Uruguayan Peso,UYU,Uruguayan Peso
105,Bolivar,VEB,Bolivar
106,Dong,VND,Dong
107,Rand,ZAR,Rand


In [8]:
# Find join paths between the candidate columns (path-level counterpart of the
# join-graph search used earlier in the notebook).
# NOTE(review): the cell ends in an assignment, so the Column / JoinPath reprs
# in the recorded output presumably come from prints inside the call -- confirm.
join_paths = qbe.find_joins_between_candidate_columns(candidate_list)

<ver_utils.column.Column object at 0x7f446754dba8>
[<ver_utils.join_path_search.JoinPath object at 0x7f44675b3eb8>, <ver_utils.join_path_search.JoinPath object at 0x7f44675b3e48>]


In [9]:
# Print each join path in its textual form.
# In the recorded output a single path can span several "A JOIN B" lines.
for join_path in join_paths:
    print(join_path.to_str())

CountryRegion.csv.CountryRegionCode JOIN CountryRegionCurrency.csv.CountryRegionCode
CountryRegion.csv.CountryRegionCode JOIN CountryRegionCurrency.csv.CountryRegionCode
CountryRegionCurrency.csv.CurrencyCode JOIN Currency.csv.CurrencyCode


In [None]:
# Scratch exploration: resolve one node id to a hit and list its neighbors.
# NOTE(review): this cell has no execution count (never run in the saved
# notebook); the imports below live here rather than in the first cell.
import ddapi
# NOTE(review): DRS is imported but not referenced in this cell.
from api.apiutils import DRS, Relation
# `_nid_to_hit` is underscore-prefixed, i.e. private by Python naming convention.
hit = aurum_api._nid_to_hit('1357802409')
print(hit)
dd_api = ddapi.DDAPI(network)
# NOTE(review): `drs` is assigned but not used afterwards in this cell.
drs = dd_api.drs_from_hit(hit)
# Neighbors of the hit under the CONTENT_SIM relation
# (presumably content similarity -- confirm against the Relation enum).
res = aurum_api.neighbors(hit, Relation.CONTENT_SIM)
for x in res:
    print(x)