# Upload Knowledge Graph to Bostrom network

In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from IPython.display import HTML, display

from config import ipfs_client
from src.utils_cyber import create_cls

/Users/sergenedashkovsky/Library/Python/3.8/lib/python/site-packages/ipfshttpclient/client/__init__.py:75: VersionMismatch: Unsupported daemon version '0.10.0' (not in range: 0.5.0 ≤ … < 0.9.0)


## Read and Preprocess Ontology Data

In [2]:
def _normalize_label(raw_label) -> str:
    """Turn a raw ontology identifier into a plain lowercase label.

    Underscores become spaces, the text is lowercased, and everything from the
    first ' (' or the first ',' onwards is discarded,
    e.g. 'Foo_Bar_(baz)' -> 'foo bar'.
    """
    return str(raw_label).replace('_', ' ').lower().split(' (')[0].split(',')[0]


def read_and_preprocess_ontology(ontology_file_name: str, show_aggregations: bool = True) -> pd.DataFrame:
    """Load an ontology edge list, normalise its labels and attach IPFS hashes.

    The file is read as a ';'-separated CSV that must contain `subject` and
    `object` columns. Both columns are normalised with `_normalize_label`, each
    distinct label is passed to `ipfs_client.add_str`, and the value it returns
    is stored next to the label.

    Side effects: renders the first 50 raw rows (and, optionally, top-10 tables),
    prints summary counts, and calls `ipfs_client.add_str` once per distinct label.

    :param ontology_file_name: path of the ';'-separated ontology CSV
    :param show_aggregations: when True, also display the 10 most frequent
        subjects and the 10 most frequent objects
    :return: the data frame with normalised `subject` / `object` columns plus
        `subject_ipfs_hash` and `object_ipfs_hash`
    """
    _ontology_df = pd.read_csv(ontology_file_name, sep=';')
    display(HTML(_ontology_df.head(50).to_html(index=False)))

    # pandas stores empty cells as NaN; such a row has no usable endpoint and the
    # string normalisation below is not meaningful for it, so skip it explicitly.
    _incomplete_rows = _ontology_df[['subject', 'object']].isna().any(axis=1)
    if _incomplete_rows.any():
        print(f'Skipping rows with a missing subject or object: {int(_incomplete_rows.sum()):>,}')
        _ontology_df = _ontology_df[~_incomplete_rows].reset_index(drop=True)

    for _column in ('subject', 'object'):
        _ontology_df[_column] = _ontology_df[_column].map(_normalize_label)

    _unique_labels = set(_ontology_df['subject']) | set(_ontology_df['object'])
    print(f'Number of items: {len(_unique_labels):>,}')
    print(f'Number of links: {len(_ontology_df):>,}')

    if show_aggregations:
        # (heading, column to group by, column whose entries are counted)
        _aggregation_specs = (
            ('\nTop subjects', 'subject', 'object'),
            ('\nTop objects', 'object', 'subject'),
        )
        for _heading, _group_column, _counted_column in _aggregation_specs:
            print(_heading)
            _top_table = (
                _ontology_df.groupby(_group_column)[_counted_column]
                .agg(np.count_nonzero)
                .reset_index()
                .sort_values(_counted_column, ascending=False)
                .head(10)
            )
            display(HTML(_top_table.to_html(index=False)))

    print('\nUpload CIDs to IPFS')
    # One add_str call per distinct label; rows then look their hashes up by label.
    ipfs_hashes_dict = {name: ipfs_client.add_str(name) for name in tqdm(_unique_labels)}
    _ontology_df['subject_ipfs_hash'] = _ontology_df['subject'].map(ipfs_hashes_dict)
    _ontology_df['object_ipfs_hash'] = _ontology_df['object'].map(ipfs_hashes_dict)

    return _ontology_df

In [3]:
# Run the preprocessing pass on the CaLiGraph ontology export (this also sends every
# distinct label through ipfs_client) and keep the resulting table on disk.
ontology_df = read_and_preprocess_ontology('data/caligraph-ontology.nt.csv')
ontology_df.to_csv(path_or_buf='data/dbpedia_caligraph_relations_for_upload.csv')

subject,object
1970s_event,Class
1970s_event,Event
1980s_event,Class
1980s_event,Event
1990s_event,Class
1990s_event,Event
19th-century_maritime_incident,Class
19th-century_maritime_incident,Maritime_incident
19th-century_person,2nd-millennium_person
19th-century_person,Class


Number of items: 1,349
Number of links: 6,130

Top subjects


subject,object
artist,50
badminton player,47
ambassador,45
academic journal,43
body of water,42
airport,42
airline,41
bank,40
album,39
baseball player,38



Top objects


object,subject
class,1206
work,189
unit of work,187
topical concept,183
time period,176
specie,157
sports season,157
thing,127
results of a sport competition,112
person function,111



Upload CIDs to IPFS


  0%|          | 0/1349 [00:00<?, ?it/s]

## Create cyberlinks

In [4]:
# Rows 80..399 of the two hash columns, as [subject_hash, object_hash] pairs.
# NOTE(review): the 80:400 window looks like a manually chosen batch - confirm before re-running.
link_candidates = (
    ontology_df[['subject_ipfs_hash', 'object_ipfs_hash']]
    .iloc[80:400]
    .to_numpy()
    .tolist()
)
# Preview the first few pairs as the cell output.
link_candidates[:5]

[['QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob',
  'QmPeLePmoiUViajVjay7CiqDXRL8BcYHXnW99yBKN1uLi2'],
 ['QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob',
  'QmUveVbC1JdAJE4W53BwGFLJ5e2VkMKX5HB8MZ7TLBKkEy'],
 ['QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob',
  'QmPWVLevpzEki6bcBF7bEk7rrNRhFSnBXD4imtTYM2pAgA'],
 ['QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob',
  'QmSpPA6mVdwq41xLBtgoHsi1ZrTn7DSi8kvNX9crhzDRFm'],
 ['QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob',
  'QmWWcKFcfXMNQGGUrymVMm973SudGmeLGeVsZGCoQQaG4r']]

In [5]:
# Number of [subject, object] pairs passed to create_cls in one call.
NUMBER_CYBERLINK_IN_TX = 50

# Consecutive slices of at most NUMBER_CYBERLINK_IN_TX pairs each.
link_candidates_chunks = [
    link_candidates[offset:offset + NUMBER_CYBERLINK_IN_TX]
    for offset in range(0, len(link_candidates), NUMBER_CYBERLINK_IN_TX)
]

# Explicit loop (not a comprehension) so `res` still holds the results gathered
# so far if a later create_cls call raises.
res = []
for link_candidates_chunk in link_candidates_chunks:
    res += [create_cls(link_candidates=link_candidates_chunk, print_message=True)]


{'code': 0,
 'codespace': '',
 'data': None,
 'gas_used': 1311249,
 'gas_wanted': 2200000,
 'height': 2366895,
 'info': None,
 'logs': [{'events': [{'attributes': [{'key': 'particleFrom',
                                       'value': 'QmTYZZkRC8Kh4Uj3NhbMTbsHtn4GUFnKfMfKVU5BiMJHob'},
                                      {'key': 'particleTo',
                                       'value': 'QmPeLePmoiUViajVjay7CiqDXRL8BcYHXnW99yBKN1uLi2'},
                                      {'key': 'neuron',
                                       'value': 'bostrom1cj8j6pc3nda8v708j3s4a6gq2jrnue7j857m9t'}],
                       'type': 'cyberlink'},
                      {'attributes': [{'key': 'action',
                                       'value': '/cyber.graph.v1beta1.MsgCyberlink'},
                                      {'key': 'module', 'value': 'graph'},
                                      {'key': 'sender',
                                       'value': 'bostrom1cj8j6pc3nda8v708j3s4a6g