In [1]:
# Import the Jupyter helper package and list the names it exposes.
# NOTE(review): this import happens before the install fallback of the next cell, so on
# an environment where 'kiara_plugin.jupyter' is not installed yet this cell raises an
# ImportError -- consider running the install cell first.
import kiara_plugin.jupyter
dir(kiara_plugin.jupyter)

['KIARA_METADATA',
 '__author__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__email__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'defaults',
 'ensure_kiara_plugins',
 'get_version',
 'os',
 'utils']

In [2]:
try:
    from kiara_plugin.jupyter import ensure_kiara_plugins
except:
    import sys
    print("Installing 'kiara_plugin.jupyter'...")
    !{sys.executable} -m pip install -q kiara_plugin.jupyter
    from kiara_plugin.jupyter import ensure_kiara_plugins

# select the kiara plugin packages you are interested in, available currently:
#
# - kiara_plugin.tabular
# - kiara_plugin.onboarding
# - kiara_plugin.network_analysis
# - kiara_plugin.language_processing

ensure_kiara_plugins("kiara_plugin.onboarding", "kiara_plugin.tabular")

Output()

In [3]:
from kiara import KiaraAPI

# create the default api instance; the following cells use `api` to list, inspect
# and run kiara operations
api = KiaraAPI.instance()


In [4]:
# we can use any of the functions of the api instance, reference documentation:
# https://dharpa.org/kiara/latest/reference/kiara/interfaces/python_api/__init__/#kiara.interfaces.python_api.KiaraAPI

# without an argument, this lists the ids of all operations the api instance knows about
api.list_operation_ids()

['create.database.from.file',
 'create.database.from.file_bundle',
 'create.database.from.table',
 'create.table.from.file',
 'create.table.from.file_bundle',
 'date.check_range',
 'date.extract_from_string',
 'download.file',
 'download.file_bundle',
 'export.file.as.file',
 'export.table.as.csv_file',
 'extract.date_array.from.table',
 'file_bundle.pick.file',
 'file_bundle.pick.sub_folder',
 'import.database.from.local_file_path',
 'import.file',
 'import.file_bundle',
 'import.table.from.local_file_path',
 'import.table.from.local_folder_path',
 'jupyter.example',
 'list.contains',
 'logic.and',
 'logic.nand',
 'logic.nor',
 'logic.not',
 'logic.or',
 'logic.xor',
 'onboard.zenodo_record',
 'parse.date_array',
 'query.database',
 'query.table',
 'string_filter.tokens',
 'table.pick.column',
 'table_filter.drop_columns',
 'table_filter.select_columns',
 'table_filter.select_rows']

In [5]:
# most likely, we'll want to run an operation, let's pick one
# (`op_id` is picked up again by the next cell, which runs the job)

op_id = "download.file"

# now, let's check information about the operation, esp. what input(s) it requires
api.retrieve_operation_info(op_id)


In [6]:
# 'download.file' takes a 'url' input and, optionally, a 'file_name'
inputs = dict(
    url="https://raw.githubusercontent.com/DHARPA-Project/kiara.examples/main/examples/data/journals/JournalNodes1902.csv",
    file_name="JournalNodes1902.csv",
)

# run the operation selected via `op_id` in the previous cell; the job results are
# left as the last expression of the cell, so the notebook renders a preview of them
results = api.run_job(op_id, inputs=inputs)
results

In [7]:
# this looks good, we are really only interested in the 'file' output, let's store it in a variable for later:

downloaded_file = results["file"]

# now we want to do something with that file, specifically, convert it into a table
# the operation id for this is (note: this overwrites the `op_id` of the download step):

op_id = "create.table.from.file"

# and let us check information about the operation, esp. its input(s)
api.retrieve_operation_info(op_id)


In [8]:
# same recipe as for the download: assemble the input(s), then run a kiara job;
# the only input we provide here is the file we kept in `downloaded_file`
inputs = dict(file=downloaded_file)

# `op_id` was set to "create.table.from.file" in the previous cell
results = api.run_job(op_id, inputs=inputs)
# leave the results as the last expression to get a preview
results


In [9]:
# we are really only interested in the 'table' output field, let's get the 'Value' object for it
# (`table_val` is used further down as the input of the sql query)

table_val = results["table"]
table_val  # this displays some general (mostly internal) information and metadata for the value

In [10]:
# maybe we want to run a sql query against the table, let's find a good operation;
# the string argument filters the listing (here only the 'query.*' operation ids come back)

api.list_operation_ids("query")

['query.database', 'query.table']

In [11]:
# 'query.table' looks good, let's see which input(s) it expects...

api.retrieve_operation_info('query.table')

In [12]:
# given the operation info, we only need to provide the 'table' to query and the sql
# 'query' itself; note that the sql statement refers to the table under the name 'data'
inputs = dict(
    table=table_val,
    query="SELECT Label, City from data where City like 'Berlin'",
)

# run the job and preview its results
results = api.run_job("query.table", inputs=inputs)
results

In [13]:
# let's get the value of the 'query_result' field of the job results...

query_result = results['query_result']

# and have a look at its lineage (even though it's not particularly interesting yet)
query_result.lineage


In [14]:
# and some (type specific) properties of the query result value...

query_result.property_values


In [15]:
# the value is of type 'table', we can access the data as a Python object
# via the `.data` attribute of the value

query_table_data = query_result.data

# we can check out what (Python) type is used under the hood
type(query_table_data)

kiara_plugin.tabular.models.table.KiaraTable

In [16]:
# and if we look up documentation about this type, we can use some of its
# utility methods etc. For this type, the documentation is here: https://dharpa.org/kiara_plugin.tabular/latest/reference/kiara_plugin/tabular/models/table/#kiara_plugin.tabular.models.table.KiaraTable
#
# TODO: implement discoverability for these types of things, so links to documentation can be accessed
# directly via the Python object

# for example, we can get the table as a Pandas dataframe:

df = query_table_data.to_pandas()
df


Unnamed: 0,Label,City
0,Die Krankenpflege,Berlin
1,Die deutsche Klinik am Eingange des zwanzigste...,Berlin
2,Therapeutische Monatshefte,Berlin
3,Allgemeine Zeitschrift für Psychiatrie,Berlin
4,Archiv für Psychiatrie und Nervenkrankheiten,Berlin
5,Berliner klinische Wochenschrift,Berlin
6,Charité Annalen,Berlin
7,Monatsschrift für Psychiatrie und Neurologie,Berlin
8,Virchows Archiv,Berlin
9,Zeitschrift für pädagogische Psychologie und P...,Berlin
