In [1]:
import os

# here we try to import a kiara specific (Python) module
# if it is not found, an exception is caught, and we install kiara and a few
# dependencies into the current virtual environment
#
# note: if the base kiara package was already installed, but not any of the
# plugin packages, those might not be installed when doing it like this, but you
# get the idea...
#
# other note: we could just do the `pip install` every time (without the try/catch block),
# but this would feel like a bit of a waste...

try:
    from kiara import KiaraAPI
except:
    import sys
    !{sys.executable} -m pip install -U 'kiara[all_plugins]'


In [2]:
# set up the default paths used throughout this notebook

# absolute path of the current working directory (an empty string is resolved
# against the working directory of the kernel)
THIS_DIR = os.path.abspath("")

# csv file containing the journal nodes, addressed via the parent folder of THIS_DIR
JOURNAL_NODES_FILE = os.path.join(
    THIS_DIR,
    "..",
    "data",
    "journals",
    "JournalNodes1902.csv",
)

In [3]:
# import the kiara api class (the first cell makes sure the package is installed)
from kiara import KiaraAPI

# retrieve the default instance via the class-level 'instance()' accessor...
api = KiaraAPI.instance()
# ...and leave it as the last expression of the cell, so the notebook displays it
api

In [4]:
# we can use any of the functions of the api instance, the reference
# documentation for the class is available here:
# https://dharpa.org/kiara/latest/reference/kiara/interfaces/python_api/__init__/#kiara.interfaces.python_api.KiaraAPI

# called without arguments, to get an overview of the operation ids we can use
api.list_operation_ids()

['create.database.from.file',
 'create.database.from.file_bundle',
 'create.database.from.table',
 'create.network_data.from.files',
 'create.network_data.from.tables',
 'create.stopwords_list',
 'create.table.from.file',
 'create.table.from.file_bundle',
 'date.check_range',
 'date.extract_from_string',
 'download.file',
 'download.file_bundle',
 'export.file.as.file',
 'export.network_data.as.csv_files',
 'export.network_data.as.graphml_file',
 'export.network_data.as.sql_dump',
 'export.network_data.as.sqlite_db',
 'export.table.as.csv_file',
 'extract.date_array.from.table',
 'file_bundle.pick.file',
 'file_bundle.pick.sub_folder',
 'filter.table',
 'generate.LDA.for.tokens_array',
 'import.database.from.local_file_path',
 'import.file',
 'import.file_bundle',
 'import.network_data.from.local_file_paths',
 'import.table.from.local_file_path',
 'import.table.from.local_folder_path',
 'kiara_plugin.my_kiara_module.my_kiara_module.tutorial_module',
 'list.contains',
 'logic.and',
 'lo

In [5]:
# most likely, we'll want to run an operation, so let's pick one of the ids
# from the operation list

op_id = "import.table.from.local_file_path"

# now, let's check the information about the operation, especially which input(s) it requires
api.retrieve_operation_info(op_id)


In [6]:
# the operation asks for a 'path' input; we hand it the location of the
# journal nodes csv file stored in JOURNAL_NODES_FILE
inputs = {"path": JOURNAL_NODES_FILE}

# run the operation with those inputs...
results = api.run_job(op_id, inputs=inputs)
# ...and display a preview of the results
results


In [7]:
# we are really only interested in the 'table' output field, so let's get the
# 'Value' object stored under that field name

table_val = results["table"]
table_val  # displaying it shows some general (mostly internal) information and metadata for the value

In [8]:
# maybe we want to run a sql query against the table; let's look for a suitable
# operation by passing 'query' to the operation id listing

api.list_operation_ids("query")

['query.database', 'query.table']

In [9]:
# 'query.table' looks good, let's have a look at the information for this operation...

api.retrieve_operation_info('query.table')

In [10]:
# according to the operation info, we only need to provide two inputs: the
# 'table' value, and the sql 'query' (which refers to the table as 'data')
inputs = dict(
    table=table_val,
    query="SELECT Label, City from data where City like 'Berlin'",
)

# run the 'query.table' operation and display a preview of its results
results = api.run_job(
    "query.table",
    inputs=inputs,
)
results

In [11]:
# let's get the value stored under the 'query_result' field of the job results...

query_result = results['query_result']

# ...and have a look at its lineage (even though it's not particularly interesting yet)
query_result.lineage


In [13]:
# the value also exposes some (type specific) properties, let's display them...

query_result.property_values


In [14]:
# the value is of type 'table'; via its 'data' attribute we can access the data
# as a Python object

query_table_data = query_result.data

# let's check which (Python) type is used under the hood
type(query_table_data)

kiara_plugin.tabular.models.table.KiaraTable

In [15]:
# if we look up the documentation for this type, we can find out about its
# utility methods etc. For this type, the documentation is here:
# https://dharpa.org/kiara_plugin.tabular/latest/reference/kiara_plugin/tabular/models/table/#kiara_plugin.tabular.models.table.KiaraTable
#
# (TODO: implement discoverability for these types of things)

# for example, we can get the table as a Pandas dataframe:

df = query_table_data.to_pandas()
df


Unnamed: 0,Label,City
0,Die Krankenpflege,Berlin
1,Die deutsche Klinik am Eingange des zwanzigste...,Berlin
2,Therapeutische Monatshefte,Berlin
3,Allgemeine Zeitschrift für Psychiatrie,Berlin
4,Archiv für Psychiatrie und Nervenkrankheiten,Berlin
5,Berliner klinische Wochenschrift,Berlin
6,Charité Annalen,Berlin
7,Monatsschrift für Psychiatrie und Neurologie,Berlin
8,Virchows Archiv,Berlin
9,Zeitschrift für pädagogische Psychologie und P...,Berlin
