In [1]:
# ! pip install kgforge

In [2]:
from kgforge.core import KnowledgeGraphForge

In [3]:
# Build the KnowledgeGraphForge instance from the demo configuration file;
# the remaining cells reuse it through the name `forge`.
forge = KnowledgeGraphForge("../../configurations/demo-forge.yml")

## Imports

In [4]:
from kgforge.core import Resource

In [5]:
from kgforge.specializations.resources import Dataset

In [6]:
import pandas as pd

## Creation with files

In [7]:
! ls -p ../../data | egrep -v /$

associations.tsv
persons.csv


In [8]:
# Create a Dataset named "Interesting files" that is tied to `forge`.
dataset = Dataset(forge, name="Interesting files")

In [9]:
# Attach the content of ../../data/ to the dataset. The printout in the next cell
# shows hasPart holding a LazyAction around Store.upload rather than uploaded files.
# NOTE(review): presumably the upload only happens when the dataset is registered — confirm.
dataset.add_files("../../data/")

In [10]:
# Show the dataset's current properties (type, hasPart, name).
print(dataset)

{
    type: Dataset
    hasPart: LazyAction(operation=Store.upload, args=['../../data/'])
    name: Interesting files
}


In [11]:
# forge.register(dataset)

In [12]:
# print(dataset)

## Creation with resources

In [13]:
# Prepare associations.tsv as an attachment; it becomes Jane's `distribution` below.
# NOTE(review): presumably lazy like Dataset.add_files — confirm.
distribution_1 = forge.attach("../../data/associations.tsv")

In [14]:
# Prepare persons.csv as an attachment; it becomes John's `distribution` below.
distribution_2 = forge.attach("../../data/persons.csv")

In [15]:
# Person resource built from keyword arguments; its distribution is the associations.tsv attachment.
jane = Resource(type="Person", name="Jane Doe", distribution=distribution_1)

In [16]:
# Second Person resource; its distribution is the persons.csv attachment.
john = Resource(type="Person", name="John Smith", distribution=distribution_2)

In [17]:
# Group both resources so they can be passed around as one batch
# (the commented-out register / add_parts cells below take this list).
persons = [jane, john]

In [18]:
# forge.register(persons)

In [19]:
# Rebind `dataset` to a new Dataset named "Interesting people"; the file-based
# dataset created earlier is no longer reachable through this name.
dataset = Dataset(forge, name="Interesting people")

In [20]:
# dataset.add_parts(persons)

In [21]:
# print(dataset)

In [22]:
# forge.register(dataset)

In [23]:
# dataset.download("parts", "./downloaded/")

In [24]:
# ! ls ./downloaded

## Creation from a dataframe

See the notebook `DataFrame IO.ipynb` for details on converting a pandas DataFrame into Resource instances.

### basics

In [25]:
# Read persons.csv into a DataFrame using pandas' default comma separator.
dataframe = pd.read_csv("../../data/persons.csv")

In [26]:
# Display the frame: two Person rows with type, name and distribution columns.
dataframe

Unnamed: 0,type,name,distribution
0,Person,Albert Einstein,../../data/scientists-database/albert_einstein...
1,Person,Peter Higgs,../../data/scientists-database/peter_higgs.txt


In [27]:
# Convert the DataFrame into resources. The register output below reports a
# count of 2, matching the two rows of the frame.
persons = forge.from_dataframe(dataframe)

In [28]:
# Register the converted resources through `forge`; the output reports the
# number handled and whether the action succeeded.
forge.register(persons)

<count> 2
<action> _register_one
<succeeded> True


In [29]:
# Start a new "Interesting people" Dataset for the resources registered above.
dataset = Dataset(forge, name="Interesting people")

In [30]:
# Add the registered persons as parts; they show up under hasPart in the printout below.
dataset.add_parts(persons)

In [31]:
# Show the dataset with its parts, including the ids the persons carry after registration.
print(dataset)

{
    type: Dataset
    hasPart:
    [
        {
            id: 54ce5333-7662-44dd-a975-7aaa33a48e40_version=1
            type: Person
            distribution: ../../data/scientists-database/albert_einstein.txt
            name: Albert Einstein
        }
        {
            id: d5ad87cc-9325-4545-920f-5e27010b72b0_version=1
            type: Person
            distribution: ../../data/scientists-database/peter_higgs.txt
            name: Peter Higgs
        }
    ]
    name: Interesting people
}


In [32]:
# Register the dataset itself; the output reports a single _register_one action.
forge.register(dataset)

<action> _register_one
<succeeded> True


### advanced

In [33]:
# Read the tab-separated associations file; `dataframe` is rebound to this new frame.
dataframe = pd.read_csv("../../data/associations.tsv", sep="\t")

In [34]:
# Display the frame: note the "__"-separated column names and the "(missing)" id in row 1.
dataframe

Unnamed: 0,id,type,agent__type,agent__name,agent__gender__id,agent__gender__type,agent__gender__label,distribution
0,https://kg.example.ch/associations/15,Association,Person,Albert Einstein,http://purl.obolibrary.org/obo/PATO_0000384,LabeledOntologyEntity,male,./../data/scientists-database/albert_einstein.txt
1,(missing),Association,Person,Peter Higgs,http://purl.obolibrary.org/obo/PATO_0000384,LabeledOntologyEntity,male,../../data/scientists-database/peter_higgs.txt


In [35]:
# Replace every path in the distribution column with the value forge.attach returns
# for it. The bound method is passed directly; wrapping it in a lambda added nothing.
# NOTE: the column is overwritten in place, so re-running this cell without reloading
# the frame would feed the already-converted values back into forge.attach.
dataframe["distribution"] = dataframe["distribution"].map(forge.attach)

In [36]:
# Convert the frame into resources. "(missing)" is the placeholder seen in the id
# column of row 1, and "__" is the separator used in headers such as agent__gender__id.
# NOTE(review): presumably `na` marks values to treat as absent and `nesting` splits
# column names into nested properties — confirm against the from_dataframe documentation.
associations = forge.from_dataframe(dataframe, na="(missing)", nesting="__")

In [37]:
# forge.register(associations)

In [38]:
# dataset = Dataset(forge, name="Interesting associations")

In [39]:
# dataset.add_parts(associations)

In [40]:
# print(dataset)

In [41]:
# forge.register(dataset)