# Data Importation example with multiple variables and secondary variables
27/07/2021   
Gabriel Besombes    
__Data used__ : GrainSample.csv, extracted from Notations_2020_EPPN-PhenoMaize.xlsx
#     

## Import packages
---

In [2]:
import opensilexClientToolsPython
import pandas as pd
from opensilexClientToolsPython.rest import ApiException
from pprint import pprint

---
#    

## Import csv
---

In [3]:
df = pd.read_csv("./GrainSample.csv", delimiter=";", decimal=",")
df.head()

Unnamed: 0,Tri,Trial_id,X,Y,Plot,Alias,Experiment,Species,Variety,ExperimentModalities,...,Project,Alias_PF,CodeB,GrainSample,Hte,nbk,Poids,Date,PMG,PMG15
0,79,MA20_PM,3,8,10,EPPN_Plot_0010,2020_PhenoMaize,Maize,DKC4590,WW,...,EPPN,73,MA20_PM_10_DKC4590_1,1,21.0,500,163.34,2020-10-14,326.68,303.620235
1,77,MA20_PM,3,10,26,EPPN_Plot_0026,2020_PhenoMaize,Maize,EPPN4_H,WW,...,EPPN,93,MA20_PM_26_EPPN4_H_2,2,17.8,500,142.7,2020-10-14,285.4,275.998588
2,75,MA20_PM,3,12,42,EPPN_Plot_0042,2020_PhenoMaize,Maize,EPPN9_H,WW,...,EPPN,113,MA20_PM_42_EPPN9_H_3,3,20.9,500,153.28,2020-10-14,306.56,285.281129
3,73,MA20_PM,3,14,58,EPPN_Plot_0058,2020_PhenoMaize,Maize,EPPN2_H,WW,...,EPPN,133,MA20_PM_58_EPPN2_H_4,4,19.7,500,136.86,2020-10-14,273.72,258.584894
4,71,MA20_PM,3,16,74,EPPN_Plot_0074,2020_PhenoMaize,Maize,DKC4590,WW,...,EPPN,153,MA20_PM_74_DKC4590_5,5,19.6,500,176.2,2020-10-14,352.4,333.328941


Global look at the data

In [4]:
df.describe()

Unnamed: 0,Tri,X,Y,Plot,Replication,GenotypeNumber,Internal_Id,Alias_PF,GrainSample,Hte,nbk,Poids,PMG,PMG15
count,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0
mean,160.5,5.5,41.5,160.5,2.722222,5.833333,54.5,410.5,54.5,20.659259,500.0,156.456296,312.912593,292.036681
std,69.631003,1.715787,26.138917,94.061175,2.103994,4.750799,31.32092,261.394803,31.32092,1.672093,0.0,11.676797,23.353593,22.007594
min,42.0,3.0,8.0,10.0,0.0,0.0,1.0,73.0,1.0,17.0,500.0,129.8,259.6,238.199529
25%,101.75,4.0,16.0,76.75,1.0,1.0,27.75,155.75,27.75,19.6,500.0,148.3425,296.685,276.368382
50%,160.5,5.5,41.5,160.5,3.0,5.5,54.5,410.5,54.5,20.7,500.0,154.9,309.8,288.746612
75%,219.25,7.0,67.0,244.25,5.0,10.0,81.25,665.25,81.25,21.925,500.0,165.24,330.48,309.596541
max,279.0,8.0,75.0,311.0,6.0,14.0,108.0,748.0,108.0,24.9,500.0,182.72,365.44,338.52


Checking for na

In [5]:
df.isna().any().any()

False

The data looks good from the get-go

---
#    

## Use of the `ApiClient` class
---

Creating instances of :
* `ApiClient` class to connect to OpenSilex
* `VariablesApi` class to create Variables
* `ScientificObjectsApi` class to create Scientific Objects
* `DataApi` class to create Provenances and upload the Data

In [6]:
pythonClient = opensilexClientToolsPython.ApiClient()
pythonClient.connect_to_opensilex_ws(identifier="gabriel.besombes@inrae.fr",
                                     password="NPLdj6gFdnCfDP6",
                                     host="http://138.102.159.37:8081/rest")
var_api_instance = opensilexClientToolsPython.VariablesApi(pythonClient)
data_api_instance = opensilexClientToolsPython.DataApi(pythonClient)
obj_api_instance = opensilexClientToolsPython.ScientificObjectsApi(pythonClient)

---
#   

## Subset of data to test
---

The test is done with only the first five elements and on the __Hte__, __nbk__, __Poids__, __PMG__, __PMG15__ variables. __Plot__ is also kept as the scientific objects.    
Description :
* __Hte__ : Humidity level in %
* __nbk__ : Number of grains in the sample
* __Poids__ : Weight of the sample
* __PMG__ : Thousand seeds weight (TSW) = __Poids__\*1000/__nbk__
* __PMG15__ = __PMG__\*(100-__Hte__)/85

In [7]:
n = 5

In [8]:
df[["Plot", "Hte", "nbk", "Poids", "PMG", "PMG15"]][:n]

Unnamed: 0,Plot,Hte,nbk,Poids,PMG,PMG15
0,10,21.0,500,163.34,326.68,303.620235
1,26,17.8,500,142.7,285.4,275.998588
2,42,20.9,500,153.28,306.56,285.281129
3,58,19.7,500,136.86,273.72,258.584894
4,74,19.6,500,176.2,352.4,333.328941


In [9]:
vars_kept = ["Hte", "nbk", "Poids", "PMG", "PMG15"]

Making sure to use the right data types

In [10]:
df[["Hte", "Poids", "PMG", "PMG15"]] = df[["Hte", "Poids", "PMG", "PMG15"]][:n].astype(float)
df[["nbk"]] = df[["nbk"]].astype(int)
df[["Plot", "Hte", "nbk", "Poids", "PMG", "PMG15"]][:n]

Unnamed: 0,Plot,Hte,nbk,Poids,PMG,PMG15
0,10,21.0,500,163.34,326.68,303.620235
1,26,17.8,500,142.7,285.4,275.998588
2,42,20.9,500,153.28,306.56,285.281129
3,58,19.7,500,136.86,273.72,258.584894
4,74,19.6,500,176.2,352.4,333.328941


In [11]:
df[["Plot", "Hte", "nbk", "Poids", "PMG", "PMG15"]].dtypes

Plot       int64
Hte      float64
nbk        int32
Poids    float64
PMG      float64
PMG15    float64
dtype: object

---
#    

## 1) Creating the Provenances
---

One provenance is created for each variable

`ProvenanceCreationDTO` is used to make the right Json format

In [12]:
pc_dto = opensilexClientToolsPython.ProvenanceCreationDTO
bodies = [pc_dto(name="Grain_sample_{}_prov".format(v)) for v in vars_kept]
pprint(bodies)

[{'description': None,
 'name': 'Grain_sample_Hte_prov',
 'prov_activity': None,
 'prov_agent': None,
 'uri': None},
 {'description': None,
 'name': 'Grain_sample_nbk_prov',
 'prov_activity': None,
 'prov_agent': None,
 'uri': None},
 {'description': None,
 'name': 'Grain_sample_Poids_prov',
 'prov_activity': None,
 'prov_agent': None,
 'uri': None},
 {'description': None,
 'name': 'Grain_sample_PMG_prov',
 'prov_activity': None,
 'prov_agent': None,
 'uri': None},
 {'description': None,
 'name': 'Grain_sample_PMG15_prov',
 'prov_activity': None,
 'prov_agent': None,
 'uri': None}]


The Provenances are then created

In [13]:
res_prov = [
    data_api_instance.create_provenance(body=body)
    for body in bodies
]
pprint(res_prov)

[{'metadata': {'datafiles': ['http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6']},
 {'metadata': {'datafiles': ['http://www.phenome-fppn.fr/id/provenance/grain_sample_nbk_prov/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['http://www.phenome-fppn.fr/id/provenance/grain_sample_nbk_prov/6']},
 {'metadata': {'datafiles': ['http://www.phenome-fppn.fr/id/provenance/grain_sample_poids_prov/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
         

The Provenances' URIs are retrieved from `res_prov`

In [14]:
prov_uris = [prov['result'][0] for prov in res_prov]
prov_uris

['http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6',
 'http://www.phenome-fppn.fr/id/provenance/grain_sample_nbk_prov/6',
 'http://www.phenome-fppn.fr/id/provenance/grain_sample_poids_prov/6',
 'http://www.phenome-fppn.fr/id/provenance/grain_sample_pmg_prov/6',
 'http://www.phenome-fppn.fr/id/provenance/grain_sample_pmg15_prov/6']

---
#   

## 2) Creating the Variables
---

To create the Variables substeps are needed

### Substep 1 : Entity creation

Only one entity needs to be created as all these refer to grain samples

`EntityCreationDTO` is used to make the right format of Json to create an Entity

In [15]:
body = opensilexClientToolsPython.EntityCreationDTO(name="Grain_sample")
pprint(body)

{'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Grain_sample',
 'narrow_match': None,
 'uri': None}


The Entity is then created

In [16]:
res_entity = var_api_instance.create_entity(body=body)
pprint(res_entity)

{'metadata': {'datafiles': ['test:set/variables#variable.entity.grain_sample/6'],
              'pagination': {'currentPage': 0,
                             'pageSize': 0,
                             'totalCount': 0,
                             'totalPages': 0},
              'status': []},
 'result': ['test:set/variables#variable.entity.grain_sample/6']}


The Entity's URI is retrieved from `res_entity`

In [17]:
entity_uri = res_entity['result'][0]
entity_uri

'test:set/variables#variable.entity.grain_sample/6'

### Substep 2 : Characteristic creation

One Characteristic is created for each variable

In [18]:
vars_kept

['Hte', 'nbk', 'Poids', 'PMG', 'PMG15']

In [19]:
char_dict = {
    'Hte' : 'Humidity level',
    'nbk' : 'Number of grains',
    'Poids' : 'Weight of sample',
    'PMG' : 'Thousand seeds weight',
    'PMG15' : 'Thousand seeds weight 15%'
}

`CharacteristicCreationDTO` is used to make the right format of Json to create a Characteristic

In [20]:
cc_dto = opensilexClientToolsPython.CharacteristicCreationDTO
bodies = [cc_dto(name=char_dict[x]) for x in vars_kept]
pprint(bodies)

[{'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Humidity level',
 'narrow_match': None,
 'uri': None},
 {'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Number of grains',
 'narrow_match': None,
 'uri': None},
 {'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Weight of sample',
 'narrow_match': None,
 'uri': None},
 {'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Thousand seeds weight',
 'narrow_match': None,
 'uri': None},
 {'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Thousand seeds weight 15%',
 'narrow_match': None,
 'uri': None}]


The Characteristics are then created

In [21]:
res_char = [
    var_api_instance.create_characteristic(body=body)
    for body in bodies
]
pprint(res_char)

[{'metadata': {'datafiles': ['test:set/variables#characteristic.humidity-level/7'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#characteristic.humidity-level/7']},
 {'metadata': {'datafiles': ['test:set/variables#characteristic.number-of-grains/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#characteristic.number-of-grains/6']},
 {'metadata': {'datafiles': ['test:set/variables#characteristic.weight-of-sample/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                            

The Characteristics' URIs are retrieved from `res_char`

In [22]:
char_uris = [char['result'][0] for char in res_char]
char_uris

['test:set/variables#characteristic.humidity-level/7',
 'test:set/variables#characteristic.number-of-grains/6',
 'test:set/variables#characteristic.weight-of-sample/6',
 'test:set/variables#characteristic.thousand-seeds-weight/6',
 'test:set/variables#characteristic.thousand-seeds-weight-15/6']

### Substep 3 : Unit creation

One Unit is created for each variable

In [23]:
vars_kept

['Hte', 'nbk', 'Poids', 'PMG', 'PMG15']

In [24]:
unit_dict = {
    'Hte' : 'Humidity %',
    'nbk' : 'Number of grains',
    'Poids' : 'grams',
    'PMG' : 'grams per thousand seeds',
    'PMG15' : 'grams per thousand seeds'
}

`UnitCreationDTO` is used to make the right format of Json to create a Unit

In [25]:
uc_dto = opensilexClientToolsPython.UnitCreationDTO
bodies = [uc_dto(name=unit_dict[x]) for x in vars_kept]
pprint(bodies)

[{'alternative_symbol': None,
 'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Humidity %',
 'narrow_match': None,
 'symbol': None,
 'uri': None},
 {'alternative_symbol': None,
 'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'Number of grains',
 'narrow_match': None,
 'symbol': None,
 'uri': None},
 {'alternative_symbol': None,
 'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'grams',
 'narrow_match': None,
 'symbol': None,
 'uri': None},
 {'alternative_symbol': None,
 'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'grams per thousand seeds',
 'narrow_match': None,
 'symbol': None,
 'uri': None},
 {'alternative_symbol': None,
 'broad_match': None,
 'close_match': None,
 'description': None,
 'exact_match': None,
 'name': 'grams per thousand seeds',
 'narrow_match': None,
 'symbol': None,
 'uri'

The Characteristics are then created

In [26]:
res_unit = [
    var_api_instance.create_unit(body=body)
    for body in bodies
]
pprint(res_unit)

[{'metadata': {'datafiles': ['test:set/variables#variable.unit.humidity-/7'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#variable.unit.humidity-/7']},
 {'metadata': {'datafiles': ['test:set/variables#variable.unit.number-of-grains/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#variable.unit.number-of-grains/6']},
 {'metadata': {'datafiles': ['test:set/variables#variable.unit.grams/6'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
      

The Units' URIs are retrieved from `res_unit`

In [27]:
unit_uris = [unit['result'][0] for unit in res_unit]
unit_uris

['test:set/variables#variable.unit.humidity-/7',
 'test:set/variables#variable.unit.number-of-grains/6',
 'test:set/variables#variable.unit.grams/6',
 'test:set/variables#variable.unit.grams-per-thousand-seeds/12',
 'test:set/variables#variable.unit.grams-per-thousand-seeds/13']

__Note__ : a duplicate unit was created by mistake

### Substep 4 : Variables creation

Fetching the datatypes to get the right datatype's URI

In [28]:
datatypes = var_api_instance.get_datatypes()
datatypes

{'result': [{'name': 'datatypes.boolean', 'uri': 'http://www.w3.org/2001/XMLSchema#boolean'},
  {'name': 'datatypes.date', 'uri': 'http://www.w3.org/2001/XMLSchema#date'},
  {'name': 'datatypes.decimal', 'uri': 'http://www.w3.org/2001/XMLSchema#decimal'},
  {'name': 'datatypes.number', 'uri': 'http://www.w3.org/2001/XMLSchema#integer'},
  {'name': 'datatypes.string', 'uri': 'http://www.w3.org/2001/XMLSchema#string'}],
 'metadata': {'pagination': {'pageSize': 5,
   'currentPage': 0,
   'totalCount': 5,
   'totalPages': 1},
  'status': [],
  'datafiles': []}}

Keep only decimal and number datatype's URI

In [29]:
dec = datatypes['result'][2].uri
num = datatypes['result'][3].uri
datatypes_uris = [dec, num, dec, dec, dec]
datatypes_uris

['http://www.w3.org/2001/XMLSchema#decimal',
 'http://www.w3.org/2001/XMLSchema#integer',
 'http://www.w3.org/2001/XMLSchema#decimal',
 'http://www.w3.org/2001/XMLSchema#decimal',
 'http://www.w3.org/2001/XMLSchema#decimal']

`VariableCreationDTO` is used to make the right format of Json to create a Variable

In [30]:
vc_dto = opensilexClientToolsPython.VariableCreationDTO
bodies = [
    vc_dto(name=vars_kept[x],
           entity=entity_uri,
           characteristic=char_uris[x], unit=unit_uris[x],
           datatype=datatypes_uris[x])
    for x in range(0, len(vars_kept))
]
pprint(bodies)

[{'alternative_name': None,
 'broad_match': None,
 'characteristic': 'test:set/variables#characteristic.humidity-level/7',
 'close_match': None,
 'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
 'description': None,
 'entity': 'test:set/variables#variable.entity.grain_sample/6',
 'exact_match': None,
 'method': None,
 'name': 'Hte',
 'narrow_match': None,
 'sampling_interval': None,
 'time_interval': None,
 'trait': None,
 'trait_name': None,
 'unit': 'test:set/variables#variable.unit.humidity-/7',
 'uri': None},
 {'alternative_name': None,
 'broad_match': None,
 'characteristic': 'test:set/variables#characteristic.number-of-grains/6',
 'close_match': None,
 'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
 'description': None,
 'entity': 'test:set/variables#variable.entity.grain_sample/6',
 'exact_match': None,
 'method': None,
 'name': 'nbk',
 'narrow_match': None,
 'sampling_interval': None,
 'time_interval': None,
 'trait': None,
 'trait_name': None,
 'unit': 'test:s

The variables are then created

In [31]:
res_var = [
    var_api_instance.create_variable(body=body)
    for body in bodies
]
pprint(res_var)

[{'metadata': {'datafiles': ['test:set/variables#variable.hte/8'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#variable.hte/8']},
 {'metadata': {'datafiles': ['test:set/variables#variable.nbk/8'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#variable.nbk/8']},
 {'metadata': {'datafiles': ['test:set/variables#variable.poids/8'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['test:set/variables#variab

The Variable's URI is retrieved from `res_var`

In [32]:
var_uris = [var['result'][0] for var in res_var]
var_uris

['test:set/variables#variable.hte/8',
 'test:set/variables#variable.nbk/8',
 'test:set/variables#variable.poids/8',
 'test:set/variables#variable.pmg/7',
 'test:set/variables#variable.pmg15/7']

---
#    

## 4) Creating the Scientific Objects
---

Fetching the datatypes to get the right datatype's URI

In [33]:
obj_types = obj_api_instance.get_used_types()
obj_types

{'result': [{'name': 'leaf', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Leaf'},
  {'name': 'seed', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Seed'},
  {'name': 'scion', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Scion'},
  {'name': 'plant', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Plant'},
  {'name': 'plot', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Plot'},
  {'name': 'silk', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Silk'},
  {'name': 'rootstock',
   'uri': 'http://www.opensilex.org/vocabulary/oeso#Rootstock'},
  {'name': 'Sub plot', 'uri': 'http://www.opensilex.org/vocabulary/oeso#SubPlot'},
  {'name': 'Pot', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Pot'},
  {'name': 'Rosette', 'uri': 'http://www.opensilex.org/vocabulary/oeso#Rosette'},
  {'name': 'test_renaud_bug_update_type',
   'uri': 'dev:test_renaud_bug_update_type'},
  {'name': 'os-type-test',
   'uri': 'http://www.opensilex.org/vocabulary/oeso#os-type-test'},
  {'name': 'petit-o

Keep only plot Object type URI

In [34]:
objtype_uri = [x.uri for x in obj_types['result'] if x.name == 'plot'][0]
objtype_uri

'http://www.opensilex.org/vocabulary/oeso#Plot'

`ScientificObjectCreationDTO` is used to make the right format of Json to create Scientific Objects

In [35]:
so_dto = opensilexClientToolsPython.ScientificObjectCreationDTO
bodies = [
    so_dto(name=str(x),
           rdf_type=objtype_uri) for x in df.Plot[:n]
]
pprint(bodies)

[{'experiment': None,
 'geometry': None,
 'name': '10',
 'rdf_type': 'http://www.opensilex.org/vocabulary/oeso#Plot',
 'relations': None,
 'uri': None},
 {'experiment': None,
 'geometry': None,
 'name': '26',
 'rdf_type': 'http://www.opensilex.org/vocabulary/oeso#Plot',
 'relations': None,
 'uri': None},
 {'experiment': None,
 'geometry': None,
 'name': '42',
 'rdf_type': 'http://www.opensilex.org/vocabulary/oeso#Plot',
 'relations': None,
 'uri': None},
 {'experiment': None,
 'geometry': None,
 'name': '58',
 'rdf_type': 'http://www.opensilex.org/vocabulary/oeso#Plot',
 'relations': None,
 'uri': None},
 {'experiment': None,
 'geometry': None,
 'name': '74',
 'rdf_type': 'http://www.opensilex.org/vocabulary/oeso#Plot',
 'relations': None,
 'uri': None}]


The Scientific Objects are then created

In [36]:
cso = obj_api_instance.create_scientific_object
res_so = [cso(body=body) for body in bodies]
pprint(res_so)

[{'metadata': {'datafiles': ['http://www.phenome-fppn.fr/set/scientific-objects/so-10-9'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['http://www.phenome-fppn.fr/set/scientific-objects/so-10-9']},
 {'metadata': {'datafiles': ['http://www.phenome-fppn.fr/set/scientific-objects/so-26-9'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,
                              'totalPages': 0},
               'status': []},
  'result': ['http://www.phenome-fppn.fr/set/scientific-objects/so-26-9']},
 {'metadata': {'datafiles': ['http://www.phenome-fppn.fr/set/scientific-objects/so-42-9'],
               'pagination': {'currentPage': 0,
                              'pageSize': 0,
                              'totalCount': 0,

The Scientific Objects' URIs are retrieved from `res_so`

In [37]:
so_uris = [res['result'][0] for res in res_so]
so_uris

['http://www.phenome-fppn.fr/set/scientific-objects/so-10-9',
 'http://www.phenome-fppn.fr/set/scientific-objects/so-26-9',
 'http://www.phenome-fppn.fr/set/scientific-objects/so-42-9',
 'http://www.phenome-fppn.fr/set/scientific-objects/so-58-9',
 'http://www.phenome-fppn.fr/set/scientific-objects/so-74-9']

#     

## 5) Data upload
---

The data for the first three variables can now be uploaded and linked to the new Provenances and Variables

In [38]:
dpm = opensilexClientToolsPython.DataProvenanceModel
provs = [dpm(uri=uri) for uri in prov_uris[:3]]
provs

[{'experiments': None,
  'prov_used': None,
  'settings': None,
  'uri': 'http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6'},
 {'experiments': None,
  'prov_used': None,
  'settings': None,
  'uri': 'http://www.phenome-fppn.fr/id/provenance/grain_sample_nbk_prov/6'},
 {'experiments': None,
  'prov_used': None,
  'settings': None,
  'uri': 'http://www.phenome-fppn.fr/id/provenance/grain_sample_poids_prov/6'}]

`DataCreationDTO` is used multiple times to make a list of Json to upload multiple values at once

__NUMPY INTEGERS AREN'T SUPPORTED__

In [39]:
dc_dto = opensilexClientToolsPython.DataCreationDTO
bodies = [
    [
        dc_dto(_date="2021-07-30",
           variable=var_uris[y],
           value=int(df[vars_kept[y]][x]),
           scientific_object=so_uris[x],
           provenance=provs[y])
        if y == 1
        else
        dc_dto(_date="2021-07-30",
           variable=var_uris[y],
           value=float(df[vars_kept[y]][x]),
           scientific_object=so_uris[x],
           provenance=provs[y])
        for x in range(0, n)
    ]
    for y in range(0, 3)
]
pprint(bodies)

[[{'_date': '2021-07-30',
 'confidence': None,
 'metadata': None,
 'provenance': {'experiments': None,
                'prov_used': None,
                'settings': None,
                'uri': 'http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6'},
 'raw_data': None,
 'scientific_object': 'http://www.phenome-fppn.fr/set/scientific-objects/so-10-9',
 'timezone': None,
 'uri': None,
 'value': 21.0,
 'variable': 'test:set/variables#variable.hte/8'},
  {'_date': '2021-07-30',
 'confidence': None,
 'metadata': None,
 'provenance': {'experiments': None,
                'prov_used': None,
                'settings': None,
                'uri': 'http://www.phenome-fppn.fr/id/provenance/grain_sample_hte_prov/6'},
 'raw_data': None,
 'scientific_object': 'http://www.phenome-fppn.fr/set/scientific-objects/so-26-9',
 'timezone': None,
 'uri': None,
 'value': 17.8,
 'variable': 'test:set/variables#variable.hte/8'},
  {'_date': '2021-07-30',
 'confidence': None,
 'metadata': None,
 '

Length should be 3 Variables * 5 entries

In [40]:
print(len(bodies), len(bodies[0]))

3 5


In [41]:
res_data_three = [
    data_api_instance.add_list_data(body=bodies[x])
    for x in range(0, 3)
]

In [42]:
res_data_three

[{'result': ['http://www.phenome-fppn.fr/id/data/1627603200/44576454fb85e94c15daca10d5e38ef9',
   'http://www.phenome-fppn.fr/id/data/1627603200/9b4bb86883a3797ebcf53956794e453e',
   'http://www.phenome-fppn.fr/id/data/1627603200/49d0efc416022d07bb623ffba23c9df7',
   'http://www.phenome-fppn.fr/id/data/1627603200/158e1611709bc7bd9e6ce6256d431f33',
   'http://www.phenome-fppn.fr/id/data/1627603200/3c4666aeecb9dae663c85780aa020755'],
  'metadata': {'pagination': {'pageSize': 0,
    'currentPage': 0,
    'totalCount': 0,
    'totalPages': 0},
   'status': [],
   'datafiles': ['http://www.phenome-fppn.fr/id/data/1627603200/44576454fb85e94c15daca10d5e38ef9',
    'http://www.phenome-fppn.fr/id/data/1627603200/9b4bb86883a3797ebcf53956794e453e',
    'http://www.phenome-fppn.fr/id/data/1627603200/49d0efc416022d07bb623ffba23c9df7',
    'http://www.phenome-fppn.fr/id/data/1627603200/158e1611709bc7bd9e6ce6256d431f33',
    'http://www.phenome-fppn.fr/id/data/1627603200/3c4666aeecb9dae663c85780aa020

The data for the last two variables can now be uploaded and linked to the new Provenances, Variables and the adequate data used to produce the values

In [43]:
res_data_three[0]["result"][0]

'http://www.phenome-fppn.fr/id/data/1627603200/44576454fb85e94c15daca10d5e38ef9'

In [44]:
dc_dto = opensilexClientToolsPython.DataCreationDTO
pem = opensilexClientToolsPython.ProvEntityModel
pems = [
    [
        pem(res_data_three[y]["result"][x])
        for x in range(0, n)
    ]
    for y in range(0, 3)
]
bodies = [
    [
        dc_dto(_date="2021-07-30",
           variable=var_uris[y],
           value=float(df[vars_kept[y]][x]),
           scientific_object=so_uris[x],
           provenance=dpm(uri=prov_uris[y], prov_used=pems[1] + pems[2]))
        if y == 3
        else
        dc_dto(_date="2021-07-30",
           variable=var_uris[y],
           value=float(df[vars_kept[y]][x]),
           scientific_object=so_uris[x],
           provenance=dpm(uri=prov_uris[y], prov_used=pems[0] + pems[1] + pems[2]))
        for x in range(0, n)
    ]
    for y in range(3, len(vars_kept))
]
pprint(bodies)

[[{'_date': '2021-07-30',
 'confidence': None,
 'metadata': None,
 'provenance': {'experiments': None,
                'prov_used': [{'rdf_type': None,
                               'uri': 'http://www.phenome-fppn.fr/id/data/1627603200/a900b32667d0b33db7cdc2a483b713e3'},
                              {'rdf_type': None,
                               'uri': 'http://www.phenome-fppn.fr/id/data/1627603200/1ef56e15e9986c042d6aa934df2b085c'},
                              {'rdf_type': None,
                               'uri': 'http://www.phenome-fppn.fr/id/data/1627603200/6cecaccd2ed46e5281271011d1453dfb'},
                              {'rdf_type': None,
                               'uri': 'http://www.phenome-fppn.fr/id/data/1627603200/2bc353513a3ce343420bb5a16df1c51c'},
                              {'rdf_type': None,
                               'uri': 'http://www.phenome-fppn.fr/id/data/1627603200/304479c74ba3ea7ea4e05ed15f065c7c'},
                              {'rdf_type': None

Length should be 2 Variables * 5

In [45]:
print(len(bodies), len(bodies[0]))

2 5


In [46]:
res_data_two = [
    data_api_instance.add_list_data(body=bodies[x])
    for x in range(0, 2)
]

In [47]:
res_data_two

[{'result': ['http://www.phenome-fppn.fr/id/data/1627603200/34fb0bacfc180f18f3cc0427c60ed461',
   'http://www.phenome-fppn.fr/id/data/1627603200/e38d425eb1163e7e64b4dc61ecfc25e7',
   'http://www.phenome-fppn.fr/id/data/1627603200/a57c13d026ccae5ab910a8df41a53a68',
   'http://www.phenome-fppn.fr/id/data/1627603200/a137f02e85cb899c410ca379be8785d4',
   'http://www.phenome-fppn.fr/id/data/1627603200/34533dd88f5b08e95b0dc9f87ce29bf4'],
  'metadata': {'pagination': {'pageSize': 0,
    'currentPage': 0,
    'totalCount': 0,
    'totalPages': 0},
   'status': [],
   'datafiles': ['http://www.phenome-fppn.fr/id/data/1627603200/34fb0bacfc180f18f3cc0427c60ed461',
    'http://www.phenome-fppn.fr/id/data/1627603200/e38d425eb1163e7e64b4dc61ecfc25e7',
    'http://www.phenome-fppn.fr/id/data/1627603200/a57c13d026ccae5ab910a8df41a53a68',
    'http://www.phenome-fppn.fr/id/data/1627603200/a137f02e85cb899c410ca379be8785d4',
    'http://www.phenome-fppn.fr/id/data/1627603200/34533dd88f5b08e95b0dc9f87ce29

Checking the result on one entry

In [48]:
data_api_instance.get_data(res_data_two[1]["result"][0])

{'result': {'_date': '2021-07-30',
  'confidence': None,
  'metadata': None,
  'provenance': {'experiments': None,
                 'prov_used': [{'rdf_type': None,
                                'uri': 'test:id/data/1627603200/44576454fb85e94c15daca10d5e38ef9'},
                               {'rdf_type': None,
                                'uri': 'test:id/data/1627603200/9b4bb86883a3797ebcf53956794e453e'},
                               {'rdf_type': None,
                                'uri': 'test:id/data/1627603200/49d0efc416022d07bb623ffba23c9df7'},
                               {'rdf_type': None,
                                'uri': 'test:id/data/1627603200/158e1611709bc7bd9e6ce6256d431f33'},
                               {'rdf_type': None,
                                'uri': 'test:id/data/1627603200/3c4666aeecb9dae663c85780aa020755'},
                               {'rdf_type': None,
                                'uri': 'test:id/data/1627603200/a900b32667d0b33db7cdc

---
#    