In [None]:
#@title Imports and code functions
import requests
import json
from urllib.parse import urlparse, urljoin
import os
from getpass import getpass



# Base URLs of the CKAN instance and of the conversion web services.
ckan_url='http://ckan.kupferdigital.org'
csvtocsvw_url='https://csvtocsvw.matolab.org'
maptomethod_url='https://maptomethod.matolab.org'
rdfconverter_url='https://rdfconverter.matolab.org'

# CKAN organisation; the cells below pass it as owner_org when they
# look up or create datasets.
ORG = 'fem'

#USER=os.getenv('USER','')
# getpass reads the key interactively without echoing it.
API_KEY=getpass('Enter ckan api-key here')

# Shared session: every request made through s_ckan carries the API key header.
# NOTE(review): ckan_url uses plain http, so this header travels unencrypted
# -- confirm whether the instance is reachable over https.
s_ckan= requests.Session()
s_ckan.headers.update({"X-CKAN-API-Key": API_KEY})

#print(USER,API_KEY)

def post_request(url, headers, data, files=None):
    """POST *data* to *url* and return the response object.

    When *files* is truthy, *data* and *files* are handed to ``requests``
    unchanged (multipart form upload). Otherwise *data* is serialised with
    ``json.dumps`` and sent as the request body.

    Any ``requests`` error, including a non-2xx status reported by
    ``raise_for_status``, terminates via ``SystemExit``.
    """
    post_kwargs = {"headers": headers}
    if files:
        # multipart form upload
        post_kwargs.update(data=data, files=files)
    else:
        # JSON body
        post_kwargs["data"] = json.dumps(data)

    try:
        reply = requests.post(url, **post_kwargs)
        reply.raise_for_status()
    except requests.exceptions.RequestException as err:
        # placeholder for save file / clean-up
        raise SystemExit(err) from None
    return reply

def expand_url(url):
    """Return *url* as an absolute URL string.

    URLs whose scheme is ``http`` or ``https`` are returned as reassembled
    by ``urlparse(...).geturl()``. Anything else is treated as relative:
    its path component is joined onto the module-level ``ckan_url``
    (query string and fragment are not carried over in that case).
    """
    parsed = urlparse(url)
    if parsed.scheme in ('https', 'http'):
        # geturl() is a method of the parse result; the ``path`` attribute
        # is a plain string and has no such method.
        return parsed.geturl()
    # relative url?
    return urljoin(ckan_url, parsed.path)

def annotate_csv(csv_url: str, separator: str='auto', header_separator: str='auto', encoding: str='auto'):
    """Request a CSVW annotation for the CSV file located at *csv_url*.

    Posts a JSON body to ``<csvtocsvw_url>/api/annotation``, equivalent to::

        curl -X POST 'https://csvtocsvw.matolab.org/api/annotation' \
             -H 'accept: application/json' -H 'Content-Type: application/json' \
             -d '{"data_url": "...", "separator": "auto",
                  "header_separator": "auto", "encoding": "auto"}'

    Args:
        csv_url: URL of the CSV file, sent as ``data_url``.
        separator: sent as ``separator``.
        header_separator: sent as ``header_separator``.
        encoding: sent as ``encoding``.

    Returns:
        ``(metafilename, file)``: a suggested metadata file name and the
        UTF-8 encoded JSON dump of the response's ``filedata`` entry.

    NOTE: the suggested name is built from the notebook-level variable
    ``filename``, which therefore has to be set before this is called.
    """
    url = csvtocsvw_url + "/api/annotation"
    data = {
        # use the argument, not whatever the global ``data_url`` holds
        "data_url": csv_url,
        "separator": separator,
        "header_separator": header_separator,
        "encoding": encoding,
    }
    headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
    r = post_request(url, headers, data).json()
    # strip only the extension; names may contain further dots
    metafilename = filename.rsplit('.', 1)[0] + '-metadata.json'
    file = json.dumps(r['filedata']).encode('utf-8')
    print('csvw annotation file created, suggested name: {}'.format(metafilename))
    return metafilename, file
    
def transform_csv(metadata_url: str):
    """Request the RDF serialisation described by a CSVW metadata file.

    Posts ``{"metadata_url": metadata_url}`` as JSON to
    ``<csvtocsvw_url>/api/rdf``.

    Args:
        metadata_url: URL of the CSVW metadata file.

    Returns:
        ``(rdffilename, content)``: a suggested ``.ttl`` file name and the
        raw bytes of the response body.

    NOTE: the suggested name is built from the notebook-level variable
    ``filename``, which therefore has to be set before this is called.
    """
    url = csvtocsvw_url + "/api/rdf"
    data = {
        "metadata_url": metadata_url,
    }
    headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
    # the response body is the file itself, so it is not parsed as JSON
    r = post_request(url, headers, data)
    # strip only the extension; names may contain further dots
    rdffilename = filename.rsplit('.', 1)[0] + '.ttl'
    print('csv rdf file created, suggested name: {}'.format(rdffilename))
    return rdffilename, r.content
    
def create_rdf(mapping_url: str, metadata_url: str=''):
    """Ask the RDF converter to apply a mapping and return the resulting graph.

    Posts ``mapping_url`` and ``metadata_url`` (as ``data_url``) as JSON to
    ``<rdfconverter_url>/api/createrdf``.

    Returns:
        ``(rdfdata, num_applied, num_skipped)`` taken from the response keys
        ``graph``, ``num_mappings_applied`` and ``num_mappings_skipped``.
    """
    if metadata_url:
        print('using {} instead or data: in mapping'.format(metadata_url))

    payload = {"mapping_url": mapping_url, "data_url": metadata_url}
    json_headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
    reply = post_request(rdfconverter_url + "/api/createrdf", json_headers, payload).json()

    num_applied, num_skipped = reply["num_mappings_applied"], reply["num_mappings_skipped"]
    rdfdata = reply['graph']
    print('rdf file created, applied {} rules, skipped {}'.format(num_applied, num_skipped))
    return rdfdata, num_applied, num_skipped

def ckan_api_get(url, params=None):
    """GET *url* through the shared CKAN session and return the decoded JSON.

    Args:
        url: passed through ``expand_url`` before the request is made.
        params: optional query parameters handed to ``requests``; ``None``
            (the default) sends none.

    Any ``requests`` error, including a non-2xx status, terminates via
    ``SystemExit``.
    """
    # ``None`` default instead of a shared mutable ``{}``
    p_url = expand_url(url)
    try:
        response = s_ckan.get(p_url, params=params)
        response.raise_for_status()
        r = response.json()
    except requests.exceptions.RequestException as e:
        # placeholder for save file / clean-up
        raise SystemExit(e) from None
    return r

from io import BytesIO

def ckan_file_upload(dataset_id, filename, filedata, format='', group=None):
    """Upload *filedata* as a new resource of the dataset *dataset_id*.

    Calls the ``resource_create`` action (``resource_update`` would be the
    alternative for existing resources) with the bytes wrapped in a
    ``BytesIO`` under the ``upload`` form field.

    Args:
        dataset_id: sent as ``package_id``.
        filename: sent as the resource ``name``.
        filedata: bytes-like content of the file.
        format: sent as the resource ``format``.
        group: accepted but not used.

    Returns:
        The ``result`` entry of the JSON response.
    """
    upload = [('upload', BytesIO(filedata))]
    fields = {"package_id": dataset_id, "name": filename, "format": format}
    reply = s_ckan.post(ckan_url + '/api/action/resource_create', data=fields, files=upload)
    resource = reply.json()['result']
    print('file {} uploaded at: {}'.format(filename, resource['url']))
    return resource

def ckan_link_ressource(dataset_id, ressource_name, ressource_url, format='', group=None):
    """Register an external URL as a new resource of the dataset *dataset_id*.

    Calls the ``resource_create`` action (``resource_update`` would be the
    alternative for existing resources) without uploading any file.

    Args:
        dataset_id: sent as ``package_id``.
        ressource_name: sent as the resource ``name``.
        ressource_url: sent as the resource ``url``.
        format: sent as the resource ``format``.
        group: accepted but not used.

    Returns:
        The ``result`` entry of the JSON response.
    """
    fields = {
        "package_id": dataset_id,
        "name": ressource_name,
        "url": ressource_url,
        "format": format,
    }
    reply = s_ckan.post(ckan_url + '/api/action/resource_create', data=fields)
    resource = reply.json()['result']
    print('ressource {} linked at: {}'.format(ressource_name, resource['url']))
    return resource

# def get_api_tokens():
#     response=requests.post(ckan_url+'/api/action/api_token_list',data={'user': USER},headers={"X-CKAN-API-Key": API_KEY})
#     #return first token
#     return response.json()['result']

def get_groups():
    """Return the CKAN groups as a dict mapping group name to group record.

    Queries ``group_list`` with ``all_fields`` enabled and indexes each
    returned item by its ``name`` entry.
    """
    reply = ckan_api_get('/api/action/group_list', params={'all_fields': True})
    by_name = {}
    for group in reply['result']:
        by_name[group['name']] = group
    return by_name

#requests.post(ckan_url+'/api/action/package_create',data={'name': 'test2','private': True,'owner_org': ORG} ,headers={'X-CKAN-API-Key': API_KEY})
def ckan_get_create_dataset(name: str, groups: list=None, owner_org: str='IWM'):
    """Return the dataset found for *name*, creating it when the search is empty.

    The lookup posts ``q=name:<name>`` to ``package_search`` and returns the
    first hit. If there is none, ``package_create`` is called with a public
    (``private: False``) dataset.

    Args:
        name: dataset name to search for / create.
        groups: names of groups to attach on creation; each must be a key of
            the module-level ``GROUPS`` dict. ``None`` (the default) attaches
            no groups.
        owner_org: sent as ``owner_org`` on creation.

    Returns:
        The dataset record: either the first search result or the ``result``
        entry of the create response.
    """
    r = s_ckan.post(ckan_url + '/api/action/package_search', data={'q': 'name:{}'.format(name)})
    results_list = r.json()['result']['results']
    if results_list:
        # return first hit
        print('dataset exist, returning first found')
        return results_list[0]

    # create package
    url = ckan_url + '/api/action/package_create'
    # ``None`` default instead of a shared mutable ``[]``
    groups_dict = [GROUPS[item] for item in (groups or [])]
    data = {
        "name": name,
        "owner_org": owner_org,
        "groups": groups_dict,
        "private": False,
    }
    r = s_ckan.post(url, json=data)
    dataset = r.json()['result']
    print("dataset created")
    return dataset

def remove_dataset(id):
    """Delete the dataset *id* via ``package_delete``.

    Returns:
        ``True`` when the response's ``success`` entry is truthy, ``False``
        otherwise, so that a failed deletion is no longer reported as done.
    """
    # ``id`` shadows the builtin but is kept: it is the public parameter name.
    r = s_ckan.post(ckan_url + '/api/action/package_delete', data={"id": id})
    deleted = bool(r.json()['success'])
    if deleted:
        print('dataset deleted')
    return deleted
# Fetch the group records once at start-up; ckan_get_create_dataset looks
# group names up in this dict. A non-empty result doubles as a connection check.
GROUPS=get_groups()
if GROUPS:
  print('connection to ckan established')
  print('available dataset groups are: {}'.format(GROUPS.keys()))


In [28]:
# Required inputs: raw data file, method graph and mapping file.
data_url = (
    'https://gitlab.com/kupferdigital/process-graphs/energy-dispersive-x-ray-analysis-fem/-/raw/main/KupferDigital_7F21109_ID3243_Pos._a_EDX_data.txt'
)
method_url = (
    'https://kupferdigital.gitlab.io/process-graphs/energy-dispersive-x-ray-analysis-fem/index.ttl'
)
mapping_url = (
    'https://gitlab.com/kupferdigital/process-graphs/energy-dispersive-x-ray-analysis-fem/-/raw/main/KupferDigital_7F21109_ID3243_Pos._a_EDX_data-map.yaml'
)

# CKAN dataset names, all derived from the method name.
method_name = 'energy-dispersive-x-ray-analysis-fem'
datasetname = f"{method_name}-example"
mapping_name = f"{method_name}-mapping"
rdf_name = f"{method_name}-dataset"


In [None]:
# Get the target dataset, creating it if the search finds none.
dataset = ckan_get_create_dataset(datasetname, groups=['machine-files'], owner_org=ORG)
# Download the raw CSV; stop on an HTTP error instead of uploading the
# body of an error page as if it were the data file.
csv_response = requests.get(data_url)
csv_response.raise_for_status()
csv_file = csv_response.content
# The last path segment of the source URL becomes the resource name.
filename = data_url.rsplit('/')[-1]
csv_resource = ckan_file_upload(dataset['id'], filename, csv_file, format='csv')

In [None]:
# Generate the CSVW metadata for the uploaded CSV and store it in the same dataset.
# data_url is rebound here from the original source URL to the CKAN copy.
data_url=csv_resource['url']
# annotate_csv returns a suggested file name and the encoded metadata bytes;
# this rebinds the notebook-level `filename`.
filename, filedata=annotate_csv(csv_resource['url'])
csvw_resource=ckan_file_upload(dataset['id'], filename, filedata, format='json-ld')
#print(json.dumps(filedata, indent=4))

In [None]:
# Convert the CSV to RDF via its uploaded CSVW metadata file and store the
# result in the same dataset. `filename` is rebound to the suggested .ttl name.
filename, file =transform_csv(csvw_resource['url'])
csvrdf_resource=ckan_file_upload(dataset['id'], filename, file, format='turtle')

In [None]:
# Link the method graph: get or create the method dataset and register
# method_url as an external resource of it.
method=ckan_get_create_dataset(method_name,groups=['process-graphs'],owner_org=ORG)
# NOTE(review): this rebinds csvrdf_resource, which an earlier cell set to the
# uploaded CSV-RDF resource -- a dedicated name may have been intended.
csvrdf_resource=ckan_link_ressource(method['id'], method_name, method_url, format='turtle')

In [None]:
# Link the mapping file: get or create the mapping dataset and register
# mapping_url as an external resource of it.
mapping=ckan_get_create_dataset(mapping_name,groups=['process-graph-mappings'],owner_org=ORG)
# NOTE(review): csvrdf_resource is rebound again here -- a dedicated name
# may have been intended.
csvrdf_resource=ckan_link_ressource(mapping['id'], mapping_name, mapping_url, format='yaml')

In [None]:
# Create the final RDF graph: apply the mapping with the uploaded CSVW
# metadata as data source, then upload the graph to its own dataset.
print(mapping_url,csvw_resource['url'])
rdf, num_applied, num_skipped=create_rdf(mapping_url, csvw_resource['url'])
filename=datasetname+'.ttl'
rdf_dataset=ckan_get_create_dataset(rdf_name,groups=['mapping-results'],owner_org=ORG)
# ckan_file_upload wraps its data in BytesIO, hence the explicit encode
rdf_resource=ckan_file_upload(rdf_dataset['id'], filename, rdf.encode('utf-8'), format='turtle')


In [None]:
# Cleanup: delete the four datasets used by this notebook.
# The ids are looked up through ckan_get_create_dataset, so a dataset that
# does not exist yet is created here and then deleted right away.
dataset=ckan_get_create_dataset(datasetname,groups=['machine-files'], owner_org=ORG)
rdf_dataset=ckan_get_create_dataset(rdf_name,groups=['mapping-results'],owner_org=ORG)
mapping=ckan_get_create_dataset(mapping_name,groups=['process-graph-mappings'],owner_org=ORG)
method=ckan_get_create_dataset(method_name,groups=['process-graphs'],owner_org=ORG)
remove_dataset(dataset['id'])
remove_dataset(rdf_dataset['id'])
remove_dataset(mapping['id'])
remove_dataset(method['id'])
