In [None]:
# This script downloads datasets from claircitydata.cbs.nl and uploads them to the Zenodo ClairCity community.
# It assumes that the ClairCity community has already been created on Zenodo.
# For simplicity, the datasets are hardcoded for now.
import requests
import json
import ckanapi
import datetime
import os

In [None]:
# Globals:
# Run date as YYYYMMDD; it is used as the name of the local working folder.
today = datetime.date.today().strftime("%Y%m%d")

# Credentials are read from the environment so that they never end up in the
# saved notebook or in version control.
# SECURITY: earlier revisions of this notebook embedded literal credentials
# here; those values must be considered compromised and should be revoked.
#   CLAIRCITYDATA_API_KEY  CKAN API key; only used by the commented-out,
#                          authenticated variant of the ckanapi dump command.
#   ZENODO_TOKEN           Zenodo access token; sent as `access_token` with
#                          every Zenodo request made by this notebook.
claircitydataAPIkey = os.environ.get('CLAIRCITYDATA_API_KEY', '')
ZenodoToken = os.environ.get('ZENODO_TOKEN', '')
if not ZenodoToken:
    print('Warning: ZENODO_TOKEN is not set; Zenodo requests will be sent without a valid access token.')

# Headers for the JSON-bodied Zenodo requests (create/update of a deposition).
headers = {"Content-Type": "application/json"}

# Datasets to transfer: the name on claircitydata.cbs.nl and the title to use on Zenodo.
datasets = [
    {
        'claircitydataname': 'd5-5-assessment-of-impacts-first-city',
        'zenodoname': 'd5-5-assessment-of-impacts-bristol'
    },
#     {
#         'claircitydataname': 'd5-5d-assessment-of-impacts-cira',
#         'zenodoname': 'd5-5d-assessment-of-impacts-cira'
#     },
]

# Value written to the "version" field of the Zenodo metadata.
version = '0.1.6'

In [None]:
# Get datasets from claircitydata.cbs.nl :
metadatafile = today + '/metadata.jsonl' 
if not os.path.exists(today):
    os.makedirs(today)

for dataset in datasets:
    name = dataset['claircitydataname']
    folder = today + '/' + name
    if not os.path.exists(folder):
        os.makedirs(folder)
    
    metadatafile = folder + '/metadata.jsonl'
    !ckanapi dump datasets {name} -O {metadatafile} -r https://claircitydata.cbs.nl   
    #!ckanapi dump datasets {name} -O {metadatafile} -a {claircitydataAPIkey} -r https://claircitydata.cbs.nl

    with open(metadatafile, 'r', encoding='utf8') as json_file:
        
        jsonl = json.load(json_file)
        
        datafolder = folder + '/data'
        if not os.path.exists(datafolder):
            os.makedirs(datafolder)

        for resource in jsonl['resources']:

            filename = datafolder + '/' + resource['url'].split("/")[-1]
            # make unique if exists, which happens with files from Techne:
            if (os.path.exists(filename)):
                filename = datafolder + '/1_' + resource['url'].split("/")[-1]
                print(resource['name']+' duplicate renamed into: ' + filename)
            
            url = resource['url']
            r = requests.get(url, allow_redirects=True)            
            open(filename, 'wb').write(r.content)
            print(str(r.status_code) + ': ' + filename)
    
    json_file.close()


In [12]:
# Create Zenodo datasets:
# For every configured dataset this cell
#   1. reads the metadata dumped from claircitydata,
#   2. builds the Zenodo metadata (description, keywords, creators, ...),
#   3. updates the deposition whose title equals the Zenodo name, or creates
#      a new one when no such deposition is found, and
#   4. uploads the previously downloaded resource files to that deposition.
for dataset in datasets:
    zenodoname = dataset['zenodoname']
    url = "https://zenodo.org/api/deposit/depositions"
    params = {'access_token': ZenodoToken}

    # Get metadata from claircitydata jsonl file:
    basepath = './'+today+'/'+dataset['claircitydataname']
    with open(basepath + '/metadata.jsonl', 'r', encoding='utf8') as json_file:
        metadataOld = json.load(json_file)

    # Work out the local/upload file name of every resource once, so that the
    # description and the upload loop agree. A base name that occurs a second
    # time within the dataset was stored by the download step as '1_<name>';
    # that file is used when it is present on disk.
    uploads = []  # list of (resource, file name) pairs
    seen_names = set()
    for resource in metadataOld["resources"]:
        basename = resource["url"].split("/")[-1]
        upload_name = basename
        if basename in seen_names and os.path.exists(basepath + '/data/1_' + basename):
            upload_name = '1_' + basename
        seen_names.add(basename)
        uploads.append((resource, upload_name))

    # Define metadata for Zenodo:
    # HTML list with one entry per file: resource name, file name, description.
    fileDescriptions = '<ul>'
    for resource, upload_name in uploads:
        # A missing or null description is rendered as an empty string.
        descr = resource.get('description') or ''
        fileDescriptions += '<li><strong>'+ resource['name'] + '</strong><br/>' + 'file: ' + upload_name + '<br/>' + descr +'</li>'
    fileDescriptions += '</ul>'

    keywords = [tag['display_name'] for tag in metadataOld["tags"]]

    metadataNew = {
        "metadata": {
            "upload_type": "dataset",
            "title": zenodoname,
            # A missing or null "notes" field is treated as empty text.
            "description": (metadataOld.get("notes") or '') + fileDescriptions,
            "license": metadataOld["license_id"],
            "keywords": keywords,
            "communities": [{'identifier':'claircity'}],
            "grants": [{'id':'689289'}],
            "version": version,
            "language": 'eng',
            "creators": [
                {'name': 'Lopes, Myriam', 'affiliation': 'UAVR'},
                {'name': 'Borrego, Carlos', 'affiliation': 'UAVR'},
                {'name': 'Rodrigues, Vera', 'affiliation': 'UAVR'},
                {'name': 'Coelho, Sílvia', 'affiliation': 'UAVR'},
                {'name': 'Faria, Carlos', 'affiliation': 'UAVR'},
                {'name': 'Rafael, Sandra', 'affiliation': 'UAVR'},
                {'name': 'Ferreira, Joana', 'affiliation': 'UAVR'},
                {'name': 'Fernandes, Ana Patrícia', 'affiliation': 'UAVR'},
                {'name': 'Vanherle, Kris', 'affiliation': 'TML'},
                {'name': 'Kewo, Angreine', 'affiliation': 'DTU'},
                {'name': 'Sieverts, Per', 'affiliation': 'DTU'},
                {'name': 'Knudsen, Svein', 'affiliation': 'NILU'},
                {'name': 'Soares, Joana', 'affiliation': 'NILU'},
                {'name': 'Trozzi, Carlo', 'affiliation': 'Techne'},
                {'name': 'Diafas, Iason', 'affiliation': 'PBL'}
            ],
            "contributors": [
                {'name': 'Lopes, Myriam', 'affiliation': 'UAVR', 'type': 'ContactPerson'},
                {'name': 'Vanherle, Kris', 'affiliation': 'TML', 'type': 'ContactPerson'},
                {'name': 'Rodrigues, Vera', 'affiliation': 'UAVR', 'type': 'ContactPerson'},
                {'name': 'ten Bosch, Olav', 'affiliation': 'CBS', 'type': 'DataManager'}
            ]
        }
    }

    #Do an update or a create:
    r = requests.get(url, params={'q': zenodoname, 'access_token': ZenodoToken})
    # Fail with a clear HTTP error (e.g. bad token) instead of a confusing
    # KeyError/TypeError when indexing an error payload below.
    r.raise_for_status()
    # 'q' is a free-text search, so only accept a hit whose title is exactly
    # the name we are looking for.
    matches = [
        hit for hit in r.json()
        if hit.get('title') == zenodoname
        or (hit.get('metadata') or {}).get('title') == zenodoname
    ]
    if matches:
        print('Update: ' + zenodoname)
        deposition_id = str(matches[0]['id'])
        urlId = url + '/' + deposition_id
        r = requests.put(urlId, params=params, data=json.dumps(metadataNew), headers=headers)
        r.raise_for_status()
    else:
        print('Create: ' + zenodoname)
        r = requests.post(url, params=params, data=json.dumps(metadataNew), headers=headers)
        r.raise_for_status()
        deposition_id = str(r.json()['id'])

    # Add the files:
    urlAddFile = url + '/' + deposition_id + '/files'
    for resource, upload_name in uploads:
        resourcename = resource["name"]
        data = {'name': upload_name} # we use filename because Zenodo uses it for naming downloads
        # Context manager so each file handle is closed as soon as its upload finishes.
        with open(basepath+'/data/'+upload_name, 'rb') as upload_file:
            r = requests.post(urlAddFile, params=params, data=data, files={'file': upload_file})
        # Per-file status is reported but not raised, so one failing file
        # does not stop the remaining uploads.
        print(str(r.status_code) + ': ' + resourcename + ' ' + upload_name)


Create: d5-5-assessment-of-impacts-bristol
201: D5.5 Assessment of Impacts - First City d5.5-assessment-of-impacts-first-city.pdf
201: specifications of modelling tool set module_specifications_report.pdf
201: Bristol shapefile bristol_200mx200m.rar
201: 1_BRS_Integrated_household_BAU_Scenarios bristol_household_projections.xlsx
201: 1_BRS_Integrated_BAU_Scenarios_2025 bristol_hh_energy_use_2025.xlsx
201: 1_BRS_Integrated_BAU_Scenarios_2035 bristol_hh_energy_use_2035.xlsx
201: 1_BRS_Integrated_BAU_Scenarios_2050 bristol_hh_energy_use_2050.xlsx
201: 2_BRS_Natural_baseline claircity_naturalemissions_brs_jan2019.pdf
201: 2_BRS_Agriculture_baseline claircity_agricultureemissions_brs_jan2019.pdf
201: 2_BRS_IRCI_baseline fr1a-wp5-irc-first-city-bristol.pdf
201: 2_BRS_IRCI_baseline_not_industry_area_fuel_cons 2_brs_irci_baseline_not_industry_area_fuel_cons.csv
201: 2_BRS_IRCI_baseline_not_industry_area_emi 2_brs_irci_baseline_not_industry_area_emi.csv
201: 2_BRS_IRCI_baseline_industry_area_em

201: 6_BRS_Carbon Footprint_UPS_2025 6_brs_carbon-footprint_ups_2025.csv
201: 6_BRS_Carbon Footprint_UPS_2035 6_brs_carbon-footprint_ups_2035.csv
201: 6_BRS_Carbon Footprint_UPS_2050 6_brs_carbon-footprint_ups_2050.csv
201: 6_BRS_Carbon Footprint_BAU_Scenarios_WITH_UPS 190204_cf_bristol_summary_withfupsv3.xlsx
