# CKAN MANAGER
- A shortcut for building an entire CKAN dataset, with all of its resources, in a single run
- Accepted formats so far:
    - WCS
    - csv


In [21]:
import ckanapi
import re
import os
import requests
from bs4 import BeautifulSoup

In [22]:
# CKAN API base URL and API key.
CKAN_BASE_URL = "https://data.integratedmodelling.org/"
# Read the key from the CKAN_API_KEY environment variable so it never has to be
# typed into (and saved with) the notebook. When the variable is not set this
# falls back to the empty string that was hard-coded here before.
API_KEY = os.environ.get("CKAN_API_KEY", "")

# Client object handed to every helper function in this notebook.
ckan = ckanapi.RemoteCKAN(CKAN_BASE_URL, apikey=API_KEY)

In [23]:
def get_csv_file_list(path):
    """Return the paths of the ``.csv`` entries found directly inside *path*.

    Args:
        path: Directory to scan. Sub-directories are not visited.

    Returns:
        A list with ``os.path.join(path, name)`` for every name returned by
        ``os.listdir(path)`` that ends with ``.csv`` (case-sensitive), in
        listing order.
    """
    # os.listdir never yields the same name twice, so no de-duplication is
    # needed. The previous membership check compared bare file names against
    # already-joined paths and therefore could never match.
    return [
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.endswith(".csv")
    ]

def get_all_datasets(ckan):
    """Fetch the metadata of every dataset listed by the CKAN instance.

    Args:
        ckan: Client exposing ``action.package_list`` and
            ``action.package_show``.

    Returns:
        A list with one ``package_show`` result per name returned by
        ``package_list``, or None if any call raised (the error is reported
        through ``exceptions``).
    """
    try:
        return [
            ckan.action.package_show(id=name)
            for name in ckan.action.package_list()
        ]
    except Exception as err:
        exceptions(err)
        return None

def get_dataset_by_name(ckan, dataset_name):
    """Look up a dataset by name through ``package_search``.

    Args:
        ckan: Client exposing ``action.package_search``.
        dataset_name: Name inserted into the ``name:"..."`` search query.

    Returns:
        The single matching dataset when the search reports exactly one hit.
        None when there is no hit, more than one hit, or the call raised
        (the error is reported through ``exceptions``). A message describing
        the outcome is printed in every non-error case.
    """
    try:
        result = ckan.action.package_search(q='name:"{}"'.format(dataset_name))
        matches = result['results']
        hits = result['count']
        if hits == 1:
            print("Dataset with name '{}' already exists.".format(dataset_name))
            return matches[0]
        if hits == 0:
            print("Dataset with name '{}' does not exist.".format(dataset_name))
        else:
            print("Dataset with name '{}' are more than one.".format(dataset_name))
        return None
    except Exception as err:
        exceptions(err)
        return None
    
def get_organization_groups(ckan):
    """Return the result of ``organization_list(all_fields=True)``.

    Args:
        ckan: Client exposing ``action.organization_list``.

    Returns:
        Whatever the action returns, or None if the call raised (the error
        is reported through ``exceptions``).
    """
    try:
        groups = ckan.action.organization_list(all_fields=True)
    except Exception as err:
        exceptions(err)
        return None
    else:
        return groups
    
def get_all_licenses(ckan):
    """Return the result of the ``license_list`` action.

    Args:
        ckan: Client exposing ``action.license_list``.

    Returns:
        Whatever the action returns, or None if the call raised (the error
        is reported through ``exceptions``).
    """
    try:
        return ckan.action.license_list()
    except Exception as err:
        exceptions(err)
        return None

def create_dataset(ckan, dataset_metadata):
    """Create a dataset by calling ``package_create`` with the given metadata.

    Args:
        ckan: Client exposing ``action.package_create``.
        dataset_metadata: Mapping expanded into keyword arguments of the
            action.

    Returns:
        The value returned by ``package_create``, or None if the call raised
        (the error is reported through ``exceptions``).
    """
    try:
        return ckan.action.package_create(**dataset_metadata)
    except Exception as err:
        exceptions(err)
        return None

def create_resource(ckan, resource_metadata, file_obj=None):
    """Create a resource by calling ``resource_create``.

    Args:
        ckan: Client exposing ``action.resource_create``.
        resource_metadata: Mapping expanded into keyword arguments of the
            action.
        file_obj: Optional object passed as the ``upload`` keyword; omitted
            from the call when falsy.

    Returns:
        The value returned by ``resource_create``, or None if the call raised
        (the error is reported through ``exceptions``).
    """
    try:
        if not file_obj:
            return ckan.action.resource_create(**resource_metadata)
        return ckan.action.resource_create(**resource_metadata, upload=file_obj)
    except Exception as err:
        exceptions(err)
        return None
    
def create_view(ckan, view_params):
    """Create a resource view by calling ``resource_view_create``.

    Args:
        ckan: Client exposing ``action.resource_view_create``.
        view_params: Mapping expanded into keyword arguments of the action.

    Returns:
        The value returned by the action, or None if the call raised (the
        error is reported through ``exceptions``).
    """
    try:
        view = ckan.action.resource_view_create(**view_params)
    except Exception as err:
        exceptions(err)
        return None
    else:
        return view

def delete_dataset(ckan, dataset_id):
    """Delete a dataset by calling ``package_delete`` with ``id=dataset_id``.

    Prints a confirmation on success; on failure the error is reported
    through ``exceptions``.

    Returns:
        None in every case.
    """
    try:
        ckan.action.package_delete(id=dataset_id)
        print("Dataset deleted successfully.")
    except Exception as err:
        exceptions(err)
    return None

def exceptions(exception):
    """Print a one-line description of *exception*, prefixed by its category.

    The ckanapi exception types are tested in the order listed below and the
    first match wins, so the order must be kept (NOTE(review): NotFound and
    NotAuthorized are presumably subclasses of CKANAPIError - confirm against
    ckanapi). Anything else gets the generic fallback message.
    """
    prefixes = (
        (ckanapi.NotFound, "Not found:"),
        (ckanapi.NotAuthorized, "Not authorized:"),
        (ckanapi.CKANAPIError, "CKAN API error:"),
    )
    for error_type, prefix in prefixes:
        if isinstance(exception, error_type):
            print(prefix, exception)
            return
    print("An unexpected error I did not code occurred:", exception)

def get_url_xml(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch (the notebook uses it for OWS GetCapabilities
            requests).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            server.

    Returns:
        The response text when the server answers with HTTP 200. None, after
        printing an error message, when the request fails or the status code
        is anything else.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection problems and timeouts follow the same "print and return
        # None" convention as a non-200 answer.
        print("Error: Failed to retrieve GetCapabilities document:", error)
        return None
    if response.status_code != 200:
        print("Error: Failed to retrieve GetCapabilities document")
        return None
    return response.text
    
def extract_ows_layers(capabilities_xml, regex_pattern):
    """Return the layer names in a capabilities document that match a pattern.

    The document is searched for ``wcs:CoverageId`` elements first and, only
    if there are none, for ``wfs:Name`` elements.

    Args:
        capabilities_xml: XML text of a GetCapabilities response. A falsy
            value (for example the None returned by ``get_url_xml`` on
            failure) yields an empty list.
        regex_pattern: Pattern applied with ``re.match``, i.e. anchored at the
            start of each layer name.

    Returns:
        A list of the matching layer names, in document order. The list is
        empty when the document is missing or contains neither element type;
        previously that second case raised ``UnboundLocalError``.
    """
    if not capabilities_xml:
        return []

    soup = BeautifulSoup(capabilities_xml, 'xml')
    layers = soup.find_all('wcs:CoverageId') or soup.find_all('wfs:Name')
    if not layers:
        print("No wcs:CoverageId or wfs:Name elements found in the capabilities document.")
        return []

    layer_names = (layer.text for layer in layers)
    return [name for name in layer_names if re.match(regex_pattern, name)]

def build_ows_layers(ows_url, layers):
    """Build one describe-request URL per recognised layer name.

    A name containing ``__`` gets a WCS DescribeCoverage URL; otherwise a name
    containing ``:`` gets a WFS DescribeFeatureType URL. Names with neither
    marker are left out of the result.

    Args:
        ows_url: Base OWS endpoint the query string is appended to.
        layers: Iterable of layer names.

    Returns:
        A list of URLs in the same order as the recognised names.
    """
    wcs_query = r"?service=WCS&version=2.0.1&request=DescribeCoverage&CoverageId="
    wfs_query = r"?request=DescribeFeatureType&version=1.0.0&service=WFS&typeName="

    urls = []
    for name in layers:
        if "__" in name:
            query = wcs_query
        elif ":" in name:
            query = wfs_query
        else:
            continue
        urls.append(ows_url + query + name)
    return urls



In [4]:
"""Check the groups/datasets availability"""
# See all datasets. The helper returns None when the request fails (after
# printing the error), so fall back to an empty list instead of crashing.
datasets = get_all_datasets(ckan)
for dataset in datasets or []:
    print("Dataset name:", dataset["name"], "Title: ", dataset["title"])
# Get all organization groups (same None-on-failure convention).
org_groups = get_organization_groups(ckan)
for group in org_groups or []:
    print("name: ", group["name"], "owner_org: ", group["id"])


Dataset name: global-vegetation-carbon-stock-2001-2020 Title:  Vegetation Carbon Stock 2001-2020
name:  integrated-modelling-parnership owner_org:  e7a09a40-27a3-42b0-b265-8e034fe1454a


In [None]:
"""Get all licenses"""
# get_all_licenses returns None when the request fails, so iterate over an
# empty list in that case. The loop variable is not called ``license`` to
# avoid shadowing the interpreter's builtin of that name.
licenses = get_all_licenses(ckan)
for license_info in licenses or []:
    print("License name:", license_info["title"], " - ID:", license_info["id"])

In [None]:
"""Input parameters"""
dataset_name = "global-vegetation-carbon-stock-2001-2020"  # no spaces allowed

# Folder whose .csv files are uploaded as resources.
files_path = r"C:\Users\admin\Downloads\vegetation_carbon_stock_2001-2020\export"
resources = []

# --- CSV resources: every .csv file found in files_path ---
file_list = get_csv_file_list(files_path)
resources += file_list

# --- OWS resources: layers published by the OWS endpoint ---
ows_url = r'https://integratedmodelling.org/dev-geoserver/ows'

# WCS: keep the coverages whose id matches regex_pattern.
getcapabilities = r"?service=WCS&request=GetCapabilities"
capabilities_xml = get_url_xml(ows_url + getcapabilities)
regex_pattern = r'im-data-global-ecology__ET_EVI2_MEAN_32612_of_USA_San_Pe.*'
ows_layers = extract_ows_layers(capabilities_xml, regex_pattern)
wcs_layers = build_ows_layers(ows_url, ows_layers)
resources += wcs_layers

# WFS: keep the feature types whose name matches regex_pattern.
getcapabilities = r"?service=WFS&request=GetCapabilities"
capabilities_xml = get_url_xml(ows_url + getcapabilities)
regex_pattern = r'im-data-global-agriculture:crop_polli.*'
ows_layers = extract_ows_layers(capabilities_xml, regex_pattern)
wfs_layers = build_ows_layers(ows_url, ows_layers)
resources += wfs_layers


In [34]:
# If the dataset does not exist. The next areas have to be filled
title = "this is the title"
author = "thisistheautor"
author_email = "thisistheautor@email"
maintainer = "thisisthemaintainer"
maintainer_email = "thisisthemaintainer@email"
license_id = "cc-zero"
description = "this is the description of the dataset"
owner_org = "e7a09a40-27a3-42b0-b265-8e034fe1454a"

In [24]:
# Keyword arguments for the package_create action; they are sent through
# create_dataset() only when the dataset does not exist yet.
dataset_metadata = {
    "name": dataset_name,               # name of the new dataset
    "title": title,              # title of the dataset (optional, default: same as name)
    "private": False,         # if True, creates a private dataset
    "author": author,             # name of the dataset's author (optional)
    "author_email": author_email,       # email address of the dataset's author (optional)
    "maintainer": maintainer,         # name of the dataset's maintainer (optional)
    "maintainer_email": maintainer_email,   # email address of the dataset's maintainer (optional)
    "license_id": license_id,         # id of the dataset's license (optional)
    "notes": description,              # description of the dataset (optional)
    "url": "",                # URL of the dataset's source (optional)
    "version": "",            # (optional)
    "state": "active",        # current state of the dataset (optional, default: 'active')
    "type": "",               # type of the dataset (optional)
    "resources": [],          # the dataset's resources, list of resource dictionaries (optional)
    "tags": [],               # the dataset's tags, list of tag dictionaries (optional)
    "extras": [],             # the dataset's extras, list of dataset extra dictionaries (optional)
    "relationships_as_object": [],  # list of relationship dictionaries (optional)
    "relationships_as_subject": [], # list of relationship dictionaries (optional)
    "groups": [],             # groups to which the dataset belongs (optional)
    "owner_org": owner_org           # id of the dataset's owning organization (optional)
}

In [35]:
# Reuse the dataset when one with this name already exists; otherwise create it
# from dataset_metadata. new_dataset is only bound in the creation case.
dataset = get_dataset_by_name(ckan, dataset_name)
if dataset is None:
    dataset = new_dataset = create_dataset(ckan, dataset_metadata)


Dataset with name 'global-vegetation-carbon-stock-2001-2020' already exists.


In [37]:
# Create one CKAN resource per entry of ``resources``: local CSV files are
# uploaded, WCS URLs are registered as links with an image view.
for resource in resources:
    # The URLs produced by build_ows_layers carry "service=WCS"/"service=WFS"
    # in upper case, so the service is detected case-insensitively.
    lowered = resource.lower()

    if resource.endswith(".csv"):
        resource_metadata = {
            'name': os.path.splitext(os.path.basename(resource))[0],
            'description': '',
            'url': '',
            'format': 'CSV',
            'package_id': dataset["id"],
            'state': 'active',
        }
        # The context manager closes the file even if the upload fails.
        with open(resource, mode='rb') as csv_file:
            new_resource = create_resource(ckan, resource_metadata, file_obj=csv_file)
        # No view is created here: per the original author's note, one is
        # generated automatically for the uploaded file.

    elif "service=wcs" in lowered:
        # The resource name is the part of the coverage id after the "__"
        # workspace separator.
        match = re.search(r'CoverageId=.*?__(.*?)(&|$)', resource)
        if match is None:
            print("Could not extract a coverage name, skipping:", resource)
            continue

        resource_metadata = {
            'name': match.group(1),
            'description': '',
            'url': resource,
            'datastore_active': False,
            'format': 'WCS',
            # ``dataset`` is bound whether the dataset already existed or was
            # just created; ``new_dataset`` only exists in the second case.
            'package_id': dataset["id"],
            'state': 'active',
        }
        new_resource = create_resource(ckan, resource_metadata, file_obj=None)
        if new_resource is None:
            # create_resource has already reported the error.
            continue

        # TODO: placeholder left by the original author; replace it with the
        # URL of a real preview image for this coverage.
        image_url = "apañalo"

        # create_view takes the client and a single parameter mapping, so the
        # resource id and view type travel inside that mapping.
        view_params = {
            "resource_id": new_resource["id"],
            "view_type": "image_view",
            "title": "Image View",
            "description": "View for displaying an image.",
            "image_url": image_url,
            "width": "100%",  # Set the width of the image to 100% to make it responsive
        }
        create_view(ckan, view_params)

    elif "service=wfs" in lowered:
        print("WFS resources are not handled yet, skipping:", resource)
    else:
        print("Unrecognised resource type, skipping:", resource)
    

In [None]:
"""Delete a dataset"""
# The value is forwarded to package_delete as ``id``, so use the dataset ID.
delete_dataset(ckan, dataset_id="9c5227f6-9e20-48ad-8fb4-978c04e98569")

# View types

In CKAN, `view_type` refers to the type of view that you want to create for a resource. These view types determine how the data in the resource will be presented or visualized to users. CKAN provides various view types to accommodate different types of data and visualization needs. Here are some common `view_type` options:

1. **grid_view**: This view type displays tabular data in a grid format, similar to a spreadsheet. It's suitable for resources made of rows and columns of data, such as CSV files.

2. **graph**: This view type allows you to create different types of graphs and charts to visualize data. Graphs could include bar charts, line charts, pie charts, scatter plots, etc.

3. **text_view**: This view type simply displays the raw text content of the resource. It's useful for textual resources such as READMEs, documentation, plain text files, or CSV files.

4. **image_view**: This view type displays images that are embedded within the resource. It's suitable for viewing image files or thumbnails.

5. **map_view**: This view type displays geographical data on an interactive map. It's commonly used for datasets containing geographic coordinates or spatial data.

6. **recline_view**: This view type provides an interactive data exploration interface with features like filtering, sorting, and faceted search. It's useful for exploring and analyzing tabular datasets.

8. **markdown**: This view type allows you to render Markdown content within CKAN. It's useful for displaying formatted text, documentation, or other Markdown-based content.

9. **recline_view**: Provides an interactive grid view with filtering, sorting, and visualization capabilities.

10. **geojson_view**: Displays GeoJSON data on a map.
11. **pdf_view**: Renders a PDF document.

12. **markdown**: Renders Markdown content.

13. **textile**: Renders Textile content.
14. **dcat_rdf_view**: Displays metadata in DCAT RDF format.
15. **json_table_view**: Displays JSON data in a tabular format.

16. **html_view**: Renders HTML content.

17. **javascript_view**: Executes JavaScript code embedded within a view.

18. **pdf_preview**: Provides a preview of a PDF document.

These are just a few examples of `view_type` options available in CKAN. Depending on your CKAN setup and any additional plugins you may have installed, there may be additional view types available. When creating a resource view using `resource_view_create`, you'll need to specify the appropriate `view_type` based on how you want the data to be presented or visualized.
