In [2]:
#!/usr/bin/env python3
"""
CKAN Theme Bulk Loader for Scottish Official Statistics
Bulk loads statistical themes as groups into a CKAN instance
"""

import requests
import json
import sys
from typing import Dict, List, Optional

class CKANThemeLoader:
    """
    Thin client for creating and looking up CKAN groups ("themes")

    All calls go to the CKAN Action API (``/api/3/action/...``) using the
    ``requests`` library and the API key supplied at construction time.
    """

    # Characters CKAN accepts in a group name: lowercase ASCII alphanumerics,
    # '-' and '_'. Anything else is dropped when generating a slug.
    _VALID_NAME_CHARS = frozenset('abcdefghijklmnopqrstuvwxyz0123456789-_')

    # Curated descriptions keyed by the exact theme title. Built once at class
    # creation instead of on every generate_description() call.
    _DESCRIPTIONS = {
        'Access to Services': 'Official statistics relating to access to public and private services across Scotland, including digital services, transport links, and service availability in rural and urban areas.',

        'Business, Enterprise and Energy': 'Statistical data on Scottish business activity, enterprise development, energy production and consumption, including renewable energy statistics and business demographics.',

        'Children and Young People': 'Official statistics concerning children and young people in Scotland, covering education outcomes, health, welfare, and social development indicators.',

        'Community Wellbeing and Social Environment': 'Statistics measuring community cohesion, social capital, neighbourhood satisfaction, and environmental quality across Scottish communities.',

        'Crime and Justice': 'Official crime statistics for Scotland, including recorded crime data, justice system performance, and community safety indicators.',

        'Economic Activity, Benefits and Tax Credits': 'Statistical data on economic activity, benefit claimants, tax credit recipients, and welfare system performance across Scotland.',

        'Economy': 'Key economic indicators for Scotland, including GDP, productivity, business investment, and economic performance metrics.',

        'Education, Skills and Training': 'Official education statistics covering all levels from early years through higher education, plus skills development and training data for Scotland.',

        'Environment': 'Environmental statistics for Scotland, including air quality, biodiversity, climate change indicators, and natural resource management data.',

        'Farming and Rural': 'Agricultural statistics and rural development indicators for Scotland, covering farming practices, rural economy, and land use patterns.',

        'Geography': 'Geographical and spatial data for Scotland, including administrative boundaries, land classification, and geographic reference information.',

        'Health and Social Care': 'Official health statistics for Scotland, covering NHS performance, public health indicators, social care provision, and health outcomes data.',

        'Housing': 'Housing statistics for Scotland, including supply, demand, affordability, tenure patterns, and housing quality indicators.',

        'Labour Force': 'Employment and labour market statistics for Scotland, including unemployment rates, job vacancies, and workforce characteristics.',

        'Management Information': 'Operational and performance data from Scottish public sector organisations, used for monitoring and management purposes.',

        'Population': 'Official population statistics for Scotland, including demographic data, migration patterns, and population projections.',

        'Reference': 'Reference materials, classifications, and supporting documentation for Scottish official statistics and data standards.',

        'Scottish Index of Multiple Deprivation': 'Statistics relating to the Scottish Index of Multiple Deprivation (SIMD), measuring deprivation across different domains in Scottish areas.',

        'Transport': 'Transport statistics for Scotland, covering all modes of transport, infrastructure, usage patterns, and transport policy outcomes.',
    }

    def __init__(self, ckan_url: str, api_key: str, timeout: float = 30.0):
        """
        Initialise the CKAN theme loader

        Args:
            ckan_url: Base URL of the CKAN instance (e.g., 'https://data.gov.scot');
                trailing slashes are stripped
            api_key: CKAN API key with appropriate permissions
            timeout: Seconds to wait for each HTTP request before giving up.
                Without a timeout ``requests`` waits indefinitely, which would
                stall the whole bulk load on one unresponsive call.
        """
        self.ckan_url = ckan_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.headers = {
            'Authorization': api_key,
            'Content-Type': 'application/json'
        }

    @staticmethod
    def _error_message(result: Dict) -> str:
        """
        Extract a human-readable error message from an API response dictionary

        Falls back to the remaining error fields when there is no 'message'
        key (e.g. field-level validation errors), and to 'Unknown error' when
        the response carries no error information at all.
        """
        error = result.get('error')
        if isinstance(error, dict):
            message = error.get('message')
            if message:
                return message
            # No top-level message: show the per-field details instead.
            details = {key: value for key, value in error.items() if key != '__type'}
            if details:
                return f"{error.get('__type', 'Error')}: {details}"
            return error.get('__type', 'Unknown error')
        if error:
            return str(error)
        return 'Unknown error'

    def create_group(self, name: str, title: str, description: str) -> Dict:
        """
        Create a group (theme) in CKAN

        Args:
            name: Machine-readable name (slug)
            title: Human-readable title
            description: Description of the theme

        Returns:
            API response dictionary. On transport failure or an unparseable
            response, a dictionary of the form
            ``{'success': False, 'error': <message>}`` is returned instead,
            so callers can always test ``result.get('success')``.
        """
        url = f"{self.ckan_url}/api/3/action/group_create"

        data = {
            'name': name,
            'title': title,
            'description': description,
            'type': 'group',
            'state': 'active'
        }

        try:
            response = requests.post(
                url, headers=self.headers, json=data, timeout=self.timeout
            )
        except requests.RequestException as e:
            print(f"✗ Network error creating {title}: {str(e)}")
            return {'success': False, 'error': str(e)}

        # The body may not be JSON at all (e.g. an HTML error page from a
        # proxy). Depending on the requests version that surfaces as a plain
        # ValueError, which is not a RequestException.
        try:
            result = response.json()
        except ValueError:
            error_msg = f"Invalid (non-JSON) response, HTTP {response.status_code}"
            print(f"✗ Failed to create {title}: {error_msg}")
            return {'success': False, 'error': error_msg}

        # A JSON body that is not an object (e.g. a bare string) has no
        # 'success' flag and cannot be queried with .get().
        if not isinstance(result, dict):
            print(f"✗ Failed to create {title}: {result}")
            return {'success': False, 'error': str(result)}

        if response.status_code == 200 and result.get('success'):
            print(f"✓ Created theme: {title}")
        else:
            print(f"✗ Failed to create {title}: {self._error_message(result)}")
        return result

    def theme_exists(self, name: str) -> bool:
        """
        Check if a theme (group) already exists

        Args:
            name: Machine-readable name to check

        Returns:
            True if theme exists, False otherwise. Network errors and
            unparseable responses are also reported as False; a subsequent
            create_group() call will then surface the underlying problem.
        """
        url = f"{self.ckan_url}/api/3/action/group_show"

        try:
            response = requests.post(
                url, headers=self.headers, json={'id': name}, timeout=self.timeout
            )
            if response.status_code != 200:
                return False
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Only transport/parse failures are swallowed; programming errors
            # and KeyboardInterrupt are allowed to propagate.
            return False

        return isinstance(payload, dict) and bool(payload.get('success'))

    def generate_theme_name(self, title: str) -> str:
        """
        Generate a machine-readable name from the theme title

        The title is lowercased, '&' becomes 'and', each whitespace character
        becomes '-', and every remaining character outside ``[a-z0-9_-]``
        (commas, apostrophes, brackets, ...) is dropped so that the result
        only contains characters CKAN accepts in a group name.

        Args:
            title: Human-readable theme title

        Returns:
            Machine-readable name (slug)
        """
        lowered = title.lower().replace('&', 'and')
        hyphenated = ''.join('-' if ch.isspace() else ch for ch in lowered)
        return ''.join(ch for ch in hyphenated if ch in self._VALID_NAME_CHARS)

    def generate_description(self, theme: str) -> str:
        """
        Generate a generic description for each theme with Scottish context

        Args:
            theme: Theme title

        Returns:
            The curated description for a known theme title (exact match),
            otherwise a generic sentence built from the lowercased title.
        """
        return self._DESCRIPTIONS.get(theme, f'Official statistics relating to {theme.lower()} in Scotland.')


def create_as_tags(self, themes: List[str]) -> None:
    """
    Alternative method: Create themes as vocabulary tags instead of groups

    Use this if your CKAN uses controlled vocabularies for themes. Note that
    this is a module-level function, not a method: pass the loader explicitly,
    e.g. ``create_as_tags(loader, themes)``.

    Args:
        self: Loader object providing ``ckan_url``, ``headers`` and
            ``generate_theme_name()``; an optional ``timeout`` attribute is
            honoured when present
        themes: Human-readable theme titles; each becomes one tag whose name
            is the generated slug

    Returns:
        None. The outcome is reported on stdout.
    """
    # A single vocabulary_create call creates the vocabulary and its tags.
    vocab_data = {
        'name': 'statistical_themes',
        'tags': [{'name': self.generate_theme_name(theme)} for theme in themes]
    }

    url = f"{self.ckan_url}/api/3/action/vocabulary_create"

    # Never wait forever on an unresponsive server; fall back to 30 seconds
    # when the loader does not define its own timeout.
    timeout = getattr(self, 'timeout', 30)

    try:
        response = requests.post(url, headers=self.headers, json=vocab_data, timeout=timeout)
    except requests.RequestException as e:
        print(f"✗ Error creating vocabulary: {e}")
        return

    # The body may not be JSON (e.g. an HTML error page); fall back to the
    # raw text so the failure message still shows what came back.
    try:
        body = response.json()
    except ValueError:
        body = response.text

    # Same success criterion as group creation: HTTP 200 and 'success' set.
    if response.status_code == 200 and isinstance(body, dict) and body.get('success'):
        print("✓ Created themes vocabulary")
    else:
        print(f"✗ Failed to create vocabulary: {body}")


def main():
    """
    Main execution function

    Reads connection settings from the environment, then creates each Scottish
    statistical theme in CKAN (skipping those that already exist) and prints a
    summary. Exits with status 1 if configuration is missing or any theme
    failed to load.

    Environment variables:
        CKAN_API_KEY: CKAN API token (required)
        CKAN_URL: Base URL of the CKAN instance (optional; defaults to the
            instance this script was written against)
    """
    # Imported locally so this function does not depend on edits elsewhere.
    import os

    # CONFIGURATION OPTIONS:
    # Set this to determine how themes should be created in your CKAN instance
    THEME_METHOD = 'groups'  # Options: 'groups', 'tags', 'vocabulary'

    # Scottish statistical themes
    themes = [
        'Access to Services',
        'Business, Enterprise and Energy',
        'Children and Young People',
        'Community Wellbeing and Social Environment',
        'Crime and Justice',
        'Economic Activity, Benefits and Tax Credits',
        'Economy',
        'Education, Skills and Training',
        'Environment',
        'Farming and Rural',
        'Geography',
        'Health and Social Care',
        'Housing',
        'Labour Force',
        'Management Information',
        'Population',
        'Reference',
        'Scottish Index of Multiple Deprivation',
        'Transport'
    ]

    # Configuration - supplied via the environment so that credentials are
    # never committed with the script.
    # NOTE(security): an API token used to be hard-coded here; treat that
    # token as compromised and revoke it in CKAN.
    CKAN_URL = os.environ.get('CKAN_URL', 'http://35.177.24.156:5000')
    API_KEY = os.environ.get('CKAN_API_KEY', '')

    # Validate configuration
    if not CKAN_URL or not API_KEY:
        print("Please set the CKAN_API_KEY (and optionally CKAN_URL) environment variables before running.")
        sys.exit(1)

    # Initialise the loader
    loader = CKANThemeLoader(CKAN_URL, API_KEY)

    print(f"Loading {len(themes)} Scottish statistical themes into CKAN as {THEME_METHOD}...")
    print(f"CKAN Instance: {CKAN_URL}")

    # Handle different theme methods
    if THEME_METHOD == 'tags':
        print("Note: Creating as vocabulary tags - you'll need to manually associate datasets")
        # create_as_tags is a module-level function, so the loader is passed
        # explicitly; groups are not created in this mode.
        create_as_tags(loader, themes)
        return
    elif THEME_METHOD == 'vocabulary':
        print("Note: Check CKAN documentation for vocabulary setup with your datasets")

    print("-" * 60)

    success_count = 0
    skip_count = 0
    error_count = 0

    # Process each theme
    for theme in themes:
        theme_name = loader.generate_theme_name(theme)
        description = loader.generate_description(theme)

        # Check if theme already exists
        if loader.theme_exists(theme_name):
            print(f"- Skipped {theme}: Already exists")
            skip_count += 1
            continue

        # Create the theme
        result = loader.create_group(theme_name, theme, description)

        if result.get('success'):
            success_count += 1
        else:
            error_count += 1

    # Summary
    print("-" * 60)
    print("Summary:")
    print(f"  Created: {success_count}")
    print(f"  Skipped: {skip_count}")
    print(f"  Errors:  {error_count}")
    print(f"  Total:   {len(themes)}")

    if error_count > 0:
        print("\nSome themes failed to load. Check the error messages above.")
        sys.exit(1)
    else:
        print("\nAll themes processed successfully!")


# Run the bulk load only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()

Loading 19 Scottish statistical themes into CKAN as groups...
CKAN Instance: http://35.177.24.156:5000
------------------------------------------------------------
✓ Created theme: Access to Services
✓ Created theme: Business, Enterprise and Energy
✓ Created theme: Children and Young People
✓ Created theme: Community Wellbeing and Social Environment
✓ Created theme: Crime and Justice
✓ Created theme: Economic Activity, Benefits and Tax Credits
✓ Created theme: Economy
- Skipped Education, Skills and Training: Already exists
- Skipped Environment: Already exists
✓ Created theme: Farming and Rural
✓ Created theme: Geography
✓ Created theme: Health and Social Care
✓ Created theme: Housing
✓ Created theme: Labour Force
✓ Created theme: Management Information
✓ Created theme: Population
✓ Created theme: Reference
✓ Created theme: Scottish Index of Multiple Deprivation
✓ Created theme: Transport
------------------------------------------------------------
Summary:
  Created: 17
  Skipped: 2
