In [2]:
import os
import csv
import json
from datetime import datetime

PENDING_DIR = "Pending/files"
METADATA_DIR = "Pending/metadata"

def _read_csv_headers(dataset_path):
    """Return the stripped column names from the first row of a CSV file.

    Returns an empty list when the file has no rows at all.
    """
    # 'utf-8-sig' transparently drops a leading BOM (common in Excel exports)
    # so it does not end up glued to the first column name; plain UTF-8 files
    # are read unchanged. newline='' is what the csv module expects so that
    # quoted fields containing line breaks are parsed correctly.
    with open(dataset_path, 'r', encoding='utf-8-sig', newline='') as f:
        first_row = next(csv.reader(f), [])  # [] instead of StopIteration on an empty file
    return [col.strip() for col in first_row]


def generate_metadata_template(filename):
    """
    Generates a JSON metadata template for a given CSV file.
    The template is saved in Pending/metadata/ and should be manually populated before CKAN upload.

    Args:
        filename: Name of the file inside PENDING_DIR (not a full path).

    Returns:
        The file name (without directory) of the JSON template that was
        written to METADATA_DIR, in the form
        ``metadata_<filename without extension>_<YYYYMMDD>.json``.

    Notes:
        If the file does not exist in PENDING_DIR or is not a ``.csv`` file,
        the template is still written, with an empty ``schema.fields`` list.
        An existing template with the same name is overwritten.
    """
    template = {
        "dataset": {
            "name": "",  # Required: Unique dataset name (e.g., "my_dataset")
            "title": "",  # Required: User-friendly title
            "notes": "",  # Optional: Description of the dataset
            "owner_org": "",  # Optional: Organization name
            "author": "",  # Optional: Author of the dataset
            "author_email": "",  # Optional: Contact email
            "maintainer": "",  # Optional: Dataset maintainer
            "maintainer_email": "",  # Optional: Maintainer email
            "license_id": "",  # Optional: License ID (e.g., "odc-by")
            "tags": [],  # Optional: List of tag dictionaries
            "extras": [
                {"key": "Quality Management", "value": ""},
                {"key": "Accessibility and Clarity", "value": ""},
                {"key": "Accuracy and Reliability", "value": ""},
                {"key": "Coherence and Comparability", "value": ""},
                {"key": "Relevance", "value": ""},
                {"key": "Timeliness and Punctuality", "value": ""},
                {"key": "Confidentiality Policy", "value": ""},
                {"key": "Revisions", "value": ""}
            ]
        },
        "resource": {
            "package_id": "",  # Required if adding to existing dataset
            "name": filename,  # Required: Resource name (usually filename)
            "description": "",  # Optional: Resource description
            "format": "csv",  # Required: File format
            "mimetype": "text/csv",  # Optional: MIME type
            "url_type": "upload",  # Required: Specifies upload type
            "resource_type": "file.upload",  # Required: Type of resource
            "schema": {
                "fields": []  # Automatically populated with column names and descriptions
            }
        }
    }

    # Generate field descriptions based on CSV headers
    dataset_path = os.path.join(PENDING_DIR, filename)
    # isfile (not exists) so a directory that happens to end in ".csv" is not
    # opened; the extension check is case-insensitive so "DATA.CSV" also works.
    if os.path.isfile(dataset_path) and filename.lower().endswith('.csv'):
        template["resource"]["schema"]["fields"] = [
            {
                "id": column,
                "type": "text",
                "description": f"[Enter description for {column}]"  # Placeholder for user to fill
            }
            for column in _read_csv_headers(dataset_path)
        ]

    # Ensure metadata directory exists
    os.makedirs(METADATA_DIR, exist_ok=True)

    # Save metadata template. splitext removes only the final extension, so
    # names containing extra dots (e.g. "data.v2.csv") keep their full stem
    # and do not collide with each other.
    base_name = os.path.splitext(filename)[0]
    date_stamp = datetime.now().strftime('%Y%m%d')
    template_filename = f"metadata_{base_name}_{date_stamp}.json"
    metadata_path = os.path.join(METADATA_DIR, template_filename)

    with open(metadata_path, 'w', encoding='utf-8') as json_file:
        json.dump(template, json_file, indent=4)

    print(f"✅ Metadata template generated: {metadata_path}")
    return template_filename

if __name__ == "__main__":
    # Create both working directories up front so the listing below cannot
    # fail on a fresh checkout.
    for directory in (PENDING_DIR, METADATA_DIR):
        os.makedirs(directory, exist_ok=True)

    # Collect every CSV file waiting in Pending/files/
    pending_files = []
    for entry in os.listdir(PENDING_DIR):
        if entry.endswith(".csv"):
            pending_files.append(entry)

    # Produce one metadata template per pending CSV, or report that there
    # is nothing to do.
    if pending_files:
        for csv_name in pending_files:
            generate_metadata_template(csv_name)
    else:
        print("⚠️ No CSV files found in Pending/files/")

✅ Metadata template generated: Pending/metadata\metadata_Carbon Footprint Breakdown_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Data Zone Lookup - Archived Geographies_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Deaths Involving Coronavirus (Covid-19)_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Gender Pay Gap_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Gross Domestic Product - Quarterly Output by Industry_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_House Prices - Residential Properties, Sales and Price_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Life Expectancy_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Non-Domestic Energy Performance Certificates_20250509.json
✅ Metadata template generated: Pending/metadata\metadata_Police Officer Quarterly Strength_20250509.json
✅ Metadata template generated: Pend