<a href="https://colab.research.google.com/github/PhiloBiblon/philobiblon-to-wikibase/blob/master/pb2wb/colab/make_schema_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
def extract_entity_edits(data):
    """Strip a parsed Wikibase schema down to the fields used for the summary.

    For every entity edit this keeps each statement group's property
    (``pid``/``label``), each statement's value, and each qualifier's property
    and value. Every value is normalised to the same fixed shape
    (``columnName``, ``id``, ``amount.columnName``, ``unit.id``,
    ``unit.label``); fields that are absent or JSON ``null`` in the input come
    out as ``None``.

    Args:
        data: Parsed schema JSON. Must contain an ``entityEdits`` list whose
            items each carry a ``statementGroups`` list.

    Returns:
        A dict with the single key ``entityEdits`` holding the reduced
        structure.

    Raises:
        KeyError: If a required key is missing (``entityEdits``,
            ``statementGroups``, ``property``, ``statements``, ``value``,
            ``prop``, or a property's ``pid``/``label``).
    """
    def extract_value(value):
        # A key that is present but null is returned as None by .get(), so the
        # `{}` default alone is not enough; `or {}` covers both cases.
        value = value or {}
        amount = value.get("amount") or {}
        unit = value.get("unit") or {}
        return {
            "columnName": value.get("columnName"),
            "id": value.get("id"),
            "amount": {"columnName": amount.get("columnName")},
            "unit": {
                "id": unit.get("id"),
                "label": unit.get("label")
            }
        }

    def extract_qualifiers(qualifiers):
        # Treat a null/missing qualifier list as "no qualifiers".
        return [
            {
                "prop": {
                    "pid": qualifier["prop"]["pid"],
                    "label": qualifier["prop"]["label"]
                },
                "value": extract_value(qualifier["value"])
            }
            for qualifier in (qualifiers or [])
        ]

    def extract_statements(statements):
        return [
            {
                "value": extract_value(statement["value"]),
                "qualifiers": extract_qualifiers(statement.get("qualifiers"))
            }
            for statement in statements
        ]

    def extract_statement_groups(statement_groups):
        return [
            {
                "property": {
                    "pid": group["property"]["pid"],
                    "label": group["property"]["label"]
                },
                "statements": extract_statements(group["statements"])
            }
            for group in statement_groups
        ]

    return {
        "entityEdits": [
            {
                "statementGroups": extract_statement_groups(edit["statementGroups"])
            }
            for edit in data["entityEdits"]
        ]
    }


In [28]:
import json
import csv
import sys

def json_to_csv(json_data, csv_filename):
    """Flatten an extracted schema into a CSV file, one row per qualifier.

    Each row repeats the owning property and statement value and then lists one
    qualifier. A statement that has no qualifiers still produces a single row,
    with the qualifier columns left blank, so it is not lost from the output.
    ``None`` values are written as empty cells.

    Args:
        json_data: Dict with an ``entityEdits`` list; each edit has
            ``statementGroups``, each group a ``property`` (``pid``/``label``)
            and a ``statements`` list.
        csv_filename: Path of the CSV file to create (overwritten if present).

    Raises:
        KeyError: If ``entityEdits``, ``statementGroups``, ``property``,
            ``statements`` or a qualifier's ``prop``/``value`` key is missing.
        OSError: If the output file cannot be opened for writing.
    """
    fieldnames = [
        'property_pid', 'property_label',
        'statement_value_columnName', 'statement_value_id',
        'qualifier_prop_pid', 'qualifier_prop_label',
        'qualifier_value_columnName', 'qualifier_value_amount_columnName',
        'qualifier_value_unit_id', 'qualifier_value_unit_label'
    ]

    def qualifier_cells(qualifier):
        # `or {}` guards against keys that are present but null, which the
        # `{}` default of dict.get() does not cover.
        value = qualifier["value"] or {}
        amount = value.get("amount") or {}
        unit = value.get("unit") or {}
        return {
            'qualifier_prop_pid': qualifier["prop"].get("pid"),
            'qualifier_prop_label': qualifier["prop"].get("label"),
            'qualifier_value_columnName': value.get("columnName"),
            'qualifier_value_amount_columnName': amount.get("columnName"),
            'qualifier_value_unit_id': unit.get("id"),
            'qualifier_value_unit_label': unit.get("label")
        }

    # newline='' lets the csv module control line endings itself.
    with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for edit in json_data["entityEdits"]:
            for group in edit["statementGroups"]:
                property_pid = group["property"]["pid"]
                property_label = group["property"]["label"]

                for statement in group["statements"]:
                    statement_value = statement.get("value") or {}
                    statement_cells = {
                        'property_pid': property_pid,
                        'property_label': property_label,
                        'statement_value_columnName': statement_value.get("columnName"),
                        'statement_value_id': statement_value.get("id")
                    }

                    qualifiers = statement.get("qualifiers") or []
                    if not qualifiers:
                        # DictWriter fills the absent qualifier columns with
                        # its default restval (an empty string).
                        writer.writerow(statement_cells)
                        continue

                    for qualifier in qualifiers:
                        writer.writerow({**statement_cells, **qualifier_cells(qualifier)})



In [3]:
# Ask the user to pick a local file through Colab's upload widget.
# This cell only works inside Google Colab (google.colab is not available elsewhere).
import json
import csv
from google.colab import files
# Later cells read the result through uploaded.values() and .decode(), i.e. they
# treat it as a mapping whose values are the uploaded files' raw bytes.
uploaded = files.upload()


Saving schema.json to schema (1).json


In [14]:
# Take the content of the first uploaded file only; any additional files in
# `uploaded` are ignored. Raises StopIteration if nothing was uploaded.
data = next(iter(uploaded.values()))
import json
# decode() with no argument assumes the file is UTF-8 encoded.
input_json = json.loads(data.decode())

In [15]:
# Quick sanity check: number of top-level entries in the parsed schema.
print(len(input_json))

4


In [16]:
# Reduce the full schema to the property / statement / qualifier fields
# that extract_entity_edits keeps.
extracted_schema = extract_entity_edits(input_json)


In [29]:
# Counts the top-level keys of the result; extract_entity_edits returns a dict
# whose only key is "entityEdits", so this does not reflect how many edits exist.
print(len(extracted_schema))

1


In [18]:
# Mount Google Drive so the CSV can be written under /content/drive.
# Colab-only; when Drive is already mounted this just reports so (see output below).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
# Build the destination path inside the mounted Drive ("My Drive" root folder)
# and write the flattened schema there. Requires the Drive mount cell to have run.
outfile_name = 'extracted_schema.csv'
outfile_name = f'/content/drive/My Drive/{outfile_name}'
json_to_csv(extracted_schema, outfile_name)