## Imports  
Setting imports for the project

In [1]:
from rdflib import Graph, URIRef, Literal, RDFS, SKOS, FOAF, DC
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
from rdflib.namespace import OWL, RDF, RDFS
import networkx as nx
import matplotlib.pyplot as plt
import os
from os import path
import lzma
import shutil
from rdflib import Graph, URIRef
import os
import lzma

## General function for dealing with the file system  
Here functions for dealing with the files.

In [2]:
def print_all_files(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            print(file)

## Useful functions for loading and converting files  
Below I define handy functions for modyfing, anlyzing and loading the data.


In [15]:
def convert_owl_to_rdf(input_owl_file, output_rdf_file, output_format='turtle'):
    g = Graph()
    g.parse(input_owl_file, format='xml') 
    g.serialize(destination=output_rdf_file, format=output_format)
    print(f"Converted {input_owl_file} to {output_rdf_file} in {output_format} format.")

def load_rdf(file_path, file_format='turtle', verbose = False):
    if not os.path.isfile(file_path):
        print(f"Error: The file '{file_path}' does not exist or is not a file.")
        return None
    g = Graph()
    try:
        g.parse(file_path, format=file_format)
        if verbose:
            print(f"Successfully loaded {len(g)} triples from '{file_path}' in '{file_format}' format.")
        return g
    except Exception as e:
        if verbose:
            print(f"Failed to load RDF file '{file_path}': {e}")
        return None

def save_rdf(graph, file_path, file_format='turtle'):
    graph.serialize(destination=file_path, format=file_format)
    print(f"Graph serialized to {file_path} in {file_format} format.")

def list_namespaces(graph):
    print("Namespaces in the graph:")
    for prefix, namespace in graph.namespaces():
        print(f"{prefix}: {namespace}")

def count_triples(graph):
    return len(graph)

def list_classes(graph):
    classes = set()
    for s in graph.subjects(predicate=RDF.type, object=OWL.Class):
        classes.add(s)
    print(f"Classes ({len(classes)}):")
    for cls in classes:
        print(cls)


def add_triple(graph, subject, predicate, obj):
    s = URIRef(subject)
    p = URIRef(predicate)
    if isinstance(obj, str) and obj.startswith("http"):
        o = URIRef(obj)
    else:
        o = Literal(obj)
    graph.add((s, p, o))
    print(f"Added triple: ({s}, {p}, {o})")

def remove_triple(graph, subject, predicate, obj):

    s = URIRef(subject)
    p = URIRef(predicate)
    if isinstance(obj, str) and obj.startswith("http"):
        o = URIRef(obj)
    else:
        o = Literal(obj)
    graph.remove((s, p, o))
    print(f"Removed triple: ({s}, {p}, {o})")


In [4]:
def list_namespaces(graph):
    print("Namespaces in the graph:")
    for prefix, namespace in graph.namespaces():
        print(f"{prefix}: {namespace}")

def count_triples_plain(graph):
    return len(graph)

def list_classes(graph):
    classes = set()
    for s in graph.subjects(predicate=RDF.type, object=OWL.Class):
        classes.add(s)
    print(f"Classes ({len(classes)}):")
    for cls in classes:
        print(cls)

def visualize_graph_static(graph, max_triples=100):
    G = nx.DiGraph()
    count = 0
    for s, p, o in graph:
        G.add_edge(str(s), str(o), label=str(p))
        count += 1
        if count >= max_triples:
            break

    plt.figure(figsize=(24, 24))
    pos = nx.spring_layout(G, k=0.15, iterations=20)
    nx.draw_networkx_nodes(G, pos, node_size=500, node_color='skyblue')
    nx.draw_networkx_edges(G, pos, arrowstyle='->', arrowsize=20, edge_color='gray')
    nx.draw_networkx_labels(G, pos, font_size=8)
    plt.title("RDF Graph Visualization (Subset)")
    plt.axis('off')
    plt.show()
    
def count_triples(ttl_file_path, file_format='turtle'):
    g = Graph()
    g.parse(ttl_file_path, format=file_format)
    subject_counts = {}
    predicate_counts = {}
    object_counts = {}
    for s, p, o in g:
        subject_counts[s] = subject_counts.get(s, 0) + 1
        predicate_counts[p] = predicate_counts.get(p, 0) + 1
        object_counts[o] = object_counts.get(o, 0) + 1
    return subject_counts, predicate_counts, object_counts


## Advanced analitical functions for visualizing Knowledge Graphs

In [5]:
def triplet_insights_with_schema(data_ttl_path, schema_owl_path, data_format='turtle', schema_format='xml'):
    g = Graph()
    if schema_owl_path.endswith('.xz'):
        with lzma.open(schema_owl_path, 'rt', encoding='utf-8') as f:
            g.parse(f, format=schema_format)
    else:
        g.parse(schema_owl_path, format=schema_format)
    if data_ttl_path.endswith('.xz'):
        with lzma.open(data_ttl_path, 'rt', encoding='utf-8') as f:
            g.parse(f, format=data_format)
    else:
        g.parse(data_ttl_path, format=data_format)
    subject_counts = {}
    predicate_counts = {}
    object_counts = {}
    for s, p, o in g:
        subject_counts[s] = subject_counts.get(s, 0) + 1
        predicate_counts[p] = predicate_counts.get(p, 0) + 1
        object_counts[o] = object_counts.get(o, 0) + 1
    sorted_predicates = sorted(predicate_counts.items(), key=lambda x: x[1], reverse=True)
    sorted_subjects = sorted(subject_counts.items(), key=lambda x: x[1], reverse=True)
    sorted_objects = sorted(object_counts.items(), key=lambda x: x[1], reverse=True)
    plt.figure(figsize=(10, 6))
    top_predicates = sorted_predicates[:10]
    predicates, counts = zip(*top_predicates)
    predicates = [str(p) for p in predicates]
    plt.bar(predicates, counts)
    plt.xticks(rotation=45, ha='right')
    plt.xlabel('Predicates')
    plt.ylabel('Counts')
    plt.title('Top 10 Predicates')
    plt.tight_layout()
    plt.show()
    plt.figure(figsize=(10, 6))
    top_subjects = sorted_subjects[:10]
    subjects, counts = zip(*top_subjects)
    subjects = [str(s).split('/')[-1] for s in subjects]
    plt.bar(subjects, counts)
    plt.xticks(rotation=45, ha='right')
    plt.xlabel('Subjects')
    plt.ylabel('Counts')
    plt.title('Top 10 Subjects')
    plt.tight_layout()
    plt.show()
    plt.figure(figsize=(10, 6))
    top_objects = sorted_objects[:10]
    objects, counts = zip(*top_objects)
    objects = [str(o).split('/')[-1] for o in objects]
    plt.bar(objects, counts)
    plt.xticks(rotation=45, ha='right')
    plt.xlabel('Objects')
    plt.ylabel('Counts')
    plt.title('Top 10 Objects')
    plt.tight_layout()
    plt.show()
    return subject_counts, predicate_counts, object_counts


In [13]:
def list_classes_from_ttl(ttl_file_path):
    if not os.path.isfile(ttl_file_path):
        raise FileNotFoundError(f"The file '{ttl_file_path}' does not exist.")
    g = Graph()

    try:
        g.parse(ttl_file_path, format='turtle')
        print(f"Successfully loaded {len(g)} triples from '{ttl_file_path}'.")
    except Exception as e:
        raise ValueError(f"Failed to parse the Turtle file: {e}")
        
    namespaces = {
        'rdfs': RDFS,
        'owl': OWL,
        'rdf': RDF
    }

    class_uris = set()
    for s, p, o in g.triples((None, RDF.type, None)):
        if o in [RDFS.Class, OWL.Class]:
            class_uris.add(s)

    class_uris = sorted(class_uris)
    class_labels = {}

    for class_uri in class_uris:
        label = None
        for predicate in [RDFS.label, FOAF.name, DC.title]:
            label = g.value(subject=class_uri, predicate=predicate)
            if label:
                break

        if label:
            class_labels[str(class_uri)] = str(label)
        else:
            class_labels[str(class_uri)] = str(class_uri)
    print(f"Found {len(class_uris)} classes in the dataset.")
    return class_uris, class_labels

## Function for splitting the file by proteome and saving into files

In [6]:
def split_rdf_by_proteome(data_ttl_path, schema_owl_path, output_dir, data_format='turtle', schema_format='xml'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    schema_graph = Graph()
    if schema_owl_path.endswith('.xz'):
        with lzma.open(schema_owl_path, 'rt', encoding='utf-8') as f:
            schema_graph.parse(f, format=schema_format)
    else:
        schema_graph.parse(schema_owl_path, format=schema_format)

    data_graph = Graph()
    if data_ttl_path.endswith('.xz'):
        with lzma.open(data_ttl_path, 'rt', encoding='utf-8') as f:
            data_graph.parse(f, format=data_format)
    else:
        data_graph.parse(data_ttl_path, format=data_format)

    proteome_query = """
    PREFIX core: <http://purl.uniprot.org/core/>

    SELECT DISTINCT ?proteome
    WHERE {
        ?proteome a core:Proteome .
    }
    """
    results = data_graph.query(proteome_query)

    for row in results:
        proteome = row.proteome

        proteome_graph = Graph()
        proteome_graph += schema_graph

        for s, p, o in data_graph.triples((proteome, None, None)):
            proteome_graph.add((s, p, o))

        for s, p, o in data_graph.triples((None, None, proteome)):
            proteome_graph.add((s, p, o))

        proteome_id = str(proteome).split('/')[-1]
        file_name = f"proteome_{proteome_id}.ttl"
        file_path = os.path.join(output_dir, file_name)

        proteome_graph.serialize(destination=file_path, format='turtle')
        print(f"Saved file into {file_path}")


def split_rdf_by_proteome_id(data_ttl_path, schema_owl_path, output_dir, data_format='turtle', schema_format='xml'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    g = Graph()
    if schema_owl_path.endswith('.xz'):
        with lzma.open(schema_owl_path, 'rt', encoding='utf-8') as f:
            g.parse(f, format=schema_format)
    else:
        g.parse(schema_owl_path, format=schema_format)
    if data_ttl_path.endswith('.xz'):
        with lzma.open(data_ttl_path, 'rt', encoding='utf-8') as f:
            g.parse(f, format=data_format)
    else:
        g.parse(data_ttl_path, format=data_format)
    proteomes = set()
    for s, p, o in g:
        if isinstance(s, URIRef):
            parts = str(s).split('/uniprot/')
            if len(parts) > 1:
                proteome_id = parts[1].split('#')[0]
                proteomes.add(proteome_id)
    proteome_graphs = {pid: Graph() for pid in proteomes}
    for pid in proteomes:
        proteome_graphs[pid].parse(schema_owl_path, format=schema_format)
    for s, p, o in g:
        if isinstance(s, URIRef):
            parts = str(s).split('/uniprot/')
            if len(parts) > 1:
                proteome_id = parts[1].split('#')[0]
                if proteome_id in proteome_graphs:
                    proteome_graphs[proteome_id].add((s, p, o))
    for pid, graph in proteome_graphs.items():
        file_name = f"proteome_{pid}.ttl"
        file_path = os.path.join(output_dir, file_name)
        graph.serialize(destination=file_path, format=data_format)



# Function for checking the overlap of proteoms for two organisism

In [7]:
def count_overlapping_triples_with_schema(dir1, dir2, schema_owl_path, data_format='turtle', schema_format='xml'):
    def load_graph_with_schema(proteome_file):
        g = Graph()
        if schema_owl_path.endswith('.xz'):
            with lzma.open(schema_owl_path, 'rt', encoding='utf-8') as f:
                g.parse(f, format=schema_format)
        else:
            g.parse(schema_owl_path, format=schema_format)
        if proteome_file.endswith('.ttl.xz'):
            with lzma.open(proteome_file, 'rt', encoding='utf-8') as f:
                g.parse(f, format=data_format)
        else:
            g.parse(proteome_file, format=data_format)
        return g

    def extract_proteome_id(filename):
        basename = os.path.basename(filename)
        if basename.startswith('proteome_') and basename.endswith('.ttl'):
            return basename[len('proteome_'):-len('.ttl')]
        elif basename.startswith('proteome_') and basename.endswith('.ttl.xz'):
            return basename[len('proteome_'):-len('.ttl.xz')]
        return None

    files1 = {extract_proteome_id(f): os.path.join(dir1, f) for f in os.listdir(dir1)}
    files2 = {extract_proteome_id(f): os.path.join(dir2, f) for f in os.listdir(dir2)}
    overlapping_ids = set(files1.keys()).intersection(set(files2.keys()))
    overlap_info = {}

    for pid in overlapping_ids:
        file1 = files1[pid]
        file2 = files2[pid]
        graph1 = load_graph_with_schema(file1)
        graph2 = load_graph_with_schema(file2)
        overlapping_triples = set(graph1).intersection(set(graph2))
        overlap_info[pid] = len(overlapping_triples)

    return overlap_info


## Inspecting the data, let's load and convert some organism files into .ttl format.

In [8]:
DATA_PATH = os.path.dirname(os.getcwd()) + "\\Data"

In [9]:
print_all_files(DATA_PATH)

uniprotkb_reviewed_archea_asgard_group_1935183_0.rdf
uniprotkb_reviewed_archea_candidatus_thermoplasmatota_2283796_0.rdf
uniprotkb_reviewed_archea_dpann_group_1783276_0.rdf
uniprotkb_reviewed_bacteria_bacteria_incertae_sedis_2323_0.rdf
uniprotkb_reviewed_bacteria_environmental_samples_48479_0.rdf
uniprotkb_reviewed_bacteria_fcb_group_1783270_0.rdf
uniprotkb_reviewed_bacteria_fusobacteriota_32066_0.rdf
uniprotkb_reviewed_bacteria_myxococcota_2818505_0.rdf
uniprotkb_reviewed_bacteria_nitrospinota_and_tectimicrobiota_group_1802340_0.rdf
uniprotkb_reviewed_bacteria_nitrospirota_40117_0.rdf
uniprotkb_reviewed_bacteria_proteobacteria_acidithiobacillia_1807140_0.rdf
uniprotkb_reviewed_bacteria_proteobacteria_alphaproteobacteria_28211_0.rdf
uniprotkb_reviewed_bacteria_proteobacteria_betaproteobacteria_28216_0.rdf
uniprotkb_reviewed_bacteria_proteobacteria_gammaproteobacteria_1236_0.rdf
uniprotkb_reviewed_bacteria_proteobacteria_hydrogenophilia_2008785_0.rdf
uniprotkb_reviewed_bacteria_proteoba

In [16]:
DATA_SAVE_CONVERTED = DATA_PATH + "\\Data_converted\\"
for _, _, files in os.walk(DATA_PATH + "\\Data\\"):
        for file in files:
            if file.endswith(".owl") or file.endswith(".xml"):
                  continue
            print(file)
            print(DATA_SAVE_CONVERTED + str(file).split(".")[0] + ".ttl")
            convert_owl_to_rdf(DATA_PATH + "\\" + file, DATA_SAVE_CONVERTED + str(file).split(".")[0] + ".ttl")
            print(f"Saved and converted file {file}")

d:\Pulpit\UniProtKnowledgeGraph\Data\uniprotkb_reviewed_archea_asgard_group_1935183_0.rdf does not look like a valid URI, trying to serialize this will break.


uniprotkb_reviewed_archea_asgard_group_1935183_0.rdf
d:\Pulpit\UniProtKnowledgeGraph\Data\Data_converted\uniprotkb_reviewed_archea_asgard_group_1935183_0.ttl


URLError: <urlopen error unknown url type: d>

In [11]:
DATA_PATH_CONVERTED = DATA_PATH + "\\Data_converted\\"

In [None]:
#Let's inspect the number of triples for every converted file
for _, _, files in os.walk(DATA_PATH_CONVERTED):
    for file in files:
        loaded_graph = load_rdf(DATA_SAVE_CONVERTED + file)
        print(f"Found {count_triples_plain(loaded_graph)} triples for {file} graph")

NameError: name 'DATA_SAVE_CONVERTED' is not defined

In [9]:
DATA_SPLIT_PATH = DATA_PATH + "\\Data_proteome_split\\"
DATA_SCHEMA_PATH = DATA_PATH + "\\Data_schema\\core.owl.xml"

## Split data by proteomes
In the cell below we perform splitting the data for a given organisms by proteomes existing in it.  
I chose only some files that contain sufficient ammount of triples and interesting ammount (diverse) ammount of proteoms.  
The data is saved into *Data_proteome_split* directory.

In [None]:
file_name_split = "uniprotkb_reviewed_bacteria_fcb_group_1783270_0.ttl"
OUTPUT_SAVE_PATH = DATA_SPLIT_PATH + "bacteria_fcb_group_1783270_0\\"

split_rdf_by_proteome_id(DATA_PATH_CONVERTED + file_name_split, DATA_SCHEMA_PATH, OUTPUT_SAVE_PATH)

d:\Pulpit\UniProtKnowledgeGraph\Data\Data_converted\uniprotkb_reviewed_bacteria_myxococcota_2818505_0 does not look like a valid URI, trying to serialize this will break.


URLError: <urlopen error unknown url type: d>

In [10]:
# Another split performed for different bacteria
file_name_split = "uniprotkb_reviewed_bacteria_myxococcota_2818505_0.ttl"
OUTPUT_SAVE_PATH = DATA_SPLIT_PATH + "bacteria_myxococcota_2818505_0\\"

split_rdf_by_proteome_id(DATA_PATH_CONVERTED + file_name_split, DATA_SCHEMA_PATH, OUTPUT_SAVE_PATH)

NameError: name 'DATA_PATH_CONVERTED' is not defined

In [27]:
#overlap check
file_bacteria_I = DATA_SPLIT_PATH + "bacteria_myxococcota_2818505_0\\"
file_bacteria_II = DATA_SPLIT_PATH + "bacteria_fcb_group_1783270_0\\"
count_overlapping_triples_with_schema(file_bacteria_I, file_bacteria_II, DATA_SCHEMA_PATH)

{'': 2153}

## Ammount of triplts for each proteon
We will ennumerate through each proteome and calculate the number of triplets available in each file.

In [12]:
total_number_triples = 0
for _, _, files in os.walk(OUTPUT_SAVE_PATH):
    for file in files:
        graph = load_rdf(OUTPUT_SAVE_PATH + file)
        triplet_count = count_triples_plain(graph)
        print(f"Number of triplets for proteome {file} is {triplet_count} \n")
        total_number_triples += triplet_count
print(f"Total number of triples it {total_number_triples}")

Number of triplets for proteome proteome_.ttl is 735716 

Number of triplets for proteome proteome_A0A084JZA8.ttl is 2963 

Number of triplets for proteome proteome_A0A084JZF2.ttl is 3104 

Number of triplets for proteome proteome_A0A086F3E3.ttl is 3012 

Number of triplets for proteome proteome_A0A0B5RNJ4.ttl is 3140 

Number of triplets for proteome proteome_A0A0B5RUB0.ttl is 2964 

Number of triplets for proteome proteome_A0A0P0FGV9.ttl is 3041 

Number of triplets for proteome proteome_A0A150XSC5.ttl is 2928 

Number of triplets for proteome proteome_A0A150XSR0.ttl is 2934 

Number of triplets for proteome proteome_A0A1G6LGU2.ttl is 2950 

Number of triplets for proteome proteome_A0A1H1XG33.ttl is 2951 

Number of triplets for proteome proteome_A0A1H7VGH3.ttl is 2939 

Number of triplets for proteome proteome_A0A1M7D0R2.ttl is 2961 

Number of triplets for proteome proteome_A0A1S1YUU1.ttl is 2972 

Number of triplets for proteome proteome_A0A2T5Y4G4.ttl is 3239 

Number of triplets

In [None]:
OUTPUT_SAVE_PATH = DATA_SPLIT_PATH + "bacteria_myxococcota_2818505_0\\"
total_number_triples = 0
for _, _, files in os.walk(OUTPUT_SAVE_PATH):
    for file in files:
        graph = load_rdf(OUTPUT_SAVE_PATH + file, verbose=True)
        triplet_count = count_triples_plain(graph)
        total_number_triples += triplet_count
print(f"Total number of triples is {total_number_triples}")

Successfully loaded 175145 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_.ttl' in 'turtle' format.
Successfully loaded 3078 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_A0A2T4VDM4.ttl' in 'turtle' format.
Successfully loaded 2914 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_A0A2T4VDP8.ttl' in 'turtle' format.
Successfully loaded 2922 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_A7H677.ttl' in 'turtle' format.
Successfully loaded 2944 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_A7H688.ttl' in 'turtle' format.
Successfully loaded 2975 triples from 'd:\Pulpit\UniProtKnowledgeGraph\Data\Data_proteome_split\bacteria_myxococcota_2818505_0\proteome_A7H6E5.ttl' in 'turt

## Converting auxiliary files into .ttl format

In [16]:
DATA_AUXILIARY_PATH = DATA_PATH + "\\Data_auxiliary\\"
DATA_AUXILIARY_CONVERTED_PATH = DATA_PATH + "\\Data_auxiliary_converted\\"

In [None]:
total_number_triples_aux = 0
for _, _, files in os.walk(DATA_AUXILIARY_PATH):
    for file in files:
        if file.endswith(".owl") or file.endswith(".xml"):
            continue
        convert_owl_to_rdf(DATA_AUXILIARY_PATH + file, DATA_AUXILIARY_CONVERTED_PATH + str(file).split(".")[0] + ".ttl")
        #graph = load_rdf(DATA_AUXILIARY_PATH + file, verbose = True)
        #triplet_count = count_triples_plain(graph)
        #total_number_triples_aux += triplet_count
print(f"Total number of triples is {total_number_triples}")

Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\databases.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary_converted\databases.ttl in turtle format.
Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\diseases.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary_converted\diseases.ttl in turtle format.
Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\enzyme-hierarchy.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary_converted\enzyme-hierarchy.ttl in turtle format.
Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\enzyme.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary_converted\enzyme.ttl in turtle format.
Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\journals.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary_converted\journals.ttl in turtle format.
Converted d:\Pulpit\UniProtKnowledgeGraph\Data\Data_auxiliary\keywords-hierarchy.rdf to d:\Pulpit\UniProtKnowledgeGraph\Data\Data_a

Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#gYear, Converter=<function parse_xsd_gyear at 0x000002E11CD8E8C0>
Traceback (most recent call last):
  File "c:\Users\aaf6\AppData\Local\Programs\Python\Python310\lib\site-packages\rdflib\term.py", line 2163, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
  File "c:\Users\aaf6\AppData\Local\Programs\Python\Python310\lib\site-packages\rdflib\xsd_datetime.py", line 618, in parse_xsd_gyear
    raise ValueError("gYear string must be at least 4 numerals in length")
ValueError: gYear string must be at least 4 numerals in length
Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#gYear, Converter=<function parse_xsd_gyear at 0x000002E11CD8E8C0>
Traceback (most recent call last):
  File "c:\Users\aaf6\AppData\Local\Programs\Python\Python310\lib\site-packages\rdflib\term.py", line 2163, in _castLexicalToPython
    return conv_func(lex