In [None]:
import logging
import requests
from bs4 import BeautifulSoup
import re
import json
from concurrent.futures import ThreadPoolExecutor
from rdflib import Graph, URIRef, OWL, RDF, RDFS, Literal, Namespace
from urllib.parse import unquote, quote

In [None]:
# Input and output locations.
main_data_path = 'filtered_data.json'
friends_data_path = 'lovecraft_friends.json'  # Records about Lovecraft's friends
output_path = 'combined_data.json'

# Read the main data set.
with open(main_data_path, encoding='utf-8') as main_file:
    main_data = json.loads(main_file.read())

# Read the records about Lovecraft's friends.
with open(friends_data_path, encoding='utf-8') as friends_file:
    friends_data = json.loads(friends_file.read())

# Keep only the friends whose "title" is not already present in the main data.
titles_in_main_data = set(entity['title'] for entity in main_data)
unique_friends_data = list(
    filter(lambda friend: friend['title'] not in titles_in_main_data, friends_data)
)

# Main records first, then the friends that were not duplicates.
combined_data = main_data + unique_friends_data

# Persist the merged list as pretty-printed, non-ASCII-escaped JSON.
with open(output_path, 'w', encoding='utf-8') as output_file:
    output_file.write(json.dumps(combined_data, indent=4, ensure_ascii=False))

print(f"Данные успешно объединены и сохранены в '{output_path}'.")

In [None]:
import json
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, URIRef, BNode
from urllib.parse import quote, unquote
from rdflib.namespace import XSD

# File locations.
data_path = 'combined_data.json'
output_ttl = 'lovecraft_ontology.ttl'

# Load the merged entity records. The merge step writes this file as UTF-8
# with ensure_ascii=False, so it has to be decoded as UTF-8 explicitly rather
# than with the platform default encoding (which is not UTF-8 everywhere).
with open(data_path, 'r', encoding='utf-8') as data_file:
    cleaned_data = json.load(data_file)

# Create the RDF graph and register the prefixes used when serialising.
g = Graph()
EX = Namespace("http://example.org/lovecraft#")
g.bind("owl", OWL)
g.bind("rdfs", RDFS)
g.bind("ex", URIRef("http://example.org/lovecraft#"))

# Properties whose name is shared by several classes, with the classes
# (domains) for which a class-specific variant exists.
_RENAMED_PROPERTY_DOMAINS = {
    "hasAffiliation": ("Work", "RealWorldPerson", "Organisation", "Character"),
    "hasAppearance": ("Work", "Location", "Artefact", "Organisation", "Character"),
    "wasCreatedBy": ("Work", "Artefact", "Organisation", "Character", "Location"),
    "madeFirstAppearanceOn": ("Work", "Location", "Artefact", "Organisation", "Character"),
    "hasAlternativeName": ("RealWorldPerson", "Location", "Artefact", "Organisation", "Character"),
    "hasType": ("Location", "Artefact", "Organisation"),
    "isLocatedAt": ("Location", "Artefact", "Organisation", "Character"),
    "hasOrigin": ("Character", "Location", "Artefact"),
}

# property name -> {domain class -> class-specific property name}.
# The class-specific name is always the property name followed by the domain.
property_renaming_map = {
    prop_name: {domain: f"{prop_name}{domain}" for domain in domains}
    for prop_name, domains in _RENAMED_PROPERTY_DOMAINS.items()
}

def clean_uri(uri):
    """Turn a title or name into a string usable as a URI local name.

    The input is percent-decoded, any ``%22`` sequences still present and all
    single and double quote characters are removed, surrounding whitespace is
    trimmed and the remaining spaces are replaced by underscores.
    """
    decoded = unquote(uri)
    unquoted = decoded.replace('%22', '').translate({ord('"'): None, ord("'"): None})
    return "_".join(unquoted.strip().split(" "))
    
def rename_property(prop_name, domain, renaming_map):
    """Return the class-specific name of ``prop_name`` for ``domain``.

    ``renaming_map`` maps property name -> {domain -> renamed property}.
    When no entry exists for the property/domain pair, ``prop_name`` is
    returned unchanged.
    """
    try:
        return renaming_map[prop_name][domain]
    except KeyError:
        return prop_name
    
# Per-class property dictionaries.
#
# Each map is keyed by an infobox field name. The value describes the ontology
# property that field maps to:
#   "name"    - property name (before any class-specific renaming)
#   "type"    - characteristics: "functional", "symmetric" and/or "transitive"
#   "inverse" - optional name of the inverse property
#   "domain"  - class the property applies to
#   "range"   - class name, "xsd:..." datatype, or a list of these
#
# Properties for entities of class Work.
Work_property_map = {
    "affiliations": {"name": "hasAffiliation", "type": [], "domain": "Work", "range": ["Organisation", "Character", "Work"]},
    "appearances": {"name": "hasAppearance", "type": [], "domain": "Work", "range": ["Character", "Artefact", "Location"]},
    "appears_in": {"name": "isPartOfSeries", "type": ["transitive"], "inverse": "hasSeriesPart", "domain": "Work", "range": "Work"},
    "author": {"name": "hasAuthor", "type": [], "inverse": "isAuthorOf", "domain": "Work", "range": "RealWorldPerson"},
    "based_on": {"name": "isBasedOn", "type": [], "domain": "Work", "range": "Work"},
    "country": {"name": "hasCountryOfOrigin", "type": ["functional"], "domain": "Work", "range": "Location"},
    "date_of_writing": {"name": "wasWrittenOn", "type": ["functional"], "domain": "Work", "range": "xsd:string"},
    "genre(s)": {"name": "hasGenre", "type": [], "domain": "Work", "range": "xsd:string"},
    "language": {"name": "isInLanguage", "type": ["functional"], "domain": "Work", "range": "xsd:string"},
    "pages": {"name": "hasPageCount", "type": ["functional"], "domain": "Work", "range": "xsd:string"},
    "publication": {"name": "isPublishedAs", "type": [], "domain": "Work", "range": "xsd:string"},
    "publication_date": {"name": "wasPublishedOn", "type": ["functional"], "domain": "Work", "range": "xsd:string"},
    "release_date": {"name": "wasReleasedOn", "type": ["functional"], "domain": "Work", "range": "xsd:string"},
    # NOTE(review): "series" and "appears_in" both map to isPartOfSeries, but
    # only "appears_in" declares an inverse - confirm this is intended.
    "series": {"name": "isPartOfSeries", "type": ["transitive"], "domain": "Work", "range": "Work"},
    "writer": {"name": "hasWriter", "type": ["functional"], "domain": "Work", "range": "RealWorldPerson"},
}


# Properties for entities of class RealWorldPerson.
# Keys are infobox field names; each value gives the property "name", its
# characteristics ("type"), an optional "inverse", the "domain" class and the
# "range" (class name, "xsd:..." datatype, or a list of these).
RealWorldPerson_property_map = {
    "affiliations": {"name": "hasAffiliation", "type": [], "domain": "RealWorldPerson", "range": ["Organisation", "Work"]},
    "also_known_as": {"name": "hasAlternativeName", "type": ["functional"], "domain": "RealWorldPerson", "range": "xsd:string"},
    "birth_date": {"name": "wasBornOn", "type": ["functional"], "domain": "RealWorldPerson", "range": "xsd:string"},
    "birthplace": {"name": "hasBirthplace", "type": ["functional"], "domain": "RealWorldPerson", "range": ["Location"]},
    "connection_to_the_mythos": {"name": "isConnectedToMythos", "type": [], "domain": "RealWorldPerson", "range": "Work"},
    "death_date": {"name": "diedOn", "type": ["functional"], "domain": "RealWorldPerson", "range": "xsd:string"},
    "full_name": {"name": "hasFullName", "type": ["functional"], "domain": "RealWorldPerson", "range": "xsd:string"},
    "mate": {"name": "hasRealPersonMate", "type": ["symmetric"], "inverse": "isRealPersonMateOf", "domain": "RealWorldPerson", "range": ["RealWorldPerson"]},
    "nationality": {"name": "hasNationality", "type": ["functional"], "domain": "RealWorldPerson", "range": "Location"},
    "notable_works": {"name": "isKnownFor", "type": [], "domain": "RealWorldPerson", "range": "Work"},
    "occupation": {"name": "hasOccupation", "type": [], "domain": "RealWorldPerson", "range": "xsd:string"},
    "pseudonym(s)": {"name": "hasPseudonym", "type": [], "domain": "RealWorldPerson", "range": "xsd:string"},
    "relatives": {"name": "hasRelativeRealPerson", "type": ["symmetric"], "domain": "RealWorldPerson", "range": "RealWorldPerson"},
    "spouses": {"name": "hasSpouse", "type": ["functional", "symmetric"], "inverse": "isSpouseOf", "domain": "RealWorldPerson", "range": ["RealWorldPerson"]},
    "website": {"name": "hasWebsite", "type": ["functional"], "domain": "RealWorldPerson", "range": "xsd:string"},
    "years_active": {"name": "hasYearsActive", "type": [], "domain": "RealWorldPerson", "range": "xsd:string"},
    "children": {"name": "hasChildren", "type": [], "domain": "RealWorldPerson", "range": "RealWorldPerson"},
}

# Properties for entities of class Location.
# Keys are infobox field names; each value gives the property "name", its
# characteristics ("type"), the "domain" class and the "range" (class name,
# "xsd:..." datatype, or a list of these).
Location_property_map = {
    "accessibility": {"name": "hasAccessibility", "type": [], "domain": "Location", "range": "xsd:string"},
    "also_known_as": {"name": "hasAlternativeName", "type": ["functional"], "domain": "Location", "range": "xsd:string"},
    "appearances": {"name": "hasAppearance", "type": [], "domain": "Location", "range": "Work"},
    "country": {"name": "isInCountry", "type": ["functional"], "domain": "Location", "range": ["Location"]},
    "destroyed": {"name": "wasDestroyedOn", "type": ["functional"], "domain": "Location", "range": "xsd:string"},
    "first_appearance": {"name": "madeFirstAppearanceOn", "type": ["functional"], "domain": "Location", "range": "Work"},
    "inhabitants": {"name": "isInhabitedBy", "type": ["transitive"], "domain": "Location", "range": "Character"},
    "location": {"name": "isLocatedAt", "type": ["functional"], "domain": "Location", "range": ["Location"]},
    "notable_features": {"name": "hasNotableFeature", "type": [], "domain": "Location", "range": "xsd:string"},
    "purpose": {"name": "hasPurpose", "type": [], "domain": "Location", "range": "xsd:string"},
    "type": {"name": "hasType", "type": [], "domain": "Location", "range": "xsd:string"},
    "origin": {"name": "hasOrigin", "type": ["functional"], "domain": "Location", "range": ["Location"]}
}

# Properties for entities of class Artefact.
# Keys are infobox field names; each value gives the property "name", its
# characteristics ("type"), an optional "inverse", the "domain" class and the
# "range" (class name, "xsd:..." datatype, or a list of these).
Artefact_property_map = {
    "also_known_as": {"name": "hasAlternativeName", "type": ["functional"], "domain": "Artefact", "range": "xsd:string"},
    "appearances": {"name": "hasAppearance", "type": [], "domain": "Artefact", "range": "Work"},
    "created_by": {"name": "wasCreatedBy", "type": ["functional"], "inverse": "created", "domain": "Artefact", "range": "RealWorldPerson"},
    "destroyed": {"name": "wasDestroyedOn", "type": ["functional"], "domain": "Artefact", "range": "xsd:string"},
    "first_appearance": {"name": "madeFirstAppearanceOn", "type": ["functional"], "domain": "Artefact", "range": "Work"},
    "function": {"name": "hasFunction", "type": [], "domain": "Artefact", "range": "xsd:string"},
    "location": {"name": "isLocatedAt", "type": ["functional"], "domain": "Artefact", "range": ["Location", "Work"]},
    "origin": {"name": "hasOrigin", "type": ["functional"], "domain": "Artefact", "range": ["Location"]},
    # This range mixes a class with an xsd datatype.
    "place_of_origin": {"name": "hasPlaceOfOrigin", "type": ["functional"], "domain": "Artefact", "range": ["Location", "xsd:string"]},
    "type": {"name": "hasType", "type": [], "domain": "Artefact", "range": "xsd:string"},
    "used_by": {"name": "isUsedBy", "type": ["transitive"], "domain": "Artefact", "range": ["Character"]}
}

# Properties for entities of class Organisation.
# Keys are infobox field names; each value gives the property "name", its
# characteristics ("type"), an optional "inverse", the "domain" class and the
# "range" (class name, "xsd:..." datatype, or a list of these).
Organisation_property_map = {
    "affiliations": {"name": "hasAffiliation", "type": [], "domain": "Organisation", "range": ["Organisation", "Character", "Work"]},
    "also_known_as": {"name": "hasAlternativeName", "type": ["functional"], "domain": "Organisation", "range": "xsd:string"},
    "created_by": {"name": "wasCreatedBy", "type": ["functional"], "inverse": "created", "domain": "Organisation", "range": "RealWorldPerson"},
    "location": {"name": "isLocatedAt", "type": ["functional"], "domain": "Organisation", "range": ["Location"]},
    "notable_members": {"name": "hasNotableMember", "type": [], "domain": "Organisation", "range": "RealWorldPerson"},
    "purpose": {"name": "hasPurpose", "type": [], "domain": "Organisation", "range": "xsd:string"},
    "type": {"name": "hasType", "type": [], "domain": "Organisation", "range": "xsd:string"}
}

# Properties for entities of class Character.
# Keys are infobox field names; each value gives the property "name", its
# characteristics ("type"), an optional "inverse", the "domain" class and the
# "range" (class name, "xsd:..." datatype, or a list of these).
#
# Every entry must be a sibling at the top level of this dict: the "mate"
# entry is closed on its own line so that the entries after it are not
# swallowed into it as nested keys.
Character_property_map = {
    "affiliations": {"name": "hasAffiliation", "type": [], "domain": "Character", "range": ["Organisation", "Character", "Work"]},
    "also_known_as": {"name": "hasAlternativeName", "type": ["functional"], "domain": "Character", "range": "xsd:string"},
    "appearances": {"name": "hasAppearance", "type": [], "domain": "Character", "range": "Work"},
    "created_by": {"name": "wasCreatedBy", "type": ["functional"], "inverse": "created", "domain": "Character", "range": "RealWorldPerson"},
    "current_location": {"name": "isLocatedAt", "type": ["functional"], "domain": "Character", "range": ["Location", "xsd:string"]},
    "died": {"name": "ceasedOn", "type": ["functional"], "domain": "Character", "range": "xsd:string"},
    "distinctions": {"name": "hasDistinction", "type": [], "domain": "Character", "range": "xsd:string"},
    "first_appearance": {"name": "madeFirstAppearanceOn", "type": ["functional"], "domain": "Character", "range": "Work"},
    "language": {"name": "usesLanguage", "type": ["functional"], "domain": "Character", "range": "xsd:string"},
    "mate": {"name": "hasCharacterMate", "type": ["functional", "symmetric"], "inverse": "isCharacterMateOf", "domain": "Character", "range": ["Character"]},
    "occupation": {"name": "hasOccupation", "type": [], "domain": "Character", "range": "xsd:string"},
    "offspring": {"name": "hasOffspring", "type": ["transitive"], "inverse": "isOffspringOf", "domain": "Character", "range": "Character"},
    "origin": {"name": "hasOrigin", "type": ["functional"], "domain": "Character", "range": ["Location"]},
    "powers_and_abilities": {"name": "hasPowersAndAbilities", "type": [], "domain": "Character", "range": "xsd:string"},
    "relatives": {"name": "hasRelativesCharacter", "type": ["symmetric"], "domain": "Character", "range": "Character"},
    "species": {"name": "isOfSpecies", "type": ["functional"], "inverse": "hasIndividual", "domain": "Character", "range": ["Character"]},
    "hasParent": {"name": "hasParent", "type": ["transitive"], "inverse": "hasChild", "domain": "Character", "range": ["Character"]},
    "hasGrandParent": {"name": "hasGrandParent", "type": ["transitive"], "inverse": "hasGrandchild", "domain": "Character", "range": ["Character"]},
    "hasGreatGrandParent": {"name": "hasGreatGrandParent", "type": ["transitive"], "inverse": "hasGreatGrandchild", "domain": "Character", "range": ["Character"]},
    "hasFather": {"name": "hasFather", "type": [], "inverse": "hasChild", "domain": "Character", "range": ["Character"]},
    "hasMother": {"name": "hasMother", "type": [], "inverse": "hasChild", "domain": "Character", "range": ["Character"]},
    "hasSister": {"name": "hasSister", "type": ["symmetric"], "domain": "Character", "range": ["Character"]},
    "hasBrother": {"name": "hasBrother", "type": ["symmetric"], "domain": "Character", "range": ["Character"]},
    "hasHalfSister": {"name": "hasHalfSister", "type": [], "inverse": "hasHalfSibling", "domain": "Character", "range": ["Character"]},
    "hasHalfBrother": {"name": "hasHalfBrother", "type": [], "inverse": "hasHalfSibling", "domain": "Character", "range": ["Character"]},
    "hasSon": {"name": "hasSon", "type": [], "inverse": "hasParent", "domain": "Character", "range": ["Character"]},
    "hasDaughter": {"name": "hasDaughter", "type": [], "inverse": "hasParent", "domain": "Character", "range": ["Character"]},
    "hasAncestor": {"name": "hasAncestor", "type": ["transitive"], "inverse": "hasDescendant", "domain": "Character", "range": ["Character"]},
    "hasUncle": {"name": "hasUncle", "type": [], "inverse": "hasNephewOrNiece", "domain": "Character", "range": ["Character"]},
    "hasCousin": {"name": "hasCousin", "type": ["symmetric"], "domain": "Character", "range": ["Character"]},
    "hasNephew": {"name": "hasNephew", "type": [], "inverse": "hasUncleOrAunt", "domain": "Character", "range": ["Character"]},
    "hasGrandfather": {"name": "hasGrandfather", "type": [], "inverse": "hasGrandchild", "domain": "Character", "range": ["Character"]},
    "hasGrandmother": {"name": "hasGrandmother", "type": [], "inverse": "hasGrandchild", "domain": "Character", "range": ["Character"]},
    "hasGrandson": {"name": "hasGrandson", "type": [], "inverse": "hasGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGranddaughter": {"name": "hasGranddaughter", "type": [], "inverse": "hasGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGreatGrandson": {"name": "hasGreatGrandson", "type": [], "inverse": "hasGreatGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGreatGreatGrandson": {"name": "hasGreatGreatGrandson", "type": [], "inverse": "hasGreatGreatGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGreatGreatFather": {"name": "hasGreatGreatFather", "type": [], "inverse": "hasGreatGreatGrandchild", "domain": "Character", "range": ["Character"]},
    "hasGreatGranddaughter": {"name": "hasGreatGranddaughter", "type": [], "inverse": "hasGreatGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGreatGrandchildren": {"name": "hasGreatGrandchildren", "type": [], "inverse": "hasGreatGrandParent", "domain": "Character", "range": ["Character"]},
    "hasGreatGreatGranddaughter": {"name": "hasGreatGreatGranddaughter", "type": [], "inverse": "hasGreatGreatGrandParent", "domain": "Character", "range": ["Character"]},
    "hasRelative": {"name": "hasRelativeCharacter", "type": ["symmetric"], "domain": "Character", "range": ["Character"]},
}

# Dictionary whose keys are the names of the main classes and whose values
# are the lists of their subclasses.
# The same subclass name may be listed under more than one main class
# (e.g. "MythosLiterature", "Chaosium", "Dunwich").
subclass_map = {
    "Character": [
        "Avatars",
        "Chaosium",
        "CharactersIncorporatedFromFolkloreMythAndReligion",
        "Cults",
        "DeceasedInMythos",
        "Disambiguations",
        "Dreamers",
        "Dunwich",
        "EarthNativeSpecies",
        "ElderGods",
        "ExtinctSpecies",
        "ExtraDimensionalCharacters",
        "ExtraDimensionalSpecies",
        "ExtraterrestrialCharacters",
        "ExtraterrestrialSpecies",
        "Fictional",
        "GamesAndAdaptations",
        "GreatOldOnes",
        "Humans",
        "LifeformsAndEntities",
        "MagicUsers",
        "MythosCharacters",
        "MythosLiterature",
        "NonSapientSpecies",
        "OtherSupernaturalBeings",
        "OuterGods",
        "RobertEHowardWorks",
        "SapientSpecies",
        "ServitorRaces",
        "Species",
        "SpeciesIncorporatedFromFolkloreMythAndReligion"
    ],
    "Work": [
        "AmateurPress",
        "AndersFagerWorks",
        "ArkhamHorror",
        "ArkhamHorrorFiction",
        "Audio",
        "AugustDerlethWorks",
        "AveroigneCycle",
        "BhyhlunSeries",
        "Books",
        "BrianLumleyWorks",
        "Chaosium",
        "ClarkAshtonSmithWorks",
        "ComicBooks",
        "Crossovers",
        "CrossroadPress",
        "Dunwich",
        "Ebooks",
        "ExpandedMythosAudioAdaptations",
        "ExpandedMythosComicBookAdaptations",
        "ExpandedMythosFilmAndTelevision",
        "ExpandedMythosFilmAndTelevisionAdaptations",
        "Ezines",
        "Fanzines",
        "FritzLeiberWorks",
        "GamesAndAdaptations",
        "GamingSupplements",
        "HyperboreanCycle",
        "JamesAllisonCycle",
        "JohnDHaefele",
        "LovecraftCircleAdaptations",
        "LovecraftCircleAudioAdaptations",
        "LovecraftCircleComicBookAdaptations",
        "LovecraftCircleFilmAndTelevisionAdaptations",
        "MythosAdjacentFilmAndTelevision",
        "MythosAdjacentWorks",
        "MythosInspiredFilmAndTelevisionWorks",
        "MythosInspiredWorks",
        "MythosLiterature",
        "NonFictionWorks",
        "OtherAuthors",
        "Pastiches",
        "PatHarriganWorks",
        "PennyFrierson",
        "Periodicals",
        "PulpMagazines",
        "RamseyCampbellWorks",
        "RuthannaEmrysWorks",
        "SpectraFiles",
        "StoryCycles",
        "Storybooks",
        "TheSalemHawleySeries",
        "Works",
        "ZothiqueStoryCycle",
        "HPLovecraftWorks",
        "CaitlinRKiernanWorks",
        "MediaAdaptations"
        
    ],
    "RealWorldPerson": [
        "AlterEgosCharactersIncorporatedFromTheRealWorld",
        "AmateurPress",
        "Artists",
        "Audio",
        "Chaosium",
        "ComicBooks",
        "ContentCreators",
        "Critics",
        "CrossroadPress",
        "DeceasedRealWorld",
        "DefunctRealWorld",
        "Editors",
        "ExpandedMythosAuthors",
        "GamesAndAdaptations",
        "GamingSupplements",
        "KalemClub",
        "LovecraftCircleAuthors",
        "LovecraftsCoAuthors",
        "LovecraftsCorrespondents",
        "LovecraftsInspirationsAuthors",
        "MythosAdjacentAuthors",
        "MythosAdjacentWorks",
        "MythosInspiredAuthors",
        "MythosInspiredWorks",
        "MythosLiterature",
        "NonFictionAuthors",
        "Publishers",
        "RealWorld",
        "RealWorldPeople",
        "Scholars",
        "SmallPress",
        "LovecraftSFriendsAndAcquaintances"
    ],
    "Location": [
        "CallOfCthulhuRealWorld",
        "Dimensions",
        "Dunwich",
        "Ebooks",
        "ExtraDimensionalLocations",
        "Locations",
        "LocationsIncorporatedFromFolkloreMythAndReligion",
        "LocationsIncorporatedFromTheRealWorld",
        "LocationsOriginatingFromMythosAdjacentWorks",
        "LocationsOriginatingFromMythosInspiredWorks",
        "LovecraftsInspirationsWorks",
        "MythosLiterature",
        "Planets",
        "RealWorld",
        "RegionsTerritories",
        "Stars",
        "Structures"
    ],
    "Artefact": [
        "ArtefactsOriginatingFromMythosAdjacentWorks",
        "ArtefactsOriginatingFromMythosInspiredWorks",
        "LocationsOriginatingFromMythosInspiredWorks",
        "LovecraftsInspirationsWorks",
        "MysticalArtefacts",
        "MythosBooksFictional",
        "MythosLiterature",
        "RegionsTerritories",
        "TechnologicalArtefacts"
    ],
    "Organisation": [
        "Cults",
        "DefunctOrganisationsFictional",
        "Events",
        "MythosLiterature",
        "Organisations",
        "OrganisationsOriginatingFromMythosAdjacentWorks",
        "OrganisationsOriginatingFromMythosInspiredWorks"
    ]
}

# Lookup from main class name to the property dictionary used for that class.
property_maps = dict(
    Work=Work_property_map,
    Character=Character_property_map,
    RealWorldPerson=RealWorldPerson_property_map,
    Location=Location_property_map,
    Artefact=Artefact_property_map,
    Organisation=Organisation_property_map,
)

# Class definitions: (class name, English description used as its label).
_CLASS_DESCRIPTIONS = (
    ('Work', 'Creative works such as literary pieces, articles, games, comics, and other media.'),
    ('Character', 'A being from mythology or fictional lore, including deities, monsters, and heroes.'),
    ('RealWorldPerson', 'A real historical or contemporary person with connections to the mythos or works.'),
    ('Location', 'A place or setting, real or fictional, associated with events or entities from the mythos.'),
    ('Artefact', 'An object of historical, mystical, or fictional significance, often tied to the mythos.'),
    ('Organisation', 'A group or institution, real or fictional, playing a role in the mythos or related works.'),
)
classes = dict(_CLASS_DESCRIPTIONS)


# Declare the six main classes, each labelled with its English description.
for class_name, label in classes.items():
    class_uri = EX[clean_uri(class_name)]
    for predicate, value in (
        (RDF.type, OWL.Class),
        (RDFS.label, Literal(label, lang='en')),
    ):
        g.add((class_uri, predicate, value))


def add_subclass_hierarchy(graph, subclass_map):
    """
    Declare every subclass listed in ``subclass_map`` and attach it to its
    main class.

    For each main class name (e.g. 'Character') and each of its subclass
    names (Avatars, Chaosium ...), the subclass is typed as owl:Class and
    linked with rdfs:subClassOf to the main class in the EX namespace.
    """
    for parent_name, child_names in subclass_map.items():
        parent_uri = EX[clean_uri(parent_name)]
        for child_uri in (EX[clean_uri(child)] for child in child_names):
            # Declare the class (adding an existing triple is harmless) ...
            graph.add((child_uri, RDF.type, OWL.Class))
            # ... and record it as a subclass of the main class.
            graph.add((child_uri, RDFS.subClassOf, parent_uri))
# With the helper defined, register the subclass hierarchy in the graph.
add_subclass_hierarchy(g, subclass_map)
    
# Translation table mapping the five XML special characters to their entities.
_XML_ENTITY_TABLE = str.maketrans({
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
    "'": "&apos;",
})


# Helper that escapes values for use in XML.
def escape_xml(value):
    """Return ``value`` with XML special characters replaced by entities.

    Strings are escaped and then stripped of surrounding whitespace; any
    non-string value is returned unchanged.
    """
    if isinstance(value, str):
        return value.translate(_XML_ENTITY_TABLE).strip()
    return value

def add_subclasses_from_json(graph, default_parent_class, subclasses):
    """Attach each name in ``subclasses`` to ``default_parent_class``.

    Every subclass is declared as an owl:Class (if not already declared in
    ``graph``) and linked to the parent with rdfs:subClassOf. A subclass whose
    URI equals the parent's URI is reported and skipped.
    """
    parent_uri = EX[clean_uri(default_parent_class)]

    for subclass in subclasses:
        subclass_uri = EX[clean_uri(subclass)]

        if subclass_uri != parent_uri:
            # Declare the class only when the graph does not know it yet.
            declaration = (subclass_uri, RDF.type, OWL.Class)
            if declaration not in graph:
                graph.add(declaration)

            graph.add((subclass_uri, RDFS.subClassOf, parent_uri))
        else:
            # A class must not be recorded as a subclass of itself.
            print(f"Warning: Detected self-referential subclass: {subclass}. Skipping.")

def clean_property_name(prop_name):
    """Strip parenthesis markup from a property name.

    Percent-encoded parentheses are removed, an optional-plural marker
    ``(s)`` becomes a plain ``s``, and any remaining parentheses are dropped.
    """
    return (
        prop_name
        .replace("%28", "")
        .replace("%29", "")
        .replace("(s)", "s")
        .replace("(", "")
        .replace(")", "")
    )


# Build the mapping from cleaned title -> URI of the corresponding individual.
# Records without a title are left out: add_individuals() ignores such records
# as well, and indexing e['title'] for them here would raise KeyError before
# any individual is added.
title_to_uri_map = {
    clean_uri(e['title']): URIRef(EX[clean_uri(e['title'].replace(' ', '_'))])
    for e in cleaned_data
    if e.get('title')
}

# Helper that cleans an entity's infobox.
def clean_infobox(entity):
    """Remove infobox values that refer to the entity itself.

    Values are compared with the entity's title after both are passed through
    ``clean_uri``. Matching items are dropped from list values; a matching
    string value is replaced by ``None``. The infobox is modified in place and
    the same ``entity`` is returned. Entities without a dict infobox are
    returned untouched.
    """
    infobox = entity.get('infobox')
    if not isinstance(infobox, dict):
        return entity

    def refers_to_self(value):
        return clean_uri(value) == clean_uri(entity.get('title', ''))

    for key, values in infobox.items():
        if isinstance(values, list):
            infobox[key] = [item for item in values if not refers_to_self(item)]
        elif isinstance(values, str) and refers_to_self(values):
            infobox[key] = None
    return entity


def _resolve_range_term(range_name):
    """Return the RDF term for a range name, or None for an unknown xsd type."""
    if range_name.startswith("xsd:"):
        return getattr(XSD, range_name.split(":")[1], None)
    return EX[range_name]


def _add_rdf_list(graph, members):
    """Write ``members`` to ``graph`` as an rdf:List and return its head node."""
    head = BNode()
    node = head
    for position, member in enumerate(members, start=1):
        graph.add((node, RDF.first, member))
        rest = RDF.nil if position == len(members) else BNode()
        graph.add((node, RDF.rest, rest))
        node = rest
    return head


def add_object_properties(graph, property_maps):
    """Declare every property described in ``property_maps`` in ``graph``.

    Despite its name the function declares both kinds of property: a property
    whose range consists only of ``xsd:`` datatypes becomes an
    owl:DatatypeProperty, every other property an owl:ObjectProperty.

    For each property the label, domain, range, characteristics (functional,
    symmetric, transitive) and - for object properties - the optional inverse
    are added. Property names are passed through ``clean_property_name`` and
    ``rename_property`` so that properties shared by several classes get
    their class-specific name.

    Args:
        graph: rdflib graph that receives the declarations.
        property_maps: mapping of class name -> {infobox key -> details};
            details carries "name", "domain", "range" and optionally "type"
            and "inverse". Entries lacking a name, domain or range are
            skipped.
    """
    # Characteristics that are only meaningful for object properties.
    object_only_characteristics = {
        "symmetric": OWL.SymmetricProperty,
        "transitive": OWL.TransitiveProperty,
    }

    for class_name, properties in property_maps.items():
        for prop_details in properties.values():
            renamed_prop = rename_property(
                clean_property_name(prop_details["name"]),
                class_name,
                property_renaming_map,
            )
            domain = prop_details.get("domain")
            range_ = prop_details.get("range", [])

            # Skip incomplete descriptions.
            if not renamed_prop or not domain or not range_:
                continue

            # Normalise a single range name to a one-element list.
            if isinstance(range_, str):
                range_ = [range_.strip()]

            prop_uri = EX[renamed_prop]
            domain_uri = EX[domain]
            graph.add((domain_uri, RDF.type, OWL.Class))

            # A property is a datatype property only when *all* of its range
            # members are xsd datatypes; mixed ranges stay object properties.
            is_datatype_property = all(r.startswith("xsd:") for r in range_)

            # Resolve and declare the range members; xsd names that cannot be
            # resolved are dropped instead of putting None into a triple.
            range_terms = []
            for r in range_:
                term = _resolve_range_term(r)
                if term is None:
                    continue
                member_kind = RDFS.Datatype if r.startswith("xsd:") else OWL.Class
                graph.add((term, RDF.type, member_kind))
                range_terms.append(term)

            property_kind = OWL.DatatypeProperty if is_datatype_property else OWL.ObjectProperty
            graph.add((prop_uri, RDF.type, property_kind))
            graph.add((prop_uri, RDFS.label, Literal(renamed_prop)))
            graph.add((prop_uri, RDFS.domain, domain_uri))

            # --- Range ---
            if len(range_terms) > 1:
                # Several members: the range is an anonymous class (or
                # datatype) defined as the owl:unionOf of the member list.
                union_node = BNode()
                union_kind = RDFS.Datatype if is_datatype_property else OWL.Class
                graph.add((union_node, RDF.type, union_kind))
                graph.add((union_node, OWL.unionOf, _add_rdf_list(graph, range_terms)))
                graph.add((prop_uri, RDFS.range, union_node))
            elif range_terms:
                graph.add((prop_uri, RDFS.range, range_terms[0]))

            # --- Characteristics ---
            for prop_type in prop_details.get("type", []):
                if prop_type == "functional":
                    graph.add((prop_uri, RDF.type, OWL.FunctionalProperty))
                elif prop_type in object_only_characteristics and not is_datatype_property:
                    graph.add((prop_uri, RDF.type, object_only_characteristics[prop_type]))

            # --- Inverse (object properties only) ---
            # The first range member is used as the domain of the inverse.
            inverse = prop_details.get("inverse")
            if inverse and not range_[0].startswith("xsd:"):
                inverse_uri = EX[clean_property_name(inverse)]
                graph.add((inverse_uri, RDF.type, OWL.ObjectProperty))
                graph.add((inverse_uri, RDFS.label, Literal(inverse)))
                graph.add((inverse_uri, RDFS.domain, EX[range_[0]]))
                graph.add((inverse_uri, RDFS.range, domain_uri))
                graph.add((prop_uri, OWL.inverseOf, inverse_uri))
                graph.add((inverse_uri, OWL.inverseOf, prop_uri))

# Declare all properties from the per-class property maps in the graph.
add_object_properties(g, property_maps)



def add_individuals(entity):
    """
    Add one entity record to the global graph ``g`` as an individual.

      - entity['class'] gives the base class (e.g. Character); the individual
        is typed with it and with every class listed in entity['subclass']
      - the title becomes the rdfs:label, entity['content'] an annotation
      - infobox fields known to the class's property map become property
        assertions; unknown fields are reported and skipped

    Records without a title or a class are ignored.
    """
    if not entity.get("title") or not entity.get("class"):
        return

    # Clean the names used to build URIs.
    title = clean_uri(entity['title'].replace(' ', '_'))
    parent_class_name = clean_uri(entity['class'].replace(' ', '_'))

    # URIs of the individual and of its base class.
    entity_uri = title_to_uri_map.get(title, URIRef(EX[title]))
    parent_class_uri = URIRef(EX[parent_class_name])

    # 1) Type the individual with its base class and label it.
    g.add((entity_uri, RDF.type, parent_class_uri))
    g.add((entity_uri, RDFS.label, Literal(escape_xml(entity['title']))))

    # 2) Type the individual with its subclasses (if given).
    if 'subclass' in entity:
        subclasses = entity['subclass'] if isinstance(entity['subclass'], list) else [entity['subclass']]
        cleaned_title = clean_uri(entity['title']).lower().strip()
        for subclass in subclasses:
            # An individual must not be typed with a class named like itself.
            if clean_uri(subclass).lower().strip() == cleaned_title:
                print(f"Skipping self-referential subclass: {subclass}")
                continue

            subclass_uri = URIRef(EX[clean_uri(subclass)])
            # Adding an already present triple is a no-op, so the class can be
            # declared unconditionally.
            g.add((subclass_uri, RDF.type, OWL.Class))
            g.add((entity_uri, RDF.type, subclass_uri))

    # 3) The content field, when present, is stored as an annotation.
    if 'content' in entity:
        g.add((entity_uri, EX['hasAnnotation'], Literal(escape_xml(entity['content']))))

    # 4) Infobox fields (if any).
    entity = clean_infobox(entity)
    if 'infobox' in entity and isinstance(entity['infobox'], dict):
        class_map = property_maps.get(parent_class_name, {})
        for key, values in entity['infobox'].items():
            if key not in class_map:
                print(f"Свойство '{key}' пропущено для класса '{parent_class_name}'")
                continue

            # Use the same class-specific property name that
            # add_object_properties() declares (e.g. hasAffiliationCharacter),
            # so assertions refer to a declared property rather than to the
            # undeclared shared name.
            property_name = rename_property(
                clean_property_name(class_map[key]["name"]),
                parent_class_name,
                property_renaming_map,
            )
            prop_uri = URIRef(EX[clean_uri(property_name)])

            # A value naming a known entity becomes a link to that individual;
            # any other value is stored as an escaped literal.
            if isinstance(values, list):
                for v in values:
                    target_uri = title_to_uri_map.get(clean_uri(v), Literal(escape_xml(v)))
                    g.add((entity_uri, prop_uri, target_uri))
            elif isinstance(values, str):
                target_uri = title_to_uri_map.get(clean_uri(values), Literal(escape_xml(values)))
                g.add((entity_uri, prop_uri, target_uri))

# Add every loaded record to the graph as an individual.
for entity in cleaned_data:
    add_individuals(entity)

# Write the finished ontology to disk in Turtle syntax.
g.serialize(destination=output_ttl, format="turtle")
print(f"Ontology saved to '{output_ttl}'")

In [None]:
def clean_uri(uri):
    """Normalise a URI (or URI fragment) so it only contains safe characters.

    The value is percent-decoded first, so already-encoded input such as
    ``%28`` is treated exactly like the raw character.  Double quotes are
    dropped; parentheses, angle brackets, ``|`` and spaces become ``_``.
    Leading/trailing whitespace left after that is stripped, and the result
    is percent-encoded again while keeping ``/``, ``#``, ``:`` and ``,``
    readable.

    Args:
        uri: The string to clean.  Non-string values are returned unchanged.

    Returns:
        The cleaned string, or ``uri`` itself when it is not a ``str``.
    """
    if not isinstance(uri, str):
        return uri

    # The substitutions must run on the decoded text: once quote() has been
    # applied these characters only exist as %XX escapes and would never
    # match a plain-character replacement.
    substitutions = str.maketrans({
        '"': None,   # drop double quotes entirely
        '(': '_',
        ')': '_',
        '<': '_',
        '>': '_',
        '|': '_',
        ' ': '_',
    })
    decoded = unquote(uri).translate(substitutions).strip()

    # Keeping '#', ':' and ',' in the safe set leaves full URIs such as
    # "http://host/path#fragment" intact.
    return quote(decoded, safe='/#:,')

def escape_literal(literal):
    """Decode the basic XML entities in a literal and tidy it up.

    Replaces ``&lt;``, ``&gt;``, ``&quot;``, ``&apos;`` and ``&amp;`` with the
    characters they stand for, removes the octopus emoji and strips
    surrounding whitespace.

    Args:
        literal: The text to process.  Non-string values are returned
            unchanged.

    Returns:
        The processed string, or ``literal`` itself when it is not a ``str``.
    """
    if not isinstance(literal, str):
        return literal
    return (literal
            .replace('&lt;', '<')
            .replace('&gt;', '>')
            .replace('&quot;', '"')
            .replace('&apos;', "'")
            # '&amp;' must be decoded last; doing it first would turn an
            # escaped entity such as '&amp;lt;' into '&lt;' and then into '<'.
            .replace('&amp;', '&')
            .replace('🐙', '')  # drop this specific decorative character
            .strip())

def process_ttl(input_ttl, output_ttl):
    """Copy a Turtle file into a new one with cleaned URIs and literals.

    Every triple of ``input_ttl`` is read, its URI terms are passed through
    ``clean_uri`` and its literal objects through ``escape_literal``, and the
    result is written to ``output_ttl``.  A triple that fails to process is
    reported on stdout and skipped; the remaining triples are still written.

    Args:
        input_ttl: Path of the Turtle file to read.
        output_ttl: Path of the Turtle file to write.
    """
    source_graph = Graph()
    source_graph.parse(input_ttl, format="turtle")

    updated_graph = Graph()
    updated_graph.bind("ex", URIRef("http://example.org/lovecraft#"))
    updated_graph.bind("owl", OWL)
    updated_graph.bind("rdfs", RDFS)

    # Predicates whose "N/A" placeholder object is replaced by owl:Nothing.
    placeholder_predicates = (
        URIRef("http://example.org/lovecraft#hasOccupation"),
        URIRef("http://example.org/lovecraft#isLocatedAt"),
    )
    placeholder_object = URIRef("http://example.org/lovecraft#N/A")

    for s, p, o in source_graph:
        try:
            # Clean subject and object URIs as whole strings.
            if isinstance(s, URIRef):
                s = URIRef(clean_uri(str(s)))
            if isinstance(o, URIRef):
                o = URIRef(clean_uri(str(o)))

            # Clean only the local name of the predicate.  rpartition never
            # raises, so a predicate without '#' is kept as-is instead of
            # causing the whole triple to be dropped.
            if isinstance(p, URIRef):
                namespace, separator, p_name = str(p).rpartition("#")
                if separator:
                    p = URIRef(f"{namespace}#{clean_uri(p_name)}")

            # Replace the empty-value placeholder.
            if p in placeholder_predicates and o == placeholder_object:
                o = OWL.Nothing

            # Unescape literal text while preserving its language tag or
            # datatype (at most one of the two is set on a Literal).
            if isinstance(o, Literal):
                o = Literal(escape_literal(str(o)),
                            lang=o.language,
                            datatype=o.datatype)

            updated_graph.add((s, p, o))
        except Exception as e:
            # Best-effort: report the offending triple and carry on.
            print(f"Ошибка при обработке триплета ({s}, {p}, {o}): {e}")

    # Save the updated graph.
    updated_graph.serialize(output_ttl, format="turtle")
    print(f"Processed TTL saved to {output_ttl}")

# Example invocation: clean the generated ontology file and write the result
# to a separate file.
process_ttl("lovecraft_ontology.ttl", "processed_lovecraft_ontology.ttl")

## Следующей части кода можно избежать, немного поднастроив парсинг. В данном случае все значения указанных полей объединяются в одну строку, чтобы не возникало конфликта в Protégé.

In [None]:
def validate_and_merge_ttl(input_file, output_file, fields_to_merge=None):
    """Collapse multi-valued properties of a Turtle file into single literals.

    For every subject, all objects of each listed property are converted to
    strings, sorted, joined with ``" / "`` and stored back as one plain
    literal that replaces the original triples.  Any error is reported on
    stdout rather than raised.

    Args:
        input_file: Path of the Turtle file to read.
        output_file: Path of the Turtle file to write.
        fields_to_merge: Optional iterable of property local names (in the
            ``http://example.org/lovecraft#`` namespace) to merge.  When
            omitted, the built-in list below is used.
    """
    if fields_to_merge is None:
        fields_to_merge = (
            "wasCreatedOn", "wasCreatedBy", "wasWrittenOn", "isInLanguage",
            "hasPageCount", "wasPublishedOn", "wasReleasedOn", "hasWriter",
            "madeFirstAppearanceOn", "hasAlternativeName", "wasBornOn",
            "hasBirthplace", "diedOn", "hasFullName", "hasNationality",
            "hasWebsite", "isInCountry", "wasDestroyedOn", "isLocatedAt",
            "hasOrigin", "hasPlaceOfOrigin", "ceasedOn", "usesLanguage",
        )

    try:
        g = Graph()
        g.parse(input_file, format='turtle')

        ex = Namespace("http://example.org/lovecraft#")

        # (subject, predicate) -> list of stringified object values
        merged_data = {}
        for field in fields_to_merge:
            if not field:
                # An empty name would address the bare namespace IRI.
                continue
            predicate = ex[field]
            for subject, obj in g.subject_objects(predicate):
                merged_data.setdefault((subject, predicate), []).append(str(obj))

        # Replace the original triples with one merged literal each.
        for (subject, predicate), values in merged_data.items():
            g.remove((subject, predicate, None))
            # Sort so the merged text does not depend on graph iteration order.
            merged_value = " / ".join(sorted(values))
            g.add((subject, predicate, Literal(merged_value)))

        g.serialize(destination=output_file, format='turtle')
        print(f"Merged TTL saved to '{output_file}'")
    except Exception as e:
        print(f"Ошибка: {e}")

# Run the merge step on the processed ontology file.
validate_and_merge_ttl('processed_lovecraft_ontology.ttl', 'processed_lovecraft_ontology_merged.ttl')