In [1]:
%pip install rdflib

Collecting rdflib
  Downloading rdflib-7.4.0-py3-none-any.whl.metadata (12 kB)
Downloading rdflib-7.4.0-py3-none-any.whl (569 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m569.0/569.0 kB[0m [31m5.8 MB/s[0m  [33m0:00:00[0m
[?25hInstalling collected packages: rdflib
Successfully installed rdflib-7.4.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import ast
from rdflib import Graph, Namespace, Literal, XSD, RDF, URIRef

# Подготовка к триплетам

In [132]:
def parse_list(val):
    """Coerce a raw CSV cell into a Python list.

    Parameters
    ----------
    val : Any
        The cell value. Typically ``None``/NaN for a missing cell, a string
        holding a Python list literal (e.g. ``"['a', 'b']"``), or a plain
        string naming a single item.

    Returns
    -------
    list
        Always a list:
        * ``None``, NaN, and empty / whitespace-only strings -> ``[]``
        * a list is returned as-is; tuples and sets are converted to lists
        * a string that parses as a list/tuple/set literal -> its items
        * a string that parses as another literal -> ``[parsed]``
        * a string that is not a Python literal -> ``[stripped string]``
        * any other scalar -> ``[val]``
    """
    if val is None:
        return []
    if isinstance(val, list):
        return val
    if isinstance(val, (tuple, set, frozenset)):
        return list(val)
    # NaN is the only float that differs from itself; pandas uses it for missing cells.
    if isinstance(val, float) and val != val:
        return []
    if not isinstance(val, str):
        return [val]

    text = val.strip()
    if not text:
        return []

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (e.g. a bare name such as "Cintra"): treat it as one item.
        return [text]

    if parsed is None:
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, (tuple, set, frozenset)):
        return list(parsed)
    return [parsed]

def clean(df, list_columns=None):
    """Return a copy of ``df`` with missing cells set to ``None`` and list columns parsed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame as read from CSV. It is not modified.
    list_columns : iterable of str, optional
        Columns whose cells hold list literals; each cell is passed through
        ``parse_list`` so it ends up as a Python list.

    Returns
    -------
    pandas.DataFrame
        A new frame with object-dtype columns in which every missing value is ``None``.
    """
    # Cast to object first: in numeric/bool/string-dtype columns pandas coerces
    # ``None`` back to NaN, so a plain ``df.where(..., None)`` leaves NaN in place.
    cleaned = df.astype(object).where(pd.notna(df), None)

    for col in list_columns or ():
        cleaned[col] = cleaned[col].apply(parse_list)

    return cleaned


In [133]:
# Load the book table (title, year) from CSV and show it as the cell output.
df_books = pd.read_csv(filepath_or_buffer='data/books.csv')
df_books

Unnamed: 0,title,year
0,The Last Wish,1990
1,Sword of Destiny,1992
2,Blood of Elves,1994
3,Time of Contempt,1995
4,Baptism of Fire,1996
5,The Tower of the Swallow,1997
6,The Lady of the Lake,1999
7,Season of Storms,2013
8,Crossroads of Ravens,2024


In [134]:
# Load the game table (title, year) from CSV and show it as the cell output.
df_games = pd.read_csv(filepath_or_buffer='data/games.csv')
df_games

Unnamed: 0,title,year
0,The Witcher,2007
1,The Witcher 2: Assassins of Kings,2011
2,The Witcher 3: Wild Hunt,2015
3,Blood and Wine expansion,2016
4,Thronebreaker: The Witcher Tales,2018
5,Gwent: The Witcher Card Game,2018


In [135]:
# Load the character table, then normalise it: missing cells are replaced and
# the columns listed below are parsed from list literals into Python lists.
df_char = pd.read_csv(filepath_or_buffer='data/characters_info_cleaned.csv')
list_columns = [
    'affiliations',
    'appears_books',
    'appears_games',
]

df_char = clean(df=df_char, list_columns=list_columns)
df_char.head(n=3)

Unnamed: 0,character,full name,nationality,hair_color,eye_color,race,gender,status,titles,profession,affiliations,abilities,appears_books,appears_games,isAlive
0,Cahir,Cahir Mawr Dyffryn aep Ceallach,Vicovaro,Dark,Dark blue,Human,Male,Deceased,Count,Knight,"[Nilfgaardian Secret Service, Geralt's company]","Swordmanship, Horsemanship","[Blood of Elves, Time of Contempt, Baptism of ...","[The Witcher 2: Assassins of Kings, The Witche...",False
1,Calanthe,Calanthe Fiona Riannon,Cintra,Ashen-gray,Emerald green,Human,Female,Deceased,Queen of Cintra,,[],,"[The Last Wish, Sword of Destiny, Blood of Elv...",[Gwent: The Witcher Card Game],False
2,Ciri,Cirilla Fiona Elen Riannon,Cintra,Ashen-gray,Emerald-green,Human,Female,,"Heiress to the throne of Cintra, Heiress to In...","Witcher, Empress of Nilfgaard",[],"Elder Blood gene, Swordsmanship, Magic","[Sword of Destiny, Blood of Elves, Time of Con...","[The Witcher, The Witcher 2: Assassins of King...",True


In [136]:
# Load the organisation table, then normalise it: missing cells are replaced and
# the columns listed below are parsed from list literals into Python lists.
df_org = pd.read_csv(filepath_or_buffer='data/organisations_info_cleaned.csv')
list_columns = [
    'founder',
    'leader',
    'members',
    'headquarters',
    'appears_books',
    'appears_games',
]

df_org = clean(df=df_org, list_columns=list_columns)
df_org.head(n=3)

Unnamed: 0,organisation,full name,status,area served,country,type,purpose,founder,leader,members,headquarters,appears_books,appears_games
0,Redanian Secret Service,Redanian Secret Service,Extant,Redania,Redania,Secret service,Spying,[Casimir de Lauterpacht],"[Sigismund Dijkstra, Erik Demartin]",[],[Oxenfurt],"[Blood of Elves, Time of Contempt, Baptism of ...",[Gwent: The Witcher Card Game]
1,Codringher and Fenn,Codringher and Fenn,Extinct,,Temeria,Detective agency and Law firm,,"[Codringher, Jacob Fenn]",[],[],[],"[Time of Contempt, Baptism of Fire]",[The Witcher]
2,Chapter of the Gift and the Art,Chapter of the Gift and the Art,Extinct,,,Inner circle of mages,sets rules and regulations for all magic in th...,[],[],[],[Mirthe],"[Blood of Elves, Time of Contempt, Baptism of ...",[Gwent: The Witcher Card Game]


# Добавление триплетов в онтологию

In [None]:
# Create the working graph and read the Turtle file "witcher.ttl" into it;
# the cells below add triples to this same graph object.
g = Graph()
g.parse(source="witcher.ttl", format="turtle")

In [None]:
def URI(name):
    """Turn a human-readable name into a URIRef under the graph's empty (``:``) prefix.

    The name is stripped, its spaces are replaced with underscores, and the
    resulting ``:<local_name>`` CURIE is expanded through the namespace
    manager of the notebook-level graph ``g`` (not a graph passed by the caller).
    """
    local_name = name.strip().replace(' ', '_')
    expanded = g.namespace_manager.expand_curie(f":{local_name}")
    return URIRef(expanded)

def add_literal(g, subject, predicate, value, datatype=XSD.string):
    """Add ``(subject, predicate, Literal(value))`` to ``g`` unless the value is missing.

    Parameters
    ----------
    g : rdflib.Graph
        Graph that receives the triple.
    subject : rdflib.term.Node
        Subject of the triple.
    predicate : URIRef or str
        A ready URIRef is used as-is. A string is treated as a CURIE such as
        ``":hasFullName"`` and expanded through the graph's namespace manager;
        if it cannot be expanded (e.g. it is already a full IRI) it is wrapped
        in ``URIRef`` unchanged.
    value : Any
        Literal value. ``None`` and NaN are treated as "no data" and skipped.
    datatype : URIRef, optional
        XSD datatype of the literal (default ``xsd:string``).
    """
    # NaN is the only float that differs from itself; pandas uses it for missing cells.
    if value is None or (isinstance(value, float) and value != value):
        return

    if not isinstance(predicate, URIRef):
        # URIRef(":name") would be the literal IRI ":name", not a term of the
        # ontology namespace, so the CURIE has to be expanded first.
        try:
            predicate = g.namespace_manager.expand_curie(predicate)
        except (ValueError, TypeError):
            predicate = URIRef(predicate)

    g.add((subject, predicate, Literal(value, datatype=datatype)))

def add_uri(g, subject, predicate, value):
    """Add one ``(subject, predicate, URI(name))`` triple per name in ``value``.

    Parameters
    ----------
    g : rdflib.Graph
        Graph that receives the triples.
    subject : rdflib.term.Node
        Subject of every triple.
    predicate : URIRef or str
        A ready URIRef is used as-is. A string is treated as a CURIE such as
        ``":appearsIn"`` and expanded through the graph's namespace manager;
        if it cannot be expanded (e.g. it is already a full IRI) it is wrapped
        in ``URIRef`` unchanged.
    value : str, list or None
        A single name or a list of names. ``None``, NaN and blank entries are skipped.
    """
    if value is None:
        return

    if not isinstance(predicate, URIRef):
        # URIRef(":name") would be the literal IRI ":name", not a term of the
        # ontology namespace, so the CURIE has to be expanded first.
        try:
            predicate = g.namespace_manager.expand_curie(predicate)
        except (ValueError, TypeError):
            predicate = URIRef(predicate)

    names = value if isinstance(value, list) else [value]
    for name in names:
        # Skip missing entries (None / NaN) instead of failing on ``.strip()``.
        if name is None or (isinstance(name, float) and name != name):
            continue
        name = str(name)
        if not name.strip():
            continue
        # URI() already strips the name and replaces spaces with underscores.
        g.add((subject, predicate, URI(name)))


In [None]:
def _is_blank(value):
    """Return True for None, NaN, or an empty / whitespace-only string."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    # NaN is the only float that differs from itself.
    return isinstance(value, float) and value != value


def _cell(row, column):
    """Return ``row[column]``, or None when that cell is missing or blank."""
    value = row[column]
    return None if _is_blank(value) else value


def _add_works(frame, class_name):
    """Add each titled row of ``frame`` as an instance of ``:<class_name>`` with its release year."""
    for _, row in frame.iterrows():
        title = _cell(row, 'title')
        if title is None:
            continue
        work_uri = URI(title)
        g.add((work_uri, RDF.type, URI(class_name)))

        year = _cell(row, 'year')
        # A float-typed year column would otherwise serialise as "1990.0".
        if isinstance(year, float) and year.is_integer():
            year = int(year)
        add_literal(g, work_uri, URI("releaseYear"), year, XSD.gYear)


# Predicates are built with URI(...) so they land in the same namespace as the
# classes and individuals; a bare ":name" string would not be expanded by URIRef.
_add_works(df_books, "Book")
_add_works(df_games, "Game")

# (property local name, source column) pairs stored as xsd:string literals.
_CHARACTER_LITERALS = (
    ("hasFullName", 'full name'),
    ("hasGender", 'gender'),
    ("hasHairColor", 'hair_color'),
    ("hasEyeColor", 'eye_color'),
    ("hasProfession", 'profession'),
    ("hasAbility", 'abilities'),
)

for _, row in df_char.iterrows():
    character = _cell(row, 'character')
    if character is None:
        continue
    char_uri = URI(character)
    g.add((char_uri, RDF.type, URI("Character")))

    race = _cell(row, 'race')
    if race is not None:
        race_uri = URI(race)
        g.add((char_uri, URI("hasRace"), race_uri))
        g.add((race_uri, RDF.type, URI("Race")))

    for prop, column in _CHARACTER_LITERALS:
        add_literal(g, char_uri, URI(prop), _cell(row, column))
    add_literal(g, char_uri, URI("isAlive"), _cell(row, 'isAlive'), XSD.boolean)

    add_uri(g, char_uri, URI("appearsIn"), _cell(row, 'appears_books'))
    add_uri(g, char_uri, URI("appearsIn"), _cell(row, 'appears_games'))
    add_uri(g, char_uri, URI("hasAffiliation"), _cell(row, 'affiliations'))

for _, row in df_org.iterrows():
    organisation = _cell(row, 'organisation')
    if organisation is None:
        continue
    # The URI is minted from 'full name'; fall back to the short name when it is
    # missing so the row is not lost to an AttributeError inside URI().
    org_uri = URI(_cell(row, 'full name') or organisation)
    g.add((org_uri, RDF.type, URI("Organisation")))

    add_literal(g, org_uri, URI("hasStatus"), _cell(row, 'status'))
    add_literal(g, org_uri, URI("hasType"), _cell(row, 'type'))
    add_literal(g, org_uri, URI("hasPurpose"), _cell(row, 'purpose'))

    add_uri(g, org_uri, URI("inCountry"), _cell(row, 'country'))
    add_uri(g, org_uri, URI("hasFounder"), _cell(row, 'founder'))
    add_uri(g, org_uri, URI("hasLeader"), _cell(row, 'leader'))
    add_uri(g, org_uri, URI("hasMember"), _cell(row, 'members'))
    add_uri(g, org_uri, URI("appearsInOrg"), _cell(row, 'appears_books'))
    add_uri(g, org_uri, URI("appearsInOrg"), _cell(row, 'appears_games'))


In [130]:
# Save the populated graph to disk in Turtle format.
g.serialize(destination="witcher_filled.ttl", format="turtle")

<Graph identifier=Na33c4ee951a14b549502f22079f2bf7d (<class 'rdflib.graph.Graph'>)>

# Итог

In [162]:
# Re-load the finished ontology into a fresh graph and report how many triples it holds.
# NOTE(review): this reads "witcher-final.ttl", while the cell above writes
# "witcher_filled.ttl" — confirm which file is intended / how the final file is produced.
g = Graph()
g.parse(source="witcher-final.ttl", format="turtle")
print(f"Количество триплетов: {len(g)}")

Количество триплетов: 2170
