In [None]:
import csv
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS, XSD
import pandas as pd
import numpy
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the raw movie metadata table and preview its first row.
movies = pd.read_csv(filepath_or_buffer="movies_metadata.csv")
movies.iloc[:1]

In [None]:
# Keep only the columns used by the rest of the notebook, then discard
# every row that has a missing value in any of them.
kept_columns = [
    "id",
    "genres",
    "original_language",
    "production_countries",
    "title",
    "release_date",
]
mov = movies[kept_columns].dropna(axis=0, how="any")
mov

In [None]:
# Replace the full release date with just its calendar year:
# parse the date strings, append a 'year' column, drop the original column.
mov = (
    mov
    .assign(year=pd.to_datetime(mov['release_date']).dt.year)
    .drop(columns='release_date')
)

mov

In [None]:
import langcodes

# Turn every language code of 'original_language' into its full language
# name (via langcodes) and store it in a new 'language' column.
mov['language'] = [
    langcodes.Language.get(code).language_name()
    for code in mov['original_language']
]

# The raw code column is no longer needed once the name is available.
mov = mov.drop(columns='original_language')

mov

In [None]:
import ast
import json

# Each 'genres' cell is the text form of a Python list of dicts
# (every dict carries a "name" key). Parse it and keep only the names,
# producing one list of genre names per movie, in row order.
#
# ast.literal_eval already yields real dicts, so the name is read directly.
# The previous str() -> quote replacement -> json.loads round trip was
# redundant and would fail on any value containing an apostrophe.
parsed_genres = [ast.literal_eval(raw) for raw in mov['genres']]

genres = [
    [genre["name"] for genre in movie_genres]
    for movie_genres in parsed_genres
]

genres

In [None]:
# Genre labels that are kept; every other label is filtered out.
exists = {"Thriller", "Crime", "Action", "Drama", "Comedy"}

# For every movie keep only its genres that belong to `exists`,
# preserving their original order (the inner list may end up empty).
authorized = [
    [name for name in movie_genres if name in exists]
    for movie_genres in genres
]

authorized

In [None]:
# Store the filtered genre lists, then remove the movies left with no genre.
mov['genres'] = authorized
rows_without_genre = mov.index[mov['genres'].apply(len) == 0]
mov = mov.drop(index=rows_without_genre)
mov

In [None]:
import ast

def _first_country_name(raw_countries):
    """Parse the text form of a list of country dicts and return the
    'name' of the first entry, or None when the list is empty."""
    countries = ast.literal_eval(raw_countries)
    return countries[0]['name'] if len(countries) > 0 else None


# Reduce 'production_countries' to the name of the first listed country.
mov['production_countries'] = mov['production_countries'].apply(_first_country_name)

# Drop movies without a country, keep one row per id and per title,
# and give the column its final name.
mov = (
    mov
    .dropna(subset=['production_countries'])
    .drop_duplicates(subset='id')
    .drop_duplicates(subset='title')
    .rename(columns={'production_countries': 'country'})
)

# Cast ids to integers so they can be joined with the credits table's 'id'.
mov['id'] = mov['id'].astype('int64')
mov

In [None]:
import pandas as pd
# Read the cast/crew table and preview its first row.
df = pd.read_csv(filepath_or_buffer="credits.csv")
df.iloc[:1]

In [None]:
import ast

def extract_actors(cast):
    """Parse a stringified list of cast dicts into actor tuples.

    Args:
        cast: text form of a Python list of dicts, each with at least the
            keys 'name' and 'gender'.

    Returns:
        A list of ``(name, gender, 0)`` tuples, one per cast entry, in the
        original order. The trailing 0 is a constant placeholder.
    """
    actors = []
    for entry in ast.literal_eval(cast):
        actors.append((entry['name'], entry['gender'], 0))
    return actors

def extract_crew(crew, job):
    """Parse a stringified list of crew dicts and keep one job only.

    Args:
        crew: text form of a Python list of dicts, each with at least the
            keys 'name', 'gender' and 'job'.
        job: exact job label to keep (compared with ``==``).

    Returns:
        A list of ``(name, gender, 0)`` tuples for the crew members whose
        'job' equals ``job``, in the original order. The trailing 0 is a
        constant placeholder.
    """
    selected = []
    for member in ast.literal_eval(crew):
        if member['job'] != job:
            continue
        selected.append((member['name'], member['gender'], 0))
    return selected


# Build the per-movie people columns from the raw 'cast' and 'crew' text.
df['actors'] = df['cast'].apply(extract_actors)

# Only the 'crew' column is needed here, so iterate it directly:
# df.iterrows() materialises a full Series for every row and was being
# run twice over the whole table just to read one field.
df['director'] = [extract_crew(crew, 'Director') for crew in df['crew']]
df['writers'] = [extract_crew(crew, 'Writer') for crew in df['crew']]

# Drop the raw "cast" and "crew" columns now that they have been parsed.
# Rebinding instead of inplace=True keeps the step explicit.
df = df.drop(columns=['cast', 'crew'])

df


In [None]:
# Join the cleaned movies with their cast/crew on the shared 'id' column.
merged_df = pd.merge(mov, df, left_on='id', right_on='id')

# Export a random subset of at most 1000 movies to 'mov.csv'.
# - The size is capped at the number of available rows, because
#   DataFrame.sample raises when n exceeds the population (replace=False).
# - A fixed random_state makes the exported sample reproducible on re-run.
sample_size = min(1000, len(merged_df))
light_df = merged_df.sample(n=sample_size, random_state=42)
light_df.to_csv('mov.csv', index=False)

light_df

In [None]:
import ast
from owlready2 import *
import pandas as pd

# Load the ontology file that the next cell populates with individuals.
onto = get_ontology("Ontology1.owl").load()

# Reload the sampled movies written to 'mov.csv'. The list-valued columns
# come back from the CSV as text, so parse them into Python lists again.
df = pd.read_csv('mov.csv')
for list_column in ('genres', 'actors', 'director', 'writers'):
    df[list_column] = df[list_column].apply(ast.literal_eval)
df

In [None]:
# Genre label (as found in the CSV) -> ontology entity used as hasGenre value.
genre_entities = {
    'Action': onto.Action,
    'Drama': onto.Drama,
    'Comedy': onto.Comedy,
    'Thriller': onto.Thriller,
    'Crime': onto.Crime,
}

# Create one Movie individual per CSV row, together with its directors,
# writers and actors, and attach genres and the literal properties.
for index, row in df.iterrows():

    # Resolve the row's genre labels to ontology entities.
    genres = []
    for g in row['genres']:
        if g in genre_entities:
            genres.append(genre_entities[g])
        else:
            # Unknown label: report the row index and skip the label.
            print(index)

    # Each person tuple is (name, gender, age placeholder).
    directors = [onto.Director(name=n.strip(), Sexe=[str(s)], Age=[a]) for (n, s, a) in row['director']]
    writers = [onto.Writer(name=n.strip(), Sexe=[str(s)], Age=[a]) for (n, s, a) in row['writers']]
    actors = [onto.Actor(name=n.strip(), Sexe=[str(s)], Age=[a]) for (n, s, a) in row['actors']]

    # Create the movie and link everything to it. A failure on one row is
    # reported with enough context to locate it and the import continues.
    # Only Exception is caught, so KeyboardInterrupt/SystemExit still stop
    # the loop (the previous bare `except:` swallowed those too).
    try:
        movie = onto.Movie(name=row['title'].strip())
        movie.hasGenre = genres
        movie.Year = [row['year']]
        movie.Country = [row['country']]
        movie.Language = [row['language']]
        movie.hasDirector = directors
        movie.hasWriter = writers
        movie.hasActor = actors
    except Exception as exc:
        print(f"error on row {index} ({row['title']!r}): {exc!r}")

    # Progress trace: index of the row that was just processed.
    print(index)

# Persist the populated ontology as RDF/XML.
onto.save(file="Imported1.owl", format = "rdfxml")

# Sanity check: count the named (IRI) OWL classes known to the default world.
list(default_world.sparql("""SELECT (COUNT(?x) AS ?nb){ ?x a owl:Class . FILTER(ISIRI(?x)) }"""))




In [None]:
# Destination file for the result rows.
filename = "Query1.txt"

# SPARQL: every ?Inst typed as Ontology1:Actor, grouped by ?Inst.
query = """
    SELECT ?Inst WHERE{
    ?Inst rdf:type Ontology1:Actor
    } GROUP BY ?Inst
"""

# Run the query on the default world and collect all rows in a list.
result = list(default_world.sparql(query))

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)

In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query2.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL: movies with genre Thriller, plus their director when an
# isDirectorOf link to the movie exists (OPTIONAL).
query = """
    SELECT ?movies ?director WHERE{
    ?movies Ontology1:hasGenre Ontology1:Thriller .
    OPTIONAL {?director Ontology1:isDirectorOf ?movies}
    }
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)


In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query3.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL: movies that have both the Thriller and the Crime genre.
query = """
    SELECT ?movies WHERE{
    ?movies Ontology1:hasGenre Ontology1:Thriller .
    ?movies Ontology1:hasGenre Ontology1:Crime
    }
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)


In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query4.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL: individuals typed Actor whose Age value is greater than 51.
query = """
    SELECT ?actors WHERE{
    ?actors Ontology1:Age ?age .
    ?actors rdf:type Ontology1:Actor .
    FILTER(?age>51)
    }
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)


In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query6.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL: Comedy movies, with their year and actors when present (OPTIONAL).
query = """
    SELECT ?movies ?actor ?year WHERE{
    ?movies Ontology1:hasGenre Ontology1:Comedy .
    OPTIONAL{?movies Ontology1:Year ?year} .
    OPTIONAL {?movies Ontology1:hasActor ?actor}
    }
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)


In [None]:
# Define the SPARQL query
import rdflib as rdf

# Namespace bound to the "Ontology1:" prefix used in the query.
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL: year and country of every movie that is either
# (Comedy AND Crime) or whose language is English.
#
# The language is compared with the plain literal "English": the cell that
# populates the ontology assigns Language from a string value
# (movie.Language = [row['language']]), so the IRI term Ontology1:English
# used previously could never match and the second UNION branch was
# always empty.
query = """
    SELECT ?movies ?year ?Country WHERE{ 
    ?movies Ontology1:Year ?year .
    ?movies Ontology1:Country ?Country .
    {?movies Ontology1:hasGenre Ontology1:Comedy . ?movies Ontology1:hasGenre Ontology1:Crime .}
    UNION
    {?movies Ontology1:Language "English" .}

    }
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

filename = "Query7.txt"

# Write one result row per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as f:
    for item in result:
        f.write(str(item))
        f.write('\n')

In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query8.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL CONSTRUCT: for every movie with a Country and a Language, emit a
# Countries triple and a Language triple.
query = """
     CONSTRUCT {?movie Ontology1:Countries ?countries .
                ?movie Ontology1:Language ?lang}
     WHERE{?movie Ontology1:Country ?countries .
            ?movie Ontology1:Language ?lang}
    
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result item per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)

In [None]:
# Define the SPARQL query
import rdflib as rdf

# Destination file, and the namespace bound to the "Ontology1:" prefix.
filename = "Query9.txt"
Ontology1 = rdf.Namespace("http://www.semanticweb.org/movie-ontology#")

# SPARQL ASK: is there a movie whose Language is "English" and whose
# Country is "France"?
query = """
    ASK{ ?movie Ontology1:Language "English" .
        ?movie Ontology1:Country "France"} 
    
"""

# Run the query through the rdflib view of the default world.
graph = default_world.as_rdflib_graph()
graph.bind("Ontology1", Ontology1)
result = list(graph.query_owlready(query))

# Write one result item per line, UTF-8 encoded.
with open(filename, "w", encoding="utf-8") as out:
    for record in result:
        print(record, file=out)