# Proyek Semantik Web: RDF for the IMDb Top 250 Movies

### Import Library

In [1]:
import pandas as pd
from rdflib import Graph, Namespace, Literal, RDF, RDFS, OWL
from rdflib.namespace import XSD

### Load Data

In [2]:
# Read the three source tables (movies, persons, genres) in one pass;
# the order of the paths matches the order of the target names.
movies_df, persons_df, genres_df = map(
    pd.read_csv,
    (
        "result-data/movies.csv",
        "result-data/persons.csv",
        "result-data/genres.csv",
    ),
)

### Membuat OWL

In [3]:
# Ontology graph: holds only the schema (classes and property declarations).
g = Graph()

EX = Namespace("http://example.org/")
g.bind("ex", EX)
g.bind("rdf", RDF)
g.bind("rdfs", RDFS)
g.bind("owl", OWL)
g.bind("xsd", XSD)

Movie = EX.Movie
Person = EX.Person
Genre = EX.Genre

# All classes of the ontology. Also used below to tell object properties
# (range is one of these classes) from datatype properties (range is an
# XSD datatype).
ontology_classes = (Movie, Person, Genre)
for owl_class in ontology_classes:
    g.add((owl_class, RDF.type, OWL.Class))


def _declare_properties(graph, properties):
    """Declare every (property, range) pair of *properties* in *graph*.

    A property whose range is one of the ontology classes links two
    resources and is declared as owl:ObjectProperty; every other property
    is declared as owl:DatatypeProperty. The rdfs:range triple is added in
    both cases.
    """
    for prop, prop_range in properties:
        if prop_range in ontology_classes:
            prop_type = OWL.ObjectProperty
        else:
            prop_type = OWL.DatatypeProperty
        graph.add((prop, RDF.type, prop_type))
        graph.add((prop, RDFS.range, prop_range))


# (property, range) pairs used on Movie resources.
movie_properties = [
    (EX.hasRank, XSD.integer),
    (EX.hasTitle, XSD.string),
    (EX.hasIMDBLink, XSD.string),
    (EX.hasIMDBVotes, XSD.string),
    (EX.hasIMDBRating, XSD.float),
    (EX.hasDuration, XSD.string),
    (EX.hasGenre, EX.Genre),
    (EX.hasCast, EX.Person),
    (EX.hasDirector, EX.Person),
    (EX.hasWriter, EX.Person),
    (EX.hasStoryline, XSD.string),
    (EX.hasRottenRating, XSD.string),
    (EX.hasRottenVotes, XSD.string),
    (EX.hasRottenLink, XSD.string),
    (EX.hasPoster, XSD.string),
    (EX.hasYear, XSD.string),
]

# (property, range) pairs used on Person resources.
person_properties = [
    (EX.hasName, XSD.string),
    (EX.hasBirthday, XSD.string),
    (EX.hasPlaceOfBirth, XSD.string),
    (EX.hasPicture, XSD.string),
    (EX.hasProfileLink, XSD.string),
]

# (property, range) pairs used on Genre resources.
genre_properties = [
    (EX.hasGenreName, XSD.string),
]

# Declaring through the helper keeps its loop variables local, so the
# builtin `range` is no longer overwritten at notebook scope.
for property_group in (movie_properties, person_properties, genre_properties):
    _declare_properties(g, property_group)

g.serialize("result-rdf/movies.owl", format="xml")


<Graph identifier=Nef81b41635284f7bb89b2f7d91e203b4 (<class 'rdflib.graph.Graph'>)>

### Membuat Turtle

In [4]:
# Instance-data graph: one resource per person, genre and movie, built from
# the CSV frames and written out as Turtle.
g = Graph()
EX = Namespace("http://example.org/")
g.bind("ex", EX)


def _entity_uri(kind, name):
    """Return the EX URI `<kind>/<name>` with surrounding whitespace removed
    and inner spaces replaced by underscores."""
    return EX[kind + "/" + str(name).strip().replace(" ", "_")]


def _add_literals(graph, subject, row, columns):
    """Add one literal triple per (column, predicate) pair of *columns*.

    Cells that pandas reports as missing (NaN/None) are skipped, so no
    "nan" literal ends up in the graph.
    """
    for column, predicate in columns:
        value = row[column]
        if pd.notna(value):
            graph.add((subject, predicate, Literal(value)))


def _split_names(cell):
    """Split a comma-separated cell into stripped, non-empty names.

    A missing cell yields an empty list instead of raising on `.split`.
    """
    if pd.isna(cell):
        return []
    return [name.strip() for name in str(cell).split(",") if name.strip()]


person_columns = [
    ("person_name", EX.hasName),
    ("birthday", EX.hasBirthday),
    ("place_birth", EX.hasPlaceOfBirth),
    ("picture", EX.hasPicture),
    ("profile_link", EX.hasProfileLink),
]

movie_columns = [
    ("rank", EX.hasRank),
    ("title", EX.hasTitle),
    ("imdb_link", EX.hasIMDBLink),
    ("imdb_votes", EX.hasIMDBVotes),
    ("imdb_rating", EX.hasIMDBRating),
    ("duration", EX.hasDuration),
    ("storyline", EX.hasStoryline),
    ("rotten_rating", EX.hasRottenRating),
    ("rotten_votes", EX.hasRottenVotes),
    ("rotten_link", EX.hasRottenLink),
    ("poster", EX.hasPoster),
    ("year", EX.hasYear),
]

# Columns holding comma-separated person names, and the property each maps to.
role_columns = [
    ("cast_name", EX.hasCast),
    ("director_name", EX.hasDirector),
    ("writer_name", EX.hasWriter),
]

for _, row in persons_df.iterrows():
    if pd.isna(row["person_name"]):
        continue  # no name -> no URI can be built for this row
    person_uri = _entity_uri("person", row["person_name"])
    g.add((person_uri, RDF.type, Person))
    _add_literals(g, person_uri, row, person_columns)

for _, row in genres_df.iterrows():
    if pd.isna(row["genre"]):
        continue
    genre_uri = _entity_uri("genre", row["genre"])
    g.add((genre_uri, RDF.type, Genre))
    g.add((genre_uri, EX.hasGenreName, Literal(row["genre"])))

for _, row in movies_df.iterrows():
    if pd.isna(row["title"]):
        continue  # the title is the URI key; a row without it is unusable
    movie_uri = _entity_uri("movie", row["title"])
    g.add((movie_uri, RDF.type, Movie))
    _add_literals(g, movie_uri, row, movie_columns)

    # Links to other resources use the same URI scheme as the loops above.
    for genre_name in _split_names(row["genre"]):
        g.add((movie_uri, EX.hasGenre, _entity_uri("genre", genre_name)))

    for role_column, role_property in role_columns:
        for person_name in _split_names(row[role_column]):
            g.add((movie_uri, role_property, _entity_uri("person", person_name)))

g.serialize("result-rdf/movies.ttl", format="turtle")

<Graph identifier=N113568bcd70840f19230224edce6e9ba (<class 'rdflib.graph.Graph'>)>