In [1]:
! pip install SPARQLWrapper



In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

### Fact Table - Movie

In [32]:
# Endpoint of the "movies" repository that the query is sent to.
MOVIES_ENDPOINT = "http://localhost:7200/repositories/movies"

# SELECT query for the movie fact table: every imdb:ColorMovie with its title
# plus, where present, genre, comment count, country, director, lead actor and
# actor names. Each OPTIONAL group that matches several times yields one
# result row per combination of matches.
MOVIE_FACT_QUERY = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <http://schema.org/>
PREFIX imdb: <http://academy.ontotext.com/imdb/>

SELECT ?movie ?title ?genre ?commentCount ?countryName ?directorName ?leadActorName ?actorName
WHERE {
    # Select movies of type imdb:ColorMovie (or schema:Movie to include all movies)
    ?movie rdf:type imdb:ColorMovie .  
    
    # Retrieve the title of the movie
    ?movie schema:name ?title .
    
    # Retrieve genres for the movie as a list of literals
    OPTIONAL {
        ?movie schema:genre ?genreLiteral .
        # Bind genreLiteral to genre if it is a literal, otherwise set as "Unknown"
        BIND(IF(isLiteral(?genreLiteral), ?genreLiteral, "Unknown") AS ?genre)
    }
    
    # Retrieve the comment count for the movie
    OPTIONAL {
        ?movie schema:commentCount ?commentCount .
    }
    
    # Retrieve the country of origin for the movie
    OPTIONAL {
        ?movie schema:countryOfOrigin ?country .
        ?country schema:name ?countryName .
    }
    
    # Retrieve the director of the movie
    OPTIONAL {
        ?movie schema:director ?director .
        ?director schema:name ?directorName .
    }
    
    # Retrieve the lead actor for the movie
    OPTIONAL {
        ?movie imdb:leadActor ?leadActor .
        ?leadActor schema:name ?leadActorName .
    }
    
    # Retrieve all other actors for the movie
    OPTIONAL {
        ?movie schema:actor ?actor .
        ?actor schema:name ?actorName .
    }
}
"""

# Build the client, attach the query and ask for a JSON response.
sparql = SPARQLWrapper(MOVIES_ENDPOINT)
sparql.setQuery(MOVIE_FACT_QUERY)
sparql.setReturnFormat(JSON)

# Run the query, then decode the response into Python objects for later cells.
response = sparql.query()
results = response.convert()

In [33]:
# Flatten the SPARQL JSON response into one plain dict per result row.
# Each binding maps a variable name to a term object; only its "value" is
# kept. A variable that is unbound in a row is simply absent from that dict.
data = [
    {var: term["value"] for var, term in binding.items()}
    for binding in results["results"]["bindings"]
]

# Use the variable list reported in the response head as the column schema,
# so every projected variable gets a column in a stable order, even when it is
# unbound in every row or the query returns no rows at all. Inferring columns
# from the row dicts alone would silently drop such variables.
# If the head carries no "vars", columns=None falls back to pandas inference.
columns = results.get("head", {}).get("vars")

# Convert the list of dictionaries into a DataFrame
df = pd.DataFrame(data, columns=columns)

In [35]:
# Write the movie fact table to disk as CSV, leaving out the DataFrame index.
fact_table_path = 'Movies (Fact Table).csv'
df.to_csv(fact_table_path, index=False)