In [15]:
# This module runs the query for our domain (films)
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

In [16]:
# Query text: for each film it selects the item, its label and, when present,
# the genre, publication date, country of origin, director and duration.
ConsultaPeliculas = '''
SELECT ?television_film ?television_filmLabel ?genreLabel ?publication_date ?country_of_originLabel ?directorLabel ?duration WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  ?television_film wdt:P31 wd:Q11424.
  OPTIONAL {?television_film wdt:P136 ?genre.}
  OPTIONAL { ?television_film wdt:P577 ?publication_date. }
  OPTIONAL { ?television_film wdt:P495 ?country_of_origin. }
  OPTIONAL { ?television_film wdt:P57 ?director. }
  OPTIONAL { ?television_film wdt:P2047 ?duration. }
}
LIMIT 1000
'''

# Client for the Wikidata SPARQL endpoint, set up to return JSON.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setReturnFormat(JSON)
sparql.setQuery(ConsultaPeliculas)

# Run the query (query()) and decode the response (convert()).
# "results" is a Python dictionary that follows the JSON result format.
results = sparql.query().convert()

In [17]:
# Build the dataset columns from the query response: one list per column, one
# entry per binding. The film URI and label are read unconditionally; the other
# variables sit in OPTIONAL clauses of the query, so a binding may lack them and
# NaN is stored in that case.
_bindings = results["results"]["bindings"]


def _valor_o_nan(binding, clave):
    """Return binding[clave]["value"], or NaN when `clave` is absent from the binding."""
    if clave in binding:
        return binding[clave]["value"]
    return float('Nan')


Uri = [binding["television_film"]["value"] for binding in _bindings]
Titulo = [binding["television_filmLabel"]["value"] for binding in _bindings]
Genero = [_valor_o_nan(binding, "genreLabel") for binding in _bindings]
FechaLanzamiento = [_valor_o_nan(binding, "publication_date") for binding in _bindings]
PaisLanzamiento = [_valor_o_nan(binding, "country_of_originLabel") for binding in _bindings]
Director = [_valor_o_nan(binding, "directorLabel") for binding in _bindings]
Duracion = [_valor_o_nan(binding, "duration") for binding in _bindings]

In [18]:
# Assemble the per-column lists into one table with one row per binding.
data = {
    'Uri': Uri,
    'Titulo': Titulo,
    'Género': Genero,
    'Fecha de lanzamiento': FechaLanzamiento,
    'País': PaisLanzamiento,
    'Director': Director,
    'Duración en minutos': Duracion,
}
Peliculas = pd.DataFrame(data=data)


In [19]:
# Show the table built from the query results (one row per returned binding).
Peliculas

Unnamed: 0,Uri,Titulo,Género,Fecha de lanzamiento,País,Director,Duración en minutos
0,http://www.wikidata.org/entity/Q47296,Family Plot,comedy thriller,1976-08-16T00:00:00Z,United States of America,Alfred Hitchcock,115
1,http://www.wikidata.org/entity/Q47296,Family Plot,film based on a novel,1976-08-16T00:00:00Z,United States of America,Alfred Hitchcock,115
2,http://www.wikidata.org/entity/Q47296,Family Plot,comedy film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115
3,http://www.wikidata.org/entity/Q47296,Family Plot,crime film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115
4,http://www.wikidata.org/entity/Q47296,Family Plot,mystery film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115
...,...,...,...,...,...,...,...
995,http://www.wikidata.org/entity/Q59721,Russian Ark,historical film,2002-05-22T00:00:00Z,Canada,Alexander Sokurov,99
996,http://www.wikidata.org/entity/Q59721,Russian Ark,drama film,2002-11-08T00:00:00Z,Canada,Alexander Sokurov,99
997,http://www.wikidata.org/entity/Q59721,Russian Ark,fantasy film,2002-11-08T00:00:00Z,Canada,Alexander Sokurov,99
998,http://www.wikidata.org/entity/Q59721,Russian Ark,mystery film,2002-11-08T00:00:00Z,Canada,Alexander Sokurov,99


In [20]:
# Drop every row that has a null value in any column.
Peliculas = Peliculas.dropna(axis="index", how="any")

In [21]:
# The modifications below are made on an independent copy, so Peliculas itself
# stays available as it is.
PeliculasModificadas = Peliculas.copy(deep=True)


In [22]:
# Preview the first five rows of the working copy.
PeliculasModificadas.head(n=5)

Unnamed: 0,Uri,Titulo,Género,Fecha de lanzamiento,País,Director,Duración en minutos
0,http://www.wikidata.org/entity/Q47296,Family Plot,comedy thriller,1976-08-16T00:00:00Z,United States of America,Alfred Hitchcock,115
1,http://www.wikidata.org/entity/Q47296,Family Plot,film based on a novel,1976-08-16T00:00:00Z,United States of America,Alfred Hitchcock,115
2,http://www.wikidata.org/entity/Q47296,Family Plot,comedy film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115
3,http://www.wikidata.org/entity/Q47296,Family Plot,crime film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115
4,http://www.wikidata.org/entity/Q47296,Family Plot,mystery film,1976-08-19T00:00:00Z,United States of America,Alfred Hitchcock,115


In [23]:
# Move the non-categorical columns into the index so that only "Género", "País"
# and "Director" remain as regular columns.
PeliculasModificadas = PeliculasModificadas.set_index(
    ["Uri", "Titulo", "Fecha de lanzamiento", "Duración en minutos"]
)

In [24]:
# One-hot encode the categorical columns: one indicator column per distinct
# value, prefixed with the name of the column it comes from.
# dtype=int pins the indicators to 0/1 integers. Without it the dtype depends on
# the installed pandas version (newer releases default to booleans), which
# changes how the flags are displayed and written to the exported file.
PeliculasModificadas = pd.get_dummies(PeliculasModificadas[["Género","País","Director"]], dtype=int)
PeliculasModificadas

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Género_Christmas film,Género_Huis clos,Género_LGBT-related film,Género_Peplum film genre,Género_Revisionist Western,Género_Slavic fantasy,Género_Western film,Género_action film,Género_action thriller,Género_adventure film,...,Director_Terry Gilliam,Director_Terry Jones,Director_Timur Bekmambetov,Director_Tom DiCillo,Director_Tony Bui,Director_Tony Scott,Director_Victor Fleming,Director_Vincente Minnelli,Director_Volker Schlöndorff,Director_Éric Toledano
Uri,Titulo,Fecha de lanzamiento,Duración en minutos,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
http://www.wikidata.org/entity/Q47296,Family Plot,1976-08-16T00:00:00Z,115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q47296,Family Plot,1976-08-16T00:00:00Z,115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q47296,Family Plot,1976-08-19T00:00:00Z,115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q47296,Family Plot,1976-08-19T00:00:00Z,115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q47296,Family Plot,1976-08-19T00:00:00Z,115,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
http://www.wikidata.org/entity/Q59721,Russian Ark,2002-05-22T00:00:00Z,99,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q59721,Russian Ark,2002-11-08T00:00:00Z,99,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q59721,Russian Ark,2002-11-08T00:00:00Z,99,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q59721,Russian Ark,2002-11-08T00:00:00Z,99,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Collapse the rows that share a Uri into a single row. Taking the maximum of
# each indicator column leaves a 1 wherever any of the film's rows had it set.
df_a = PeliculasModificadas.groupby(level="Uri").max()
df_a

Unnamed: 0_level_0,Género_Christmas film,Género_Huis clos,Género_LGBT-related film,Género_Peplum film genre,Género_Revisionist Western,Género_Slavic fantasy,Género_Western film,Género_action film,Género_action thriller,Género_adventure film,...,Director_Terry Gilliam,Director_Terry Jones,Director_Timur Bekmambetov,Director_Tom DiCillo,Director_Tony Bui,Director_Tony Scott,Director_Victor Fleming,Director_Vincente Minnelli,Director_Volker Schlöndorff,Director_Éric Toledano
Uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
http://www.wikidata.org/entity/Q11593,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q11618,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q1365,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q2201,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q2345,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
http://www.wikidata.org/entity/Q59721,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q7216,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q8328,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
http://www.wikidata.org/entity/Q8547,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# The category columns are in place; what is left is to attach the other values.
# 1. Turn the "Uri" index back into a regular column.
Modificacion = df_a.reset_index(level="Uri")

In [27]:
# 2. We need: "Titulo", "Fecha de lanzamiento", "Duración en minutos".
# A film can span several rows of Peliculas; the value taken is always the one
# from the first row that carries the film's Uri.
def _primer_valor(row, columna):
    """Return `columna` from the first row of Peliculas whose "Uri" equals row["Uri"]."""
    return Peliculas[Peliculas["Uri"] == row["Uri"]].iloc[0][columna]


# 2.1 Titulo
def getTitulo(row):
    """Return the 'Titulo' value of the first Peliculas row matching row["Uri"]."""
    return _primer_valor(row, 'Titulo')


# 2.2 Fecha de lanzamiento
def getFecha(row):
    """Return the 'Fecha de lanzamiento' value of the first Peliculas row matching row["Uri"]."""
    return _primer_valor(row, 'Fecha de lanzamiento')


# 2.3 Duración en minutos
def getMinutos(row):
    """Return the 'Duración en minutos' value of the first Peliculas row matching row["Uri"]."""
    return _primer_valor(row, 'Duración en minutos')


# Fill the three columns from one Uri-indexed lookup instead of scanning all of
# Peliculas once per row and per column. drop_duplicates(keep="first") retains
# the first row of every Uri, which is the same row the helpers above select.
_PrimeraFilaPorUri = Peliculas.drop_duplicates(subset="Uri", keep="first").set_index("Uri")
for _columna in ["Titulo", "Fecha de lanzamiento", "Duración en minutos"]:
    Modificacion[_columna] = Modificacion["Uri"].map(_PrimeraFilaPorUri[_columna])

In [28]:
# 3. Reduce the release date to just its year.
# Convert only while the column is still non-numeric. Running this cell a second
# time would otherwise pass the already-extracted years to pd.to_datetime, which
# reads plain numbers as offsets from the epoch and would turn every year into 1970.
_fechas = Modificacion["Fecha de lanzamiento"]
if not pd.api.types.is_numeric_dtype(_fechas):
    Modificacion["Fecha de lanzamiento"] = pd.to_datetime(_fechas).dt.year

In [29]:
# 4. Move the non-categorical columns to the front of the table, right after
# "Uri", so they sit apart from the indicator columns. Every column is inserted
# at position 1, so the names are listed in reverse of the final order
# (Titulo, Fecha de lanzamiento, Duración en minutos).
for nombre in ("Duración en minutos", "Fecha de lanzamiento", "Titulo"):
    Modificacion.insert(1, nombre, Modificacion.pop(nombre))

In [30]:
# Show the final table: "Uri", the three re-attached columns, then the indicator columns.
Modificacion

Unnamed: 0,Uri,Titulo,Fecha de lanzamiento,Duración en minutos,Género_Christmas film,Género_Huis clos,Género_LGBT-related film,Género_Peplum film genre,Género_Revisionist Western,Género_Slavic fantasy,...,Director_Terry Gilliam,Director_Terry Jones,Director_Timur Bekmambetov,Director_Tom DiCillo,Director_Tony Bui,Director_Tony Scott,Director_Victor Fleming,Director_Vincente Minnelli,Director_Volker Schlöndorff,Director_Éric Toledano
0,http://www.wikidata.org/entity/Q11593,The Last Airbender,2010,99,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,http://www.wikidata.org/entity/Q11618,Eraserhead,1977,89,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,http://www.wikidata.org/entity/Q1365,Swept Away,1974,114,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,http://www.wikidata.org/entity/Q2201,Kick-Ass,2010,117,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,http://www.wikidata.org/entity/Q2345,12 Angry Men,1957,95,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,http://www.wikidata.org/entity/Q59721,Russian Ark,2002,99,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
103,http://www.wikidata.org/entity/Q7216,Pirates II: Stagnetti's Revenge,2008,138,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104,http://www.wikidata.org/entity/Q8328,Gotcha!,1985,101,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
105,http://www.wikidata.org/entity/Q8547,Special Effects: Anything Can Happen,1996,40,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
# Write the final dataset to a CSV file, leaving out the row index.
Modificacion.to_csv(path_or_buf='Consulta.csv', index=False)