In [1]:
import json
import pandas
import pathlib
import pydash
import requests

def value_extract(row, column):

    ''' Return the 'value' entry of the cell held under `column` in `row`. '''

    cell = row[column]
    return pydash.get(cell, 'value')

def sparql_query(query, service):

    ''' Send sparql request, and formulate results into a dataframe.

    query: SPARQL query text, sent as the `query` request parameter.
    service: URL of the SPARQL endpoint to send the GET request to.

    Returns a pandas DataFrame built from the `results.bindings` entry of
    the JSON response, with one column per binding key and each cell
    reduced to its 'value' entry via value_extract.

    Raises requests.HTTPError when the endpoint answers with a 4xx/5xx
    status, and other requests exceptions on connection failure or when
    the 120 second timeout expires. '''

    response = requests.get(service, params={'format': 'json', 'query': query}, timeout=120)
    # Fail loudly on an error status (e.g. rate limiting or a server-side
    # query timeout) instead of trying to parse the error page as JSON below.
    response.raise_for_status()
    results = pydash.get(response.json(), 'results.bindings')
    data_frame = pandas.DataFrame.from_dict(results)
    # Each cell starts out as the raw binding object; keep only its 'value'.
    for column in data_frame.columns:
        data_frame[column] = data_frame.apply(value_extract, column=column, axis=1)

    return data_frame

# Select every item that has a wdt:P4947 statement (bound as ?tmdb_id),
# together with the item's English label from the label service.
tmdb_query = '''
    select ?tmdb_id ?wikidata_id ?wikidata_idLabel where {
        ?wikidata_id wdt:P4947 ?tmdb_id .
        service wikibase:label { bd:serviceParam wikibase:language "en". }
        }'''

tmdb_dataframe = sparql_query(query=tmdb_query, service='https://query.wikidata.org/sparql')

# The constellation files are read from an 'acmi-api' directory that sits
# next to the current working directory.
constellation_path = pathlib.Path.cwd().parents[0].joinpath('acmi-api', 'app', 'json', 'constellations')
# Keep only the .json entries, in sorted path order.
constellation_jsons = sorted(
    entry for entry in constellation_path.iterdir() if entry.suffix == '.json'
)

# Gather the external_references list of every key work and of both ends of
# every link, across all constellation files.
external_sources = list()
for c in constellation_jsons:
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding. The handle gets its own name so it is not shadowed by
    # the parsed document.
    with open(c, encoding='utf-8') as constellation_file:
        constellation_data = json.load(constellation_file)
    external_sources.append(pydash.get(constellation_data, 'key_work.external_references'))
    if 'links' in constellation_data:
        for link in constellation_data['links']:
            external_sources.append(pydash.get(link, 'start.external_references'))
            external_sources.append(pydash.get(link, 'end.external_references'))

# Flatten the per-work lists into individual references BEFORE de-duplicating:
# running uniq on the unflattened list only removes identical whole lists, so
# the same TMDB identifier appearing in two different lists would be kept twice.
# Entries for works without references (pydash.get found nothing at the path)
# are dropped by the source-name filter.
external_sources = pydash.flatten(external_sources)
external_sources = [x['source_identifier'] for x in external_sources if pydash.get(x, 'source.name') == 'TMDB-Movie']
# De-duplicate the identifiers themselves, keeping first-seen order.
external_sources = pydash.uniq(external_sources)

# One row per collected TMDB identifier; the left join keeps identifiers
# that have no match in the Wikidata results.
constellation_dataframe = pandas.DataFrame(data=external_sources, columns=['tmdb_id'])
constellation_dataframe = constellation_dataframe.merge(tmdb_dataframe, how='left', on='tmdb_id')

# Report the row count, then show the first rows as the cell's output.
print(constellation_dataframe.shape[0])
constellation_dataframe.head()

827


Unnamed: 0,tmdb_id,wikidata_id,wikidata_idLabel
0,39780,http://www.wikidata.org/entity/Q2033052,Oscar and Lucinda
1,47340,http://www.wikidata.org/entity/Q5245167,Dead End Drive-In
2,302401,http://www.wikidata.org/entity/Q19315473,Snowden
3,293310,http://www.wikidata.org/entity/Q18218448,Citizenfour
4,501395,http://www.wikidata.org/entity/Q56274013,True History of the Kelly Gang
