In [1]:
import io
from surprise import Dataset
from surprise import KNNWithMeans
from surprise import get_dataset_dir
from collections import defaultdict
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import requests
import re
from IPython.display import display, HTML

# --- Configuration -----------------------------------------------------------
k = 4              # neighbourhood size; passed to the model as both k and min_k
similarFilms = 5   # how many recommendations to keep; also reused as min_support
userID = input("Enter user ID: ")   # input() returns a string

# --- Train a KNN-with-means model on the full MovieLens 100k data ------------
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
simOptions = dict(name='cosine', min_support=similarFilms)

alg = KNNWithMeans(k=k, min_k=k, sim_options=simOptions)
alg.fit(trainset)

# --- Predict ratings for the anti-testset entries of the requested user ------
# The anti-testset is built for all users and then narrowed to the entries
# whose first field equals the entered user ID.
testSet = (entry for entry in trainset.build_anti_testset() if entry[0] == userID)
predictions = alg.test(testSet)

# Collect (item id, rounded estimate) pairs per user. Each prediction is
# unpacked positionally: index 0 = user id, 1 = item id, 3 = estimate.
topN = defaultdict(list)
for prediction in predictions:
    uid, iid, est = prediction[0], prediction[1], prediction[3]
    topN[uid].append((iid, round(est, 3)))

# Keep only the `similarFilms` highest estimates for each user.
for uid in topN:
    ranked = sorted(topN[uid], key=lambda pair: pair[1], reverse=True)
    topN[uid] = ranked[:similarFilms]

file = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
# NOTE: from here on `data` is a dict {item id: (title, release date)}; it
# rebinds the name that held the Surprise Dataset earlier in this cell.
data = {}
rowFormat = "{:^4} {:<95} {:^1}"
movies = []   # [title without year/article, release year] per recommendation

# u.item is pipe-delimited (id | title | release date | ...). It is encoded as
# ISO-8859-1, so the encoding is given explicitly: relying on the platform
# default fails with UnicodeDecodeError where that default is UTF-8.
with open(file, 'r', encoding='ISO-8859-1') as f:
    for line in f:
        fields = line.split('|')
        if len(fields) < 3:
            # Blank or truncated line: nothing usable to index.
            continue
        data[fields[0]] = (fields[1], fields[2])

print(f'User {userID}:')
for movieID, rating in topN[userID]:
    print(rowFormat.format(movieID, str(data[movieID]), rating))
    movie_title = data[movieID][0]

    # The release year is the parenthesised 4-digit suffix, e.g. "(1997)".
    # Anchoring to the end avoids picking up digits that belong to the title
    # itself ("2001: A Space Odyssey (1968)").
    yearMatch = re.search(r'\((\d{4})\)\s*$', movie_title)
    if yearMatch is None:
        print(f"Skipping '{movie_title}': no release year found in the title.")
        continue

    # Drop the year suffix, then any parenthesised alternate title.
    movie = movie_title[:yearMatch.start()].rstrip()
    parenAt = movie.find(" (")
    if parenAt != -1:
        movie = movie[:parenAt]

    # MovieLens moves a leading article to the end ("Big Lebowski, The").
    # Only a *trailing* ", The" is removed, so titles that contain ", The"
    # in the middle are not cut short.
    if movie.endswith(", The"):
        movie = movie[:-len(", The")]

    movies.append([movie, yearMatch.group(1)])

Enter user ID: 1
Computing the cosine similarity matrix...
Done computing similarity matrix.
User 1:
302  ('L.A. Confidential (1997)', '01-Jan-1997')                                                     4.953
902  ('Big Lebowski, The (1998)', '26-Dec-1997')                                                     4.87
1367 ('Faust (1994)', '01-Jan-1994')                                                                 4.761
516  ('Local Hero (1983)', '01-Jan-1983')                                                            4.76
1449 ('Pather Panchali (1955)', '22-Mar-1996')                                                       4.736


In [2]:
def querySPARQL(sparql, wiki_id, title):
    """Print the films that received the Palme d'Or in a film's release year.

    The query looks up the earliest release year (P577) of the Wikidata item
    ``wiki_id`` and selects every film (P31 = Q11424) holding an award
    statement (P166) for Q179808 whose point in time (P585) falls in that year.

    Parameters
    ----------
    sparql : object
        Query client exposing ``setQuery``, ``setReturnFormat`` and
        ``query().convert()`` (a ``SPARQLWrapper`` instance in this notebook).
    wiki_id : str
        Wikidata item id of the recommended film, e.g. ``"Q12345"``.
    title : sequence
        ``[movie name, release year]``; used only for the printed messages.

    Raises
    ------
    ValueError
        If ``wiki_id`` is not a plain ``Q<digits>`` identifier. The id is
        spliced into the query text, so anything else is rejected up front.
    """
    if not isinstance(wiki_id, str) or re.fullmatch(r'Q\d+', wiki_id) is None:
        raise ValueError("wiki_id must look like 'Q123', got " + repr(wiki_id))

    sparql_query = """
        SELECT ?movieLabel
        WHERE 
        {
            {SELECT ?release_year #Finding release year of the recommended film
                WHERE
                {
                  wd:""" + wiki_id + """ wdt:P577 ?release_date.
                  BIND(str(YEAR(?release_date)) AS ?release_year).
                }
            ORDER BY ASC(?release_year) LIMIT 1}
          ?movie wdt:P31 wd:Q11424. #object is a film
          ?movie p:P166 ?awardstatement. #with list of awards
          ?awardstatement ps:P166 ?award. #takes each award to check if it`s Palme D`Or
          ?awardstatement pq:P585 ?date. #takes date of each award to check if it`s receiver in the year of the recommended film`s release
          FILTER(str(YEAR(?date)) = ?release_year && ?award = wd:Q179808) #checks if year is the same and award if Palme D`or
          SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
        }
        """
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results['results']['bindings']
    if not bindings:
        print("Could not find movies that were given Palme d'Or in " + title[1] + " ('" + title[0] + "').")
        return

    # pd.json_normalize is the supported entry point since pandas 1.0; fall
    # back to the old pd.io.json location only on releases that predate it.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize
    results_df = normalize(bindings)

    print( "Movies that were given Palme d'Or in " + title[1] + " ('" + title[0] + "'):")
    display(HTML(results_df[['movieLabel.value']].to_html()))
    print("\n")

In [3]:
API_ENDPOINT = "https://www.wikidata.org/w/api.php"
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Results requested per wbsearchentities call. The offset sent as `continue`
# advances by the same amount, so the page size is passed explicitly instead
# of relying on the API default.
SEARCH_PAGE_SIZE = 7
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled request from hanging the cell

# For every recommended film: page through Wikidata search results until one
# whose description mentions the release year is found, then query the
# Palme d'Or winners of that year.
for movieEntry in movies:
    movieName, movieYear = movieEntry[0], movieEntry[1]
    search = 0
    wiki_id = None

    while wiki_id is None:
        params = {
            'action': 'wbsearchentities',
            'format': 'json',
            'language': 'en',
            'limit': SEARCH_PAGE_SIZE,
            'continue': search,
            'search': movieName,
        }
        res = requests.get(API_ENDPOINT, params=params, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        # An API error payload has no 'search' key; treat it as "no results".
        res_json = res.json().get('search', [])

        if not res_json:
            print("Could not find movie with Palme d'Or in the same year " + movieName + ", " + movieYear + " was published.\n")
            break

        for candidate in res_json:
            # Many Wikidata entities have no description at all.
            description = candidate.get('description') or ''
            if movieYear in description:
                wiki_id = candidate['id']
                break

        search += SEARCH_PAGE_SIZE

    if wiki_id is not None:
        querySPARQL(sparql, wiki_id, movieEntry)

Movies that were given Palme d'Or in 1997 ('L.A. Confidential'):


Unnamed: 0,movieLabel.value
0,Taste of Cherry
1,The Eel




Movies that were given Palme d'Or in 1998 ('Big Lebowski'):


Unnamed: 0,movieLabel.value
0,Eternity and a Day




Movies that were given Palme d'Or in 1994 ('Faust'):


Unnamed: 0,movieLabel.value
0,Pulp Fiction




Movies that were given Palme d'Or in 1983 ('Local Hero'):


Unnamed: 0,movieLabel.value
0,The Ballad of Narayama




Movies that were given Palme d'Or in 1955 ('Pather Panchali'):


Unnamed: 0,movieLabel.value
0,Marty




