In [17]:
import io
from collections import defaultdict
from surprise import KNNBaseline
from surprise import Dataset
from surprise import get_dataset_dir

# Raw MovieLens user id to build recommendations for. It is kept as a string
# because it is compared with `==` against the user ids in the anti-testset.
# To ask for the id interactively, use: input_user = input("Enter User ID:")
input_user = "5"


def get_items(file_name=None):
    """Map each raw item id to its ``(title, release_date)`` pair.

    Args:
        file_name: Optional path to a pipe-delimited ``u.item`` style file
            whose first three fields are id, title and release date. When
            omitted, ``<dataset_dir>/ml-100k/ml-100k/u.item`` is read, where
            ``<dataset_dir>`` is whatever ``get_dataset_dir()`` returns.

    Returns:
        dict: ``{item_id: (title, release_date)}``; all values are the
        strings found in the file. Lines with fewer than three fields
        (e.g. blank lines) are skipped instead of raising ``IndexError``.
    """
    if file_name is None:
        file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    rid_to_name = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            # Drop the line terminator so it can never leak into a field.
            fields = line.rstrip('\n').split('|')
            if len(fields) < 3:
                continue
            rid_to_name[fields[0]] = (fields[1], fields[2])
    return rid_to_name


# Train a user-based KNN model (cosine similarity) on the full ml-100k ratings.
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
# Surprise's documented option key is 'user_based'; the previous spelling
# 'user_base' is not a recognised option, so the setting had no effect.
algo = KNNBaseline(
    k=4,
    sim_options={'name': 'cosine', 'user_based': True, 'min_support': 5},
)
algo.fit(trainset)

# Build the anti-testset (items the user has NOT rated) for the requested
# user only, instead of building it for every user and filtering afterwards.
# The tuples are the same (raw_uid, raw_iid, global_mean) triples that
# build_anti_testset() produces for this user.
# to_inner_uid raises ValueError when input_user is not in the trainset.
inner_uid = trainset.to_inner_uid(input_user)
rated_inner_iids = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}
testset = [
    (input_user, trainset.to_raw_iid(inner_iid), trainset.global_mean)
    for inner_iid in trainset.all_items()
    if inner_iid not in rated_inner_iids
]
predictions = algo.test(testset)

# Collect (item id, estimate rounded to 3 decimals) per user ...
top_n = defaultdict(list)
for uid, iid, _, est, _ in predictions:
    top_n[uid].append((iid, round(est, 3)))

# ... and keep only the five highest estimates for each user.
for uid, user_ratings in top_n.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    top_n[uid] = user_ratings[:5]

item_map = get_items()

# Print the recommended items for the requested user and collect their
# titles in `arr`.
arr = []
for movie_rid, rating in top_n[input_user]:
    arr.append(item_map[movie_rid][0])
    print('{:4s} {:<60s} {}'.format(movie_rid, str(item_map[movie_rid]), rating))
arr

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
1463 ('Boys, Les (1997)', '01-Jan-1997')                          5
1536 ('Aiqing wansui (1994)', '22-Jul-1996')                      4.94
814  ('Great Day in Harlem, A (1994)', '01-Jan-1994')             4.889
1599 ("Someone Else's America (1995)", '10-May-1996')             4.615
1500 ('Santa with Muscles (1996)', '08-Nov-1996')                 4.613


['Boys, Les (1997)',
 'Aiqing wansui (1994)',
 'Great Day in Harlem, A (1994)',
 "Someone Else's America (1995)",
 'Santa with Muscles (1996)']

In [18]:
# Format the recommended titles for the Wikidata search: drop a trailing
# "(YYYY)" year and move a trailing article back to the front
# ("Boys, Les (1997)" -> "Les Boys").
_TRAILING_ARTICLES = {
    'the', 'a', 'an',
    'le', 'la', 'les', 'un', 'une',
    'il', 'el', 'los', 'las',
    'das', 'der', 'die',
}


def _to_search_title(title):
    """Return ``title`` without its year suffix and with the article in front.

    The year is removed only when the title really ends in ``(dddd)``, and
    only the text after the LAST comma is moved, and only if it is a known
    article. This keeps the function idempotent (re-running the cell does not
    chop more characters) and leaves titles such as "Paris, Texas" or titles
    containing several commas intact.
    """
    title = title.strip()
    has_year_suffix = (
        len(title) >= 6
        and title[-6] == '('
        and title[-1] == ')'
        and title[-5:-1].isdigit()
    )
    if has_year_suffix:
        title = title[:-6].strip()
    head, comma, tail = title.rpartition(',')
    if comma and tail.strip().lower() in _TRAILING_ARTICLES:
        title = tail.strip() + ' ' + head.strip()
    return title


arr = [_to_search_title(title) for title in arr]
arr

['Les Boys',
 'Aiqing wansui',
 'A Great Day in Harlem',
 "Someone Else's America",
 'Santa with Muscles']

In [19]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import requests

API_ENDPOINT = "https://www.wikidata.org/w/api.php"

# For every cleaned title, keep the first search hit whose description
# mentions "film"; titles without such a hit are left out of `films`.
films = []

for query in arr:
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': 'en',
        'search': query,
    }
    # A timeout keeps the cell from hanging forever on a stalled connection,
    # and raise_for_status surfaces HTTP errors before JSON parsing.
    res = requests.get(API_ENDPOINT, params=params, timeout=30)
    res.raise_for_status()
    payload = res.json()  # parse the body once instead of on every access
    for item in payload.get('search', []):
        # Not every search hit carries a description; treat it as empty.
        if 'film' in item.get('description', ''):
            films.append((query, item['id']))
            break
films

[('Les Boys', 'Q3231134'),
 ('A Great Day in Harlem', 'Q4657171'),
 ("Someone Else's America", 'Q7219297'),
 ('Santa with Muscles', 'Q1631700')]

In [20]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setReturnFormat(JSON)  # same for every query, so set it once

# Query template: %(qid)s is replaced by the entity id of the reference film.
# It selects other ?film entities that share a P136 value (?genre) with the
# reference film and whose P577 date falls in the same year, returning the
# concatenated genre labels and one sample date per film.
SIMILAR_FILMS_QUERY = """
SELECT DISTINCT ?film ?filmLabel (GROUP_CONCAT(DISTINCT ?genreLabel;separator=", ") as ?genreLabel_shown)
(SAMPLE(?year) as ?year_shown)
WHERE 
{                          
  ?film wdt:P31 wd:Q11424.
  wd:%(qid)s wdt:P136 ?genre.
  wd:%(qid)s wdt:P577 ?date.
  ?film wdt:P577 ?year.
  ?film wdt:P136 ?genre.
  FILTER(YEAR(?year)=YEAR(?date))
  FILTER(?film != wd:%(qid)s)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
                         ?genre rdfs:label ?genreLabel.
                         ?film rdfs:label ?filmLabel.}
}
GROUP BY ?film ?filmLabel
"""

# One result document per entry of `films`, in the same order.
final = []
for item in films:
    # Variable name (typo included) kept in case other cells reference it.
    spaqrql_query = SIMILAR_FILMS_QUERY % {'qid': item[1]}
    sparql.setQuery(spaqrql_query)
    # Run the query once and reuse the result; it used to be sent twice.
    results = sparql.query().convert()
    final.append(results)

In [22]:
# Columns shown to the user, in display order.
RESULT_COLUMNS = [
    'film.value',
    'filmLabel.value',
    'genreLabel_shown.value',
    'year_shown.value',
]

# pandas >= 1.0 exposes json_normalize at top level; fall back to the
# older pd.io.json location on earlier versions.
_json_normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize

# One DataFrame per queried film, aligned with `films` / `final`.
output = [_json_normalize(item['results']['bindings']) for item in final]

for k, (title, _qid) in enumerate(films):
    print("Type number", k, "to get movies for film:", title)
i = int(input("Your Number:"))
# Reject out-of-range choices explicitly; a negative number would otherwise
# silently pick a film from the end of the list.
if not 0 <= i < len(output):
    raise IndexError(
        "Choice {} is out of range; {} film(s) available.".format(i, len(output))
    )
# reindex (rather than [...]) so an empty result set shows an empty table
# instead of raising KeyError on the missing columns.
output[i].reindex(columns=RESULT_COLUMNS).head()

Type number 0 to get movies for film: Les Boys
Type number 1 to get movies for film: A Great Day in Harlem
Type number 2 to get movies for film: Someone Else's America
Type number 3 to get movies for film: Santa with Muscles


Your Number: 2


Unnamed: 0,film.value,filmLabel.value,genreLabel_shown.value,year_shown.value
0,http://www.wikidata.org/entity/Q2309994,Empire Records,tragicomedy,1995-01-01T00:00:00Z
1,http://www.wikidata.org/entity/Q156516,Underground,tragicomedy,1995-04-01T00:00:00Z
2,http://www.wikidata.org/entity/Q7598722,Q7598722,tragicomedy,1995-01-01T00:00:00Z
