In [4]:
import findspark
findspark.init()
import pyspark
import sys
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating
from ipywidgets import widgets, interact_manual, Layout
from IPython.display import display
from urllib.request import urlopen
import matplotlib.pyplot as plt

# Define function 
def loadAnimeNames():
    animeNames = {}
    with open("anime_cleaned_1.csv", encoding='ascii', errors="ignore") as f:
        for line in f:
            fields = line.split(',')
            animeNames[int(fields[0])] = [fields[1],fields[2]]
    return animeNames

def loadUserNames():
    usernames = {}
    with open("animelists_als.csv", encoding='ascii', errors="ignore") as f:
        for line in f:
            fields = line.split(',')
            usernames[int(fields[0])] = fields[1]
    return usernames

def parseline(line):
    fields = line.split(',')
    userid = fields[0]
    username = fields[1]
    animeid = fields[2]
    score = fields[3]
    return (userid, animeid, score)

# pyspark set-up
conf = SparkConf().setMaster("local[*]").setAppName("AnimeRecommendationsALS")
sc = SparkContext(conf = conf)
sc.setCheckpointDir('checkpoint')

# Build rating object for ALS 
nameDict = loadAnimeNames()
usernameDict = loadUserNames()

lines = sc.textFile("animelists_als.csv")
parsedlines = lines.map(parseline)
ratings = parsedlines.map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2]))).cache()

print("Training model...")
# Build the recommendation model using Alternating Least Squares
rank = 5
numIterations = 20
model = ALS.train(ratings, rank, numIterations)

def RecommendationSystem():

    userID = int(text.value)
    print("\nTop 5 recommendations for ",usernameDict[userID],":\n")
    recommendations = model.recommendProducts(userID, 5)
    for recommendation in recommendations:
        print (nameDict[int(recommendation[1])][0])

    image_list = []
    for user, item, rating in model.recommendProducts(userID, 5):
        image_list.append(urlopen(nameDict[int(item)][1]))

    item_list = []
    for user, item, rating in model.recommendProducts(userID, 5):
        item_list.append((nameDict[int(item)][0]))

    final_list = list(zip(item_list,image_list))

    fig, (ax0,ax1,ax2,ax3,ax4) = plt.subplots(1,5,figsize=(30,30))
    for i, (item,img) in enumerate(final_list):
        f = plt.imread(img,format='jpg')
        eval('ax{}'.format(i)).imshow(f)
        eval('ax{}'.format(i)).set_title(item)
    plt.show()
    
print("DONE!")


Training model...
DONE!


In [5]:
text=widgets.Text()
display("Enter user id:",text)
interact_manual.opts['manual_name'] = 'Run'
bt = interact_manual(RecommendationSystem)

'Enter user id:'

Text(value='')

interactive(children=(Button(description='Run', style=ButtonStyle()), Output()), _dom_classes=('widget-interac…

In [3]:
#sc.stop()

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
nn_predict = pd.read_csv("NeuralNets_pred.csv")

from collections import defaultdict
def get_top_n(predictions, n=5):
    # First map the predictions to each user.
    top_n = defaultdict(list)
#    top_n = {}
    for i in tqdm(range(len(predictions))):
        top_n[predictions["username"].iloc[i]].append((predictions["anime_id"].iloc[i], predictions["score"].iloc[i]))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = [i[0] for i in user_ratings[:n]]

    return top_n

NN_recommend_dict = get_top_n(nn_predict)
NN_recommend_dict

HBox(children=(IntProgress(value=0, max=6249719), HTML(value='')))

In [8]:
for userid, username in usernameDict.items():
    if username == 'karthiga':
        print(userid)

0
