In [90]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [77]:
df_users = pd.read_csv('ratings_data.csv')
df_movies = pd.read_csv('movies_data.csv')

In [110]:
df_movies.head()

Unnamed: 0.1,Unnamed: 0,Name,MovieID
0,0,#Alive,0.0
1,1,#Remolove: Futsuu no Koi wa Jado,1.0
2,2,&Audition - The Howling -,2.0
3,3,0.1% World,3.0
4,4,1 Litre no Namida,4.0


In [70]:
df_users.head()

Unnamed: 0.1,Unnamed: 0,Movie_name,Rating,User,MovieID,Country,Score,Description,Tag,Genres
0,0,The King's Affection,0.0,0.0,4093.0,South Korea,8.3,"The story is set during the Joseon Dynasty, at...",Cross-Dressing Joseon Dynasty Hidden Identity ...,"Historical, Romance, Drama"
1,1,Journey to the West,0.0,0.0,1827.0,Hong Kong,8.2,There is a fairy stone on Huaguo Mountain whic...,Adapted From A Novel Martial Arts Historical A...,"Comedy, Wuxia, Fantasy"
2,2,Journey to the West,0.0,0.0,1827.0,China,8.1,The story follows the adventures of Sun Wu Kon...,Adapted From A Novel Martial Arts Historical A...,"Wuxia, Fantasy"
3,3,BORDER,0.0,0.0,328.0,Japan,8.1,Ango Ishikawa is a detective with smarts and a...,Detective Male Lead Smart Male Lead Miniseries...,"Thriller, Mystery, Psychological, Supernatu..."
4,4,Bad and Crazy,0.0,0.0,388.0,South Korea,8.5,"Soo Yeol, the competent ‘bad boy’ on the polic...",Injustice Corruption Investigation Eccentric M...,"Action, Thriller, Mystery, Comedy"


In [78]:
v = df_users.MovieID.value_counts()
df_users = df_users[df_users.MovieID.isin(v.index[v.gt(200)])]

In [79]:
v = df_users.User.value_counts()
df_users = df_users[df_users.User.isin(v.index[v.gt(50)])]

In [80]:
df_users.Score.min()

7.6

In [81]:
df_users = df_users[['User','MovieID','Rating']]
df_users.head()

Unnamed: 0,User,MovieID,Rating
54,1.0,4368.0,8.0
55,1.0,2667.0,10.0
56,1.0,1155.0,8.0
57,1.0,1092.0,8.5
58,1.0,4223.0,9.5


In [82]:
df_users.nunique()

User       8275
MovieID    1629
Rating       22
dtype: int64

In [84]:
df_users = df_users.reset_index().pivot_table(index='MovieID',columns='User',values = 'Rating')


In [113]:
df_users.head()

User,1.0,4.0,5.0,6.0,8.0,10.0,12.0,15.0,17.0,18.0,...,15430.0,15434.0,15436.0,15437.0,15440.0,15443.0,15444.0,15445.0,15447.0,15450.0
MovieID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,8.0,7.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.5,6.0,0.0
4.0,9.0,0.0,8.0,0.0,0.0,10.0,0.0,0.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.5,...,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0
11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17.0,9.0,0.0,0.0,10.0,0.0,0.0,0.0,9.5,7.0,0.0,...,0.0,6.5,0.0,7.0,0.0,0.0,0.0,0.0,10.0,0.0


In [101]:
df_users = df_users.fillna(0)

In [102]:
user_matrix = csr_matrix(df_users.values)

In [104]:
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)

model_knn.fit(user_matrix)

In [115]:
movie_to_idx = {
    movie: i for i, movie in 
    enumerate(list(df_movies.set_index('MovieID').loc[df_users.index].Name))
}

In [177]:
from fuzzywuzzy import fuzz
def fuzzy_matching(mapper, fav_movie, verbose=True):
    """
    return the closest match via fuzzy ratio. If no match found, return None
    
    Parameters
    ----------    
    mapper: dict, map movie title name to index of the movie in data

    fav_movie: str, name of user input movie
    
    verbose: bool, print log if True

    Return
    ------
    index of the closest match
    """
    match_tuple = []
    # get match
    for title, idx in mapper.items():
        ratio = fuzz.ratio(title.lower(), fav_movie.lower())
        if ratio >= 60:
            match_tuple.append((title, idx, ratio))
    # sort
    match_tuple = sorted(match_tuple, key=lambda x: x[2])[::-1]
    if not match_tuple:
        #print('Oops! No match is found')
        return
    return match_tuple[0][1]

In [176]:

def make_recommendation(model_knn, data, mapper, fav_movie, n_recommendations):
    dic = {}
    model_knn.fit(data)
    
    #print('You have input movie:', fav_movie)
    #the function below is a helper function defined to check presence of Movie Name
    idx = fuzzy_matching(mapper, fav_movie, verbose=True)
    
    num_rows, num_cols = data.shape


    if idx != None: 
        if idx <  num_rows :
            distances, indices = model_knn.kneighbors(data[idx], n_neighbors=n_recommendations+1)
            # get list of raw idx of recommendations
            raw_recommends = \
                sorted(list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())), key=lambda x: x[1])[:0:-1]
            # get reverse mapper
            reverse_mapper = {v: k for k, v in mapper.items()}
            # print recommendations
            #print('Recommendations for {}:'.format(fav_movie))
            for i, (idx, dist) in enumerate(raw_recommends):
                if idx in reverse_mapper.keys():
                    #print('{0}: {1}, with distance of {2}'.format(i+1, reverse_mapper[idx], dist))
                    dic[reverse_mapper[idx]] = dist
    
    return dic

In [186]:
def merge_dictionaries(dic1,dic2, rating):
    for key,value in dic2.items():
        if key in dic1.keys():
            dic1[key] = (dic1[key] + (rating * value)/10)
        else:
            dic1[key] = value
    return dic1

In [187]:
def make_recommendations_list(df):

    title_list = list(df.Name)
    recom_dic = {}

    for index,row in df.iterrows():
        dic = make_recommendation(
            model_knn=model_knn,
            data=user_matrix,
            fav_movie=row.Name,
            mapper=movie_to_idx,
            n_recommendations=800)
        
        if dic != None:
            
            recom_dic=merge_dictionaries(recom_dic,dic, row.Rating)

    return sorted(recom_dic.items(),key = lambda x:x[1],reverse=True)




In [190]:
import scrapper
scr= scrapper.Scrapper()


user= 'MellOut'
user_df = scr.get_user_list(user)

recommendations = make_recommendations_list(user_name = user_df)

In [191]:
print(recommendations)

[('Mr. Queen', 20.708962964112715), ('Love Playlist', 20.572838985011817), ('Be My Princess', 20.278992200764232), ('Nodame Cantabile', 20.214859977118877), ('Imawa no Kuni no Alice', 20.203735023451685), ('Dr. Romantic Season 3', 20.187698729468302), ('Pachinko', 20.04255175445608), ('The Psychologist', 20.033560058061468), ('Between Us', 20.01438465487589), ('The Golden Spoon', 19.990729126083473), ('Utsukushii Kare Season 2', 19.90576253638869), ('Love Scenery', 19.90156845711489), ('The Heavenly Idol', 19.88583996472202), ('Kill It', 19.822751306452474), ('Switched', 19.821978210817097), ('Perfume', 19.798301732127335), ('Decision to Leave', 19.774803625596356), ('Fukou-kun wa Kiss Suru Shikanai!', 19.738101355025155), ('Produce X 101', 19.736040191289664), ('Alchemy of Souls', 19.703979618501503), ('School 2013', 19.70225581314087), ('Triangle', 19.698708699165397), ('Money Flower', 19.693878749962177), ('The King in Love', 19.682315433124142), ('Devilish Joy', 19.676656786061685)