In [1]:
## LOADING IN LIBRARIES

from surprise import KNNWithZScore
from surprise import Dataset,Reader

import pandas as pd

In [2]:
## IMPORTING MOVIE LENS DATASET AND PROCESSING INTO SURPRISE DATA FORMAT.

ml_df = pd.read_csv('C:\\Users\\Nithin\\My_Courses\\Top3_Movie_Recommendations_CF\\ml-100k\\u.data',
               header=None,delimiter='\t')
ml_df.columns = ['user_id','item_id','rating','timestamp']

ml_original = ml_df.copy()
ml_df.drop('timestamp',axis=1,inplace=True)
ml_df.head()

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [3]:
## Preparing data in built in Surprise Dataset format.

reader = Reader()

data = Dataset.load_from_df(ml_df,reader)

In [4]:
# Creating train and test sets

train_set = data.build_full_trainset()
test_set = train_set.build_anti_testset()

In [5]:
## Model training and predictions

knn = KNNWithZScore(sim_options={'name':'Cosine',
                                'user_based':False})

knn.train(train_set)

predictions = knn.test(test_set)




Computing the cosine similarity matrix...
Done computing similarity matrix.


In [10]:
## To pull out Top3 recommendations for each user.

from collections import defaultdict

def top_3_recommendations(predictions,n=3):
    
    top_n = defaultdict(list)
    
    for uid,iid,true_rating,est,_ in predictions:
        top_n[uid].append((iid,est))
    
    for uid,user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1],reverse=True)
        top_n[uid] = user_ratings[:n]
    
    return top_n

recommended_items_to_user = top_3_recommendations(predictions)
#recommended_items_to_user

In [11]:
## Parsing u.Item file.

def id_to_movie_map(filename):
    with open(filename,'r') as fo:
        x = fo.read()
    im = {}
    for line in x.splitlines():
        im[eval(line.split('|')[0])] = line.split('|')[1]
    
    return im

mapper = id_to_movie_map('C:\\Users\\Nithin\\My_Courses\\Top3_Movie_Recommendations_CF\\ml-100k\\u.item')

#print(mapper)

In [8]:
# To easily visualize using print method.

##for uid, user_ratings in recommended_items_to_user.items():
##    print(uid, [mapper[iid] for (iid, _) in user_ratings])

In [9]:
## VISUALIZING IN A DATAFRAME

## Mapping Ids to Movie Names.

from collections import OrderedDict
movies = OrderedDict()
for uid,recommendations in recommended_items_to_user.items():
    lst = []
    for i in recommendations:
        lst.append(mapper[i[0]])
    
    movies[uid] = '|'.join(lst)

### CREATING DATAFRAME

Top3_Movie_Recommendations = pd.DataFrame(index=movies.keys(),data=list(movies.values()))
Top3_Movie_Recommendations = Top3_Movie_Recommendations.iloc[:-1]
Top3_Movie_Recommendations.columns = ['Movies']
Top3_Movie_Recommendations.index.rename('User_ID',inplace=True)

for index,movie_no in enumerate(['Movie_No1', 'Movie_No2', 'Movie_No3']):
    Top3_Movie_Recommendations[movie_no] = Top3_Movie_Recommendations.Movies.apply(lambda x:x.split('|')[index])

Top3_Movie_Recommendations.drop('Movies',axis=1)
Top3_Movie_Recommendations.head() 

Unnamed: 0_level_0,Movies,Movie_No1,Movie_No2,Movie_No3
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
196,"Prefontaine (1997)|Great Day in Harlem, A (199...",Prefontaine (1997),"Great Day in Harlem, A (1994)",Aiqing wansui (1994)
186,"Prefontaine (1997)|Great Day in Harlem, A (199...",Prefontaine (1997),"Great Day in Harlem, A (1994)","Saint of Fort Washington, The (1993)"
22,Prefontaine (1997)|Santa with Muscles (1996)|G...,Prefontaine (1997),Santa with Muscles (1996),"Great Day in Harlem, A (1994)"
244,Prefontaine (1997)|Santa with Muscles (1996)|G...,Prefontaine (1997),Santa with Muscles (1996),"Great Day in Harlem, A (1994)"
166,"Prefontaine (1997)|Great Day in Harlem, A (199...",Prefontaine (1997),"Great Day in Harlem, A (1994)",Star Kid (1997)
