# AML/Movie Recommender system/MDS201803

In [46]:
import numpy as np
import pandas as pd
from math import sqrt

In [27]:
# Load the MovieLens ml-100k ratings file (tab-separated, no header row),
# name its four columns, and drop the timestamp column, which is not used.
data = pd.read_csv(
    "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    sep="\t",
    header=None,
    names=['user_id', 'item_id', 'rating', 'timestamp'],
).drop(columns='timestamp')
data.head(7)

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
5,298,474,4
6,115,265,2


Creating a dictionary of movie critics and their ratings of movies

In [28]:
# Build a nested mapping {user_id: {item_id: rating}} from the ratings table.
# setdefault creates the per-user dict on first sight of a user, so no
# exception handling is needed (the previous broad `except Exception` would
# also have hidden unrelated errors). itertuples avoids building a Series per
# row, which makes it much faster than iterrows on this table.
ratings = {}
for user_id, item_id, rating in data[['user_id', 'item_id', 'rating']].itertuples(index=False, name=None):
    ratings.setdefault(user_id, {})[item_id] = rating

Function for measuring similarity between 2 users based on their movie ratings. 
<br>Using Euclidean distance

In [42]:
def sim_distance(prefs,user_1,user_2):
    """Return a distance-based similarity score between two users.

    The score is ``1 / (1 + d)`` where ``d`` is the sum of squared rating
    differences over every item rated by both users, so identical ratings
    give 1.0 and the score shrinks towards 0 as the ratings diverge.

    Parameters
    ----------
    prefs : dict
        Nested mapping ``{user: {item: rating}}``.
    user_1, user_2 : hashable
        Keys of ``prefs`` identifying the two users to compare.

    Returns
    -------
    int or float
        0 if the users share no rated items, otherwise ``1 / (1 + d)``.

    Raises
    ------
    KeyError
        If either user is not a key of ``prefs``.
    """
    # Items rated by both users, in user_1's iteration order.
    shared_items = [item for item in prefs[user_1] if item in prefs[user_2]]
    # If they have no ratings in common, return 0.
    if not shared_items:
        return 0
    # Accumulate the squared differences over ALL shared items (previously the
    # running value was overwritten each iteration, so only the last shared
    # item contributed to the score).
    sum_of_squares = sum(
        pow(prefs[user_1][item] - prefs[user_2][item], 2)
        for item in shared_items
    )
    return 1 / (1 + sum_of_squares)

Function for measuring similarity between 2 users based on their movie ratings. 
<br>Using Pearson's correlation coefficient

In [47]:
def sim_pearson(prefs,p1,p2):
    """Return the Pearson correlation coefficient of two users' ratings.

    Only items rated by both ``p1`` and ``p2`` take part in the calculation.

    Parameters
    ----------
    prefs : dict
        Nested mapping ``{user: {item: rating}}``.
    p1, p2 : hashable
        Keys of ``prefs`` identifying the two users to compare.

    Returns
    -------
    int or float
        0 when the users share no rated items or when the denominator of the
        correlation is zero; otherwise the correlation coefficient.
    """
    # Mutually rated items, in p1's iteration order.
    common = [movie for movie in prefs[p1] if movie in prefs[p2]]
    count = len(common)
    if not count:
        return 0

    first = [prefs[p1][movie] for movie in common]
    second = [prefs[p2][movie] for movie in common]

    # Plain sums, sums of squares and sum of products over the shared items.
    total_1, total_2 = sum(first), sum(second)
    squares_1 = sum(x ** 2 for x in first)
    squares_2 = sum(y ** 2 for y in second)
    cross = sum(x * y for x, y in zip(first, second))

    numerator = cross - (total_1 * total_2 / count)
    denominator = sqrt(
        (squares_1 - total_1 ** 2 / count) * (squares_2 - total_2 ** 2 / count)
    )
    # A zero denominator means at least one user gave the same rating to every
    # shared item, so the correlation is undefined; report 0 instead.
    if denominator == 0:
        return 0
    return numerator / denominator

Function which outputs the top matching users of a particular user

In [69]:
# Number of results and similarity function are optional params.
def topMatches(prefs,person,n=5,similarity=None):
    """Return the ``n`` users most similar to ``person``.

    Parameters
    ----------
    prefs : dict
        Nested mapping ``{user: {item: rating}}``.
    person : hashable
        Key of ``prefs`` identifying the user to find matches for.
    n : int, optional
        Number of matches to return (default 5).
    similarity : callable, optional
        Function ``similarity(prefs, user_a, user_b)`` returning a score where
        higher means more similar. Defaults to ``sim_pearson``.

    Returns
    -------
    list of tuple
        ``(score, other_user)`` pairs sorted by descending score, at most
        ``n`` long.
    """
    # Resolve the default lazily so the definition does not depend on
    # sim_pearson already existing when this cell is run.
    if similarity is None:
        similarity = sim_pearson
    scores = [
        (similarity(prefs, person, other), other)
        for other in prefs
        if other != person
    ]
    # Sort the list so the highest scores appear at the top.
    scores.sort(reverse=True)
    return scores[0:n]

Function for getting recommendations of top 10 unwatched movies

In [70]:
def getRecommendations(prefs,person,similarity,n=10):
    """Recommend unseen items to ``person`` using similarity-weighted ratings.

    For every other user with a positive similarity to ``person``, each item
    that ``person`` has not rated (or has rated 0) contributes
    ``rating * similarity`` to that item's total. The predicted score of an
    item is its total divided by the sum of the similarities of the users who
    rated it.

    Parameters
    ----------
    prefs : dict
        Nested mapping ``{user: {item: rating}}``.
    person : hashable
        Key of ``prefs`` identifying the user to recommend for.
    similarity : callable
        Function ``similarity(prefs, user_a, user_b)`` returning a score where
        higher means more similar.
    n : int or None, optional
        Maximum number of recommendations to return (default 10). ``None``
        returns every scored item.

    Returns
    -------
    list of tuple
        ``(predicted_score, item)`` pairs sorted by descending score.
    """
    totals = {}
    simSums = {}
    seen = prefs[person]
    for other in prefs:
        # Don't compare the person to themselves.
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # Ignore scores of zero or lower.
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            # Only score items the person hasn't rated yet.
            if item not in seen or seen[item] == 0:
                # Similarity-weighted rating total for this item.
                totals[item] = totals.get(item, 0) + rating * sim
                # Sum of similarities, used to normalise the total.
                simSums[item] = simSums.get(item, 0) + sim
    # Normalised scores, highest first.
    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    rankings.sort(reverse=True)
    return rankings[:n]

In [71]:
# Recommendations for user 196, scored with the distance-based similarity.
getRecommendations(prefs=ratings, person=196, similarity=sim_distance)

[(5.0, 1653),
 (5.0, 1599),
 (5.0, 1536),
 (5.0, 1500),
 (5.0, 1467),
 (5.0, 1293),
 (5.0, 1201),
 (5.0, 1189),
 (5.0, 1122),
 (5.0, 814)]