In [47]:
from math import sqrt
from numpy import genfromtxt
import numpy as np
from collections import OrderedDict


In [23]:
def load_data(path='./data/small-dataset.csv'):
    """Load the ratings table from a comma-separated file and echo it.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to the notebook's sample data set,
        so a bare ``load_data()`` call reads the same file as before.

    Returns
    -------
    numpy.ndarray
        Whatever ``genfromtxt`` builds for the file. For the sample data set
        each row is a record whose first field is the user name and whose
        remaining fields are integer ratings.
    """
    # dtype=None lets numpy pick a type per column, which keeps the name
    # column as text instead of turning it into nan.
    data = genfromtxt(path, delimiter=',', dtype=None)
    print(data)
    return data

In [5]:
# Load the ratings table once; the cells below read it through the global `data`.
data = load_data()

[(b'us', 7, 6, 7, 4, 5, 4) (b'as', 6, 7, 0, 4, 3, 4)
 (b'uas', 0, 3, 3, 1, 1, 0) (b'usda', 1, 2, 2, 3, 3, 4)
 (b'ufsds', 1, 0, 1, 2, 3, 3)]


In [24]:
def get_rating_user(user):
    """Return every field of ``user`` except the first as a numpy array.

    ``user`` is any indexable record with a length; position 0 is skipped
    and positions 1..len(user)-1 are collected in order.
    """
    ratings = []
    for column in range(1, len(user)):
        ratings.append(user[column])
    return np.array(ratings)

![Cosine similarity formula](./img/cosine.png)

In [7]:
def user_sim_cosine_sim(person1, person2):
    """Cosine similarity between the rating vectors of two users.

    Parameters
    ----------
    person1, person2 : record
        User rows; the ratings are extracted with ``get_rating_user``.

    Returns
    -------
    float
        dot(r1, r2) / (|r1| * |r2|). When either rating vector has zero
        length (for example, all ratings are 0) the ratio is undefined and
        0.0 is returned instead of nan.
    """
    rating1 = get_rating_user(person1)
    rating2 = get_rating_user(person2)

    norm_product = sqrt(np.dot(rating1, rating1)) * sqrt(np.dot(rating2, rating2))
    if norm_product == 0:
        # 0/0 would yield nan (plus a RuntimeWarning), and nan values make the
        # similarity ranking unreliable because they do not compare with others.
        return 0.0

    return np.dot(rating2, rating1) / norm_product

![Pearson correlation formula](./img/pearson.png)

In [8]:
def user_sim_pearson_corr(person1, person2):
    """Pearson correlation between the rating vectors of two users.

    Each rating vector is centred on its own mean, then the cosine of the
    centred vectors is taken.

    Parameters
    ----------
    person1, person2 : record
        User rows; the ratings are extracted with ``get_rating_user``.

    Returns
    -------
    float
        The correlation coefficient. When either user's ratings are all
        identical the centred vector is zero and the coefficient is
        undefined; 0.0 is returned instead of nan.
    """
    rating1 = get_rating_user(person1)
    rating2 = get_rating_user(person2)

    normalize1 = rating1 - np.mean(rating1)
    normalize2 = rating2 - np.mean(rating2)

    denominator = sqrt(np.dot(normalize1, normalize1)) * sqrt(np.dot(normalize2, normalize2))
    if denominator == 0:
        # A constant rating vector has no variance; avoid the 0/0 -> nan result.
        return 0.0

    return np.dot(normalize1, normalize2) / denominator

In [9]:
def most_similar_users(data, position_person , number_of_users, metric):
    """Return the top-K users most similar to the given user.

    Parameters
    ----------
    data : sequence
        User rows, addressable by integer position.
    position_person : int
        Position of the reference user in ``data``. Negative positions count
        from the end, as with normal sequence indexing.
    number_of_users : int
        Maximum number of neighbours to return (K).
    metric : callable
        ``metric(row_a, row_b)`` returning a similarity score; larger means
        more similar.

    Returns
    -------
    list of (int, score)
        ``(position, similarity)`` pairs sorted by similarity, best first.
        The reference user is never included.

    Raises
    ------
    IndexError
        If ``position_person`` is outside ``data``.
    """
    # Turn a negative position into its non-negative equivalent so the
    # "skip the user themselves" test below also works for e.g. -1.
    position_person = range(len(data))[position_person]

    person_data = data[position_person]
    scores = [(i, metric(person_data, data[i]))
              for i in range(len(data)) if i != position_person]
    return sorted(scores, key=lambda pair: pair[1], reverse=True)[:number_of_users]

In [54]:
def user_recommendations(data, person_position, metric, number_of_users=3):
    """Generate item recommendations for one user from their nearest neighbours.

    An item is a candidate when the target user's rating for it is 0 and a
    neighbour's rating for it is not 0.

    Parameters
    ----------
    data : sequence
        User rows, as accepted by ``most_similar_users``.
    person_position : int
        Position of the target user in ``data``.
    metric : callable
        Similarity function passed through to ``most_similar_users``.
    number_of_users : int, optional
        How many nearest neighbours to draw candidates from. Defaults to 3,
        the value that used to be hard-coded.

    Returns
    -------
    (list, set)
        A list of ``(item_index, neighbour_rating, neighbour_similarity,
        neighbour_index)`` tuples ordered by neighbour rating (highest first,
        ties broken by the larger neighbour index), and the set of distinct
        item indices appearing in that list.
    """
    results = most_similar_users(data, person_position, number_of_users, metric)
    measuring_person = get_rating_user(data[person_position])

    tup_ratings = []

    for user_index, user_similarity in results:
        tup_ratings += [(item_index, other_user_rating, user_similarity, user_index)
                        for item_index, other_user_rating in enumerate(get_rating_user(data[user_index]))
                        if measuring_person[item_index] == 0 and other_user_rating != 0]

    # Rank by the neighbour's rating, then by neighbour index; the similarity
    # score is carried along in each tuple but is not part of the sort key.
    tup_ratings = sorted(tup_ratings, key=lambda x: (x[1], x[3]), reverse=True)
    return tup_ratings, {x[0] for x in tup_ratings}

In [56]:
# Recommendations for the user at row 2, using cosine similarity.
user_recommendations(data, 2, user_sim_cosine_sim)

([(0, 7, 0.77662176202868816, 0),
  (0, 6, 0.55777335102271697, 1),
  (5, 4, 0.61379490552342619, 3),
  (5, 4, 0.55777335102271697, 1),
  (5, 4, 0.77662176202868816, 0),
  (0, 1, 0.61379490552342619, 3)],
 {0, 5})

In [57]:
# Row 3 has no 0 ratings, so there is nothing to recommend.
user_recommendations(data, 3, user_sim_cosine_sim)

([], set())

In [58]:
# Recommendations for the user at row 4, using cosine similarity.
user_recommendations(data, 4, user_sim_cosine_sim)

([(1, 7, 0.63646884652164448, 1),
  (1, 6, 0.72372533430255415, 0),
  (1, 2, 0.9338592095470355, 3)],
 {1})

In [18]:
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
# Cross-check the hand-written cosine metric against scikit-learn's pairwise
# implementation. The helper defined in this notebook is get_rating_user;
# getRatingForUser is not defined anywhere and raises NameError on a fresh kernel.
cosine_similarity([get_rating_user(x) for x in data])

array([[ 1.        ,  0.84444105,  0.77662176,  0.83861511,  0.72372533],
       [ 0.84444105,  1.        ,  0.55777335,  0.77438219,  0.63646885],
       [ 0.77662176,  0.55777335,  1.        ,  0.61379491,  0.36514837],
       [ 0.83861511,  0.77438219,  0.61379491,  1.        ,  0.93385921],
       [ 0.72372533,  0.63646885,  0.36514837,  0.93385921,  1.        ]])