# Incremental Collaborative Filtering (ICF) - Algorithm experiments

MovieLens 100k data set: https://maxhalford.github.io/files/datasets/ml_100k.zip

In [1]:
import csv

In [28]:
class myICF():
    """Experimental incremental, user-based collaborative filtering state.

    Ratings are fed one at a time through :meth:`run`. For every user the
    class caches how many items the user has rated and the user's running
    average rating; for every pair of users it caches the sums of the ratings
    each of them gave to the items both have rated.

    NOTE(review): the terms ``e``, ``f`` and ``g`` computed in
    ``_new_rating`` / ``_update_rating`` are presumably the increments for the
    cached pair factors ``B``, ``C`` and ``D``; they are evaluated but not yet
    written back anywhere, so ``B``, ``C`` and ``D`` always stay at 0.
    """

    def __init__(self):
        self.user_ratings = {}  # user -> {item: rating}
        self.user_meta = {}  # user -> {'q': number of rated items, 'avg.rating': running average}
        # (user_x, user_y) -> {'B', 'C', 'D', 'sum.co_ratings': {user_x: sum, user_y: sum, 'n': count}}
        self.user_pair_meta = {}

    def _new_user(self, user, item, rating):
        """Register ``user`` and create a pair entry with every known user.

        ``item`` and ``rating`` are accepted for signature symmetry with the
        other handlers but are not used here.
        """
        self.user_ratings[user] = {}
        self.user_meta[user] = {'q': 0, 'avg.rating': 0}

        # One pair entry per already-known user, keyed (existing_user, new_user).
        for u in self.user_meta:
            if u == user:
                continue
            self.user_pair_meta[(u, user)] = {'B': 0, 'C': 0, 'D': 0}
            self.user_pair_meta[(u, user)]['sum.co_ratings'] = {u: 0, user: 0, 'n': 0}

    def _new_rating(self, user, item, rating):
        """Handle the first rating ``user`` gives to ``item``.

        Updates the co-rating sums of every pair whose other user already
        rated ``item``, then stores the rating, increments the user's rating
        count and refreshes the user's average.

        Returns:
            Tuple ``(new_avg, delta_avg)``: the user's new average rating
            (rounded to 2 decimals) and its difference from the previous one.
        """
        q = self.user_meta[user]['q']  # number of items rated so far
        A_avg_rating = self.user_meta[user]['avg.rating']
        # Running mean; NOTE(review): rounding at every step means the cached
        # average can drift away from the exact mean over many ratings.
        new_avg = round((rating / (q + 1)) + (q / (q + 1)) * A_avg_rating, 2)
        delta_avg = new_avg - A_avg_rating

        for userB in self.user_meta:
            if userB == user:
                continue

            B_avg_rating = self.user_meta[userB]['avg.rating']

            if item in self.user_ratings[userB]:
                # User B has rated the item: it becomes a co-rated item.
                # NOTE(review): the sums returned here already include the
                # new item, and e/f below are evaluated against them.
                A_sum_coratings, B_sum_coratings, n_coratings = self._update_get_coratings(
                    user, userB, item, rating, new_rating=True)
                B_rating = self.user_ratings[userB][item]

                e = (rating - new_avg) * (B_rating - B_avg_rating) - delta_avg * (B_sum_coratings - n_coratings * B_avg_rating)
                f = (rating - new_avg) ** 2 + n_coratings * delta_avg ** 2 - 2 * delta_avg * (A_sum_coratings - n_coratings * A_avg_rating)
                g = (B_rating - B_avg_rating) ** 2
            else:
                # User B has not rated the item: only the average shift matters.
                A_sum_coratings, B_sum_coratings, n_coratings = self._get_coratings(user, userB)
                e = - delta_avg * (B_sum_coratings - n_coratings * B_avg_rating)
                f = n_coratings * delta_avg ** 2 - 2 * delta_avg * (A_sum_coratings - n_coratings * A_avg_rating)
                g = 0
            # TODO: e, f and g are not yet applied to the pair's B, C and D.

        self.user_ratings[user][item] = rating
        self.user_meta[user]['q'] += 1
        self.user_meta[user]['avg.rating'] = new_avg

        return new_avg, delta_avg

    def _update_rating(self, user, item, rating):
        """Handle ``user`` changing the rating previously given to ``item``.

        Adjusts the co-rating sums of every pair whose other user also rated
        ``item``, then stores the new rating and refreshes the user's average.
        The user's rating count is left unchanged because no new item was
        rated.

        Returns:
            Tuple ``(new_avg, delta_avg)``: the user's new average rating
            (rounded to 2 decimals) and its difference from the previous one.
        """
        delta_rating = rating - self.user_ratings[user][item]
        q = self.user_meta[user]['q']  # >= 1 here: the item was rated before
        A_avg_rating = self.user_meta[user]['avg.rating']
        new_avg = round(delta_rating / q + A_avg_rating, 2)
        delta_avg = new_avg - A_avg_rating

        for userB in self.user_meta:
            if userB == user:
                continue

            B_avg_rating = self.user_meta[userB]['avg.rating']

            if item in self.user_ratings[userB]:
                # User B has rated the item: replace A's old rating in the sums.
                A_sum_coratings, B_sum_coratings, n_coratings = self._update_get_coratings(
                    user, userB, item, rating, new_rating=False)
                B_rating = self.user_ratings[userB][item]

                e = delta_rating * (B_rating - B_avg_rating) - delta_avg * (B_sum_coratings - n_coratings * B_avg_rating)
                # NOTE(review): the formula for f is unverified in both branches.
                f = delta_rating ** 2 + 2 * delta_rating * (rating - new_avg) + n_coratings * delta_avg ** 2 - 2 * delta_avg * (A_sum_coratings - n_coratings * A_avg_rating)
                g = 0
            else:
                # User B has not rated the item: only the average shift matters.
                A_sum_coratings, B_sum_coratings, n_coratings = self._get_coratings(user, userB)
                e = - delta_avg * (B_sum_coratings - n_coratings * B_avg_rating)
                # NOTE(review): the formula for f is unverified in both branches.
                f = n_coratings * delta_avg ** 2 - 2 * delta_avg * (A_sum_coratings - n_coratings * A_avg_rating)
                g = 0
            # TODO: e, f and g are not yet applied to the pair's B, C and D.

        # Overwrite the rating; 'q' must NOT grow, the item was already counted.
        self.user_ratings[user][item] = rating
        self.user_meta[user]['avg.rating'] = new_avg

        return new_avg, delta_avg

    def _pair_key(self, userA, userB):
        """Return the key under which the (userA, userB) pair is stored.

        Pairs are stored once, in the order the users were first seen, so the
        reversed tuple is tried first and ``(userA, userB)`` is the fallback.
        """
        if (userB, userA) in self.user_pair_meta:
            return (userB, userA)
        return (userA, userB)

    def _get_coratings(self, userA, userB):
        """Return ``(sum of A's co-ratings, sum of B's co-ratings, count)``.

        Raises:
            KeyError: if no pair entry exists for the two users.
        """
        sums = self.user_pair_meta[self._pair_key(userA, userB)]['sum.co_ratings']
        return sums[userA], sums[userB], sums['n']

    def _update_get_coratings(self, userA, userB, item, rating, new_rating=True):
        """Update the pair's co-rating sums for ``item`` and return them.

        Args:
            userA: The active user submitting ``rating``.
            userB: The other user, who has already rated ``item``.
            item: The item being rated.
            rating: The rating userA is submitting now.
            new_rating: True when userA rates ``item`` for the first time
                (both sums and the count grow); False when userA replaces an
                earlier rating (only userA's sum changes).

        Returns:
            Tuple ``(A_sum_coratings, B_sum_coratings, n_coratings)`` after
            the update.
        """
        sums = self.user_pair_meta[self._pair_key(userA, userB)]['sum.co_ratings']
        if new_rating:
            sums[userA] += rating
            sums[userB] += self.user_ratings[userB][item]
            sums['n'] += 1
        else:
            # user_ratings still holds A's previous rating at this point.
            sums[userA] = sums[userA] - self.user_ratings[userA][item] + rating

        return sums[userA], sums[userB], sums['n']

    def run(self, user, item, rating):
        """Feed one ``(user, item, rating)`` observation into the model."""
        if user not in self.user_meta:
            self._new_user(user, item, rating)

        if item not in self.user_ratings[user]:
            # Submission of a new rating
            self._new_rating(user, item, rating)
        else:
            # Update of an existing rating
            self._update_rating(user, item, rating)

In [29]:
def stream(filepath, delimiter, max_cases=500):
    """Yield ``(user, item, rating)`` tuples from a delimited ratings file.

    Args:
        filepath: Path of a delimited text file whose header row names at
            least the ``user``, ``item`` and ``rating`` columns.
        delimiter: Column separator handed to ``csv.DictReader``.
        max_cases: Number of data rows after which iteration stops. A value
            that never equals a row index (for example ``None``) streams the
            whole file.

    Yields:
        ``(user, item, rating)`` with ``rating`` converted to ``float``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are not rewritten by universal-newline mode.
    with open(filepath, 'r', newline='') as csvf:
        csv_reader = csv.DictReader(csvf, delimiter=delimiter)
        for n, row in enumerate(csv_reader):
            if n == max_cases:
                break
            yield row['user'], row['item'], float(row['rating'])

In [30]:
# Replay the first 500 rows of the tab-separated ratings file through a
# fresh model, one observation at a time.
icf = myICF()
for user, item, rating in stream('ml_100k.csv', '\t', max_cases=500):
    icf.run(user, item, rating)

new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new 0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new 0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new -0.0 0.0 0
new 0.06360000000000009 0.2777000000000002 0.014400000000000026
new -0.0 0.0 0
new -0.003600000000000033 -0.03330000000000029 0
new -0.0 0.0 0
new 0.022800000000000067 0.24130000000000057 0
new 0.0 0.0 0
new 0.0011999999999999754 0.014699999999999692 0
new 0.0 0.0 0
new 0.00719999999999996 0.0923999999999994 0
new 0.0 0.0 0
new 0.012000000000000075 0.17000000000000093 0
new 0.0 0.0 0
new -0.0 0.0 0
new -0.

In [12]:
# user_ratings = {} # Dict to store ratings
# user_meta = {} # Dict to cache users info on number of ratings and average ratings
# user_pair_meta = {} # Dict to cache info on factors calculated for pairs of users (B, C, D, sum of ratings to co-rated items)
# # first_run = True

# # Simulate streaming data by reading user ratings one by one
# for stream in test:
#     user, item, rating = stream['user'], stream['item'], float(stream['rating'])
    
#     # initialize new user
#     if user not in user_meta.keys():
#         user_ratings[user] = {} # initializes user in ratings dict
#         user_meta[user] = {'q': 0, 'avg.rating': 0} # initializes user in meta dict, assign number of items user has rated and avg rating of user
        
#         # initializes pairs of existing users with new user in user_pair_meta dict
#         for u in user_meta.keys(): 
#             if u == user:
#                 continue
#             user_pair_meta[(u, user)] = {'B': 0, 'C': 0, 'D': 0}
#             user_pair_meta[(u, user)]['sum.co_ratings'] = {u: 0, user: 0}
            
#     # if user submits a new rating
#     if item not in user_ratings[user].keys():        
#         q = user_meta[user]['q'] # gets number of items user has rated
#         new_avg = round( ( rating/( q+1 ) ) + ( q/( q+1 ) )*user_meta[user]['avg.rating'], 2 ) # calculates new avg rating for active user
#         delta_avg = new_avg - user_meta[user]['avg.rating'] # difference of user's previous and current avg rating        

#     # if user updates a rating
#     else:
#         print('update')
#         delta_rating = rating - user_ratings[user][item] # difference of user's previous and current rating for item
#         q = user_meta[user]['q'] # gets number of items user has rated
#         new_avg = round( delta_rating/q + user_meta[user]['avg.rating'], 2 ) # calculates new avg rating for active user
#         delta_avg = new_avg - user_meta[user]['avg.rating'] # difference of user's previous and current avg rating        

#     user_meta[user]['avg.rating'] = new_avg # updates avg rating
#     user_ratings[user][item] = rating # updates rating given by user to item
#     user_meta[user]['q'] += 1 # updates number of items user has rated    
    
#     for userB in user_meta.keys():
#         if userB == user:
#             continue
#         for itemB in user_ratings[userB].keys():
#                 if itemB == item:
#                     try:
#                         user_pair_meta[(userB, user)]['sum.co_ratings'][userB] += user_ratings[userB][item]
#                         user_pair_meta[(userB, user)]['sum.co_ratings'][user] += user_ratings[user][item]
#                         print(f'users - A:{user}, B: {userB} | co-rated sums - {user_pair_meta[(userB, user)]["sum.co_ratings"]}')
#                     except:
#                         user_pair_meta[(user, userB)]['sum.co_ratings'][userB] += user_ratings[userB][item]
#                         user_pair_meta[(user, userB)]['sum.co_ratings'][user] += user_ratings[user][item]
#                         print(f'users - A:{user}, B: {userB} | co-rated sums - {user_pair_meta[(user, userB)]["sum.co_ratings"]}')
                

    
