# Incremental Collaborative Filtering (ICF) - Algorithm experiments

MovieLens 100k dataset: https://maxhalford.github.io/files/datasets/ml_100k.zip

In [1]:
import csv

In [3]:
class myICF():
    """Incrementally maintained per-user and per-user-pair rating statistics.

    Ratings are fed one at a time through :meth:`run`. After each call the
    instance holds:

    * ``user_ratings``   -- ``{user: {item: rating}}``
    * ``user_meta``      -- ``{user: {'q': number of rated items,
      'avg.rating': mean of the user's ratings}}``
    * ``user_pair_meta`` -- ``{(user_x, user_y): {'B': 0, 'C': 0, 'D': 0,
      'sum.co_ratings': {user_x: sum, user_y: sum}}}`` where each sum is the
      total of that user's ratings over the items both users have rated.
      Each unordered pair is stored once, keyed ``(older_user, newer_user)``.

    Args:
        verbose: When true (the default), print the co-rated sums of a pair
            every time they change.
    """

    def __init__(self, verbose=True):
        self.user_ratings = {}  # Dict to store ratings
        self.user_meta = {}  # Dict to cache users info on number of ratings and average ratings
        self.user_pair_meta = {}  # Dict to cache info on factors calculated for pairs of users (B, C, D, sum of ratings to co-rated items)
        self.verbose = verbose

    def _new_user(self, user, item, rating):
        """Register ``user`` and create a pair entry with every known user.

        ``item`` and ``rating`` are accepted for signature compatibility but
        are not used here.
        """
        self.user_ratings[user] = {}
        self.user_meta[user] = {'q': 0, 'avg.rating': 0}

        # One entry per unordered pair, keyed (existing user, new user).
        for other in self.user_meta:
            if other == user:
                continue
            self.user_pair_meta[(other, user)] = {
                'B': 0,
                'C': 0,
                'D': 0,
                'sum.co_ratings': {other: 0, user: 0},
            }

    def _new_rating(self, user, item, rating):
        """Return ``(new_avg, delta_avg)`` for a first rating of ``item``.

        Nothing is stored; the caller applies the result. The mean is kept
        at full precision because it is fed back into the next incremental
        update, so rounding it would accumulate error.
        """
        meta = self.user_meta[user]
        old_avg = meta['avg.rating']
        new_avg = old_avg + (rating - old_avg) / (meta['q'] + 1)
        # TODO: the incremental update of the pair factors B, C and D (the
        # e / f / g increments sketched in an earlier draft) is not
        # implemented yet, so those factors currently stay at 0.
        return new_avg, new_avg - old_avg

    def _update_rating(self, user, item, rating):
        """Return ``(new_avg, delta_avg)`` when ``user`` re-rates ``item``.

        Must be called before the stored rating is overwritten, since the
        previous rating is read from ``user_ratings``.
        """
        meta = self.user_meta[user]
        old_avg = meta['avg.rating']
        delta_rating = rating - self.user_ratings[user][item]
        # The number of rated items is unchanged by an update.
        new_avg = old_avg + delta_rating / meta['q']
        return new_avg, new_avg - old_avg

    def _pair_meta(self, user_a, user_b):
        """Return the shared pair entry regardless of key ordering.

        Raises:
            KeyError: If the two users have no pair entry.
        """
        try:
            return self.user_pair_meta[(user_a, user_b)]
        except KeyError:
            return self.user_pair_meta[(user_b, user_a)]

    def _update_coratings(self, user, item, previous_rating=None):
        """Refresh co-rated sums for every user who has also rated ``item``.

        Expects ``user_ratings[user][item]`` to already hold the current
        rating.

        Args:
            user: The user whose rating was just stored.
            item: The item that was rated.
            previous_rating: The rating ``user`` had given ``item`` before
                this call, or ``None`` if this is the first rating. On a
                first rating both users' ratings are added to the pair's
                sums; on an update only ``user``'s sum is shifted by the
                change, so the item is not counted twice.
        """
        current = self.user_ratings[user][item]
        for userB in self.user_meta:
            if userB == user:
                continue
            ratings_b = self.user_ratings[userB]
            if item not in ratings_b:
                continue

            sums = self._pair_meta(userB, user)['sum.co_ratings']
            if previous_rating is None:
                sums[userB] += ratings_b[item]
                sums[user] += current
            else:
                sums[user] += current - previous_rating

            if self.verbose:
                print(f'users - A:{user}, B: {userB} | co-rated sums - {sums}')

    def run(self, user, item, rating):
        """Process one ``(user, item, rating)`` event and update all caches.

        Unknown users are registered first. A rating for an item the user
        has not rated yet increases the user's item count; a rating for an
        already-rated item replaces the old value without changing the count.
        """
        if user not in self.user_meta:
            self._new_user(user, item, rating)

        previous_rating = self.user_ratings[user].get(item)
        is_new = item not in self.user_ratings[user]

        if is_new:
            new_avg, delta_avg = self._new_rating(user, item, rating)
        else:
            new_avg, delta_avg = self._update_rating(user, item, rating)

        self.user_meta[user]['avg.rating'] = new_avg
        self.user_ratings[user][item] = rating
        if is_new:
            # Only a first rating adds to the number of rated items.
            self.user_meta[user]['q'] += 1

        self._update_coratings(user, item, None if is_new else previous_rating)

In [4]:
def stream(filepath, delimiter, max_cases=500):
    """Yield ``(user, item, rating)`` tuples from a delimited ratings file.

    The file must have a header row containing the columns ``user``,
    ``item`` and ``rating``. ``user`` and ``item`` are yielded as the raw
    strings read from the file; ``rating`` is converted to ``float``.

    Args:
        filepath: Path of the file to read.
        delimiter: Single-character field separator passed to the CSV reader.
        max_cases: Stop after yielding this many rows. ``None`` reads the
            whole file.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If a rating cannot be converted to ``float``.
    """
    # newline='' lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields are returned unmodified.
    with open(filepath, 'r', newline='') as csvf:
        reader = csv.DictReader(csvf, delimiter=delimiter)
        for n, row in enumerate(reader):
            if n == max_cases:
                break
            yield row['user'], row['item'], float(row['rating'])

In [6]:
# Replay up to 500 rows of the tab-separated ratings file through the model,
# one rating at a time, to simulate a stream of incoming ratings.
icf = myICF()
for user, item, rating in stream(filepath='ml_100k.csv', delimiter='\t', max_cases=500):    
    icf.run(user, item, rating)

users - A:712, B: 259 | co-rated sums - {'259': 4.0, '712': 5.0}
users - A:712, B: 259 | co-rated sums - {'259': 8.0, '712': 10.0}
users - A:851, B: 259 | co-rated sums - {'259': 3.0, '851': 5.0}
users - A:119, B: 259 | co-rated sums - {'259': 4.0, '119': 5.0}
users - A:119, B: 712 | co-rated sums - {'712': 4.0, '119': 5.0}
users - A:119, B: 259 | co-rated sums - {'259': 8.0, '119': 9.0}
users - A:119, B: 851 | co-rated sums - {'851': 4.0, '119': 4.0}
users - A:119, B: 851 | co-rated sums - {'851': 9.0, '119': 9.0}
users - A:119, B: 712 | co-rated sums - {'712': 9.0, '119': 9.0}
users - A:119, B: 259 | co-rated sums - {'259': 12.0, '119': 14.0}
users - A:119, B: 851 | co-rated sums - {'851': 13.0, '119': 12.0}
users - A:119, B: 259 | co-rated sums - {'259': 15.0, '119': 18.0}
users - A:119, B: 851 | co-rated sums - {'851': 18.0, '119': 16.0}
users - A:119, B: 259 | co-rated sums - {'259': 19.0, '119': 21.0}
users - A:119, B: 851 | co-rated sums - {'851': 21.0, '119': 19.0}
users - A:64

In [12]:
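# NOTE: earlier procedural draft of the logic that now lives in the myICF class; kept commented out for reference.
# user_ratings = {} # Dict to store ratings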
# user_meta = {} # Dict to cache users info on number of ratings and average ratings
# user_pair_meta = {} # Dict to cache info on factors calculated for pairs of users (B, C, D, sum of ratings to co-rated items)
# # first_run = True

# # Simulate streaming data by reading user ratings one by one
# for stream in test:
#     user, item, rating = stream['user'], stream['item'], float(stream['rating'])
    
#     # initialize new user
#     if user not in user_meta.keys():
#         user_ratings[user] = {} # initializes user in ratings dict
#         user_meta[user] = {'q': 0, 'avg.rating': 0} # initializes user in meta dict, assign number of items user has rated and avg rating of user
        
#         # initializes pairs of existing users with new user in user_pair_meta dict
#         for u in user_meta.keys(): 
#             if u == user:
#                 continue
#             user_pair_meta[(u, user)] = {'B': 0, 'C': 0, 'D': 0}
#             user_pair_meta[(u, user)]['sum.co_ratings'] = {u: 0, user: 0}
            
#     # if user submits a new rating
#     if item not in user_ratings[user].keys():        
#         q = user_meta[user]['q'] # gets number of items user has rated
#         new_avg = round( ( rating/( q+1 ) ) + ( q/( q+1 ) )*user_meta[user]['avg.rating'], 2 ) # calculates new avg rating for active user
#         delta_avg = new_avg - user_meta[user]['avg.rating'] # difference of user's previous and current avg rating        

#     # if user updates a rating
#     else:
#         print('update')
#         delta_rating = rating - user_ratings[user][item] # difference of user's previous and current rating for item
#         q = user_meta[user]['q'] # gets number of items user has rated
#         new_avg = round( delta_rating/q + user_meta[user]['avg.rating'], 2 ) # calculates new avg rating for active user
#         delta_avg = new_avg - user_meta[user]['avg.rating'] # difference of user's previous and current avg rating        

#     user_meta[user]['avg.rating'] = new_avg # updates avg rating
#     user_ratings[user][item] = rating # updates rating given by user to item
#     user_meta[user]['q'] += 1 # updates number of items user has rated    
    
#     for userB in user_meta.keys():
#         if userB == user:
#             continue
#         for itemB in user_ratings[userB].keys():
#                 if itemB == item:
#                     try:
#                         user_pair_meta[(userB, user)]['sum.co_ratings'][userB] += user_ratings[userB][item]
#                         user_pair_meta[(userB, user)]['sum.co_ratings'][user] += user_ratings[user][item]
#                         print(f'users - A:{user}, B: {userB} | co-rated sums - {user_pair_meta[(userB, user)]["sum.co_ratings"]}')
#                     except:
#                         user_pair_meta[(user, userB)]['sum.co_ratings'][userB] += user_ratings[userB][item]
#                         user_pair_meta[(user, userB)]['sum.co_ratings'][user] += user_ratings[user][item]
#                         print(f'users - A:{user}, B: {userB} | co-rated sums - {user_pair_meta[(user, userB)]["sum.co_ratings"]}')
                

    
