In [18]:
import import_ipynb
from user_based_cf import *
import pandas as pd
import numpy as np

In [19]:
dsPath = 'ml-latest-small/ratings.csv'
df = pd.read_csv(dsPath)

print(df.head())
print("\nRow num: ", df.shape[0])

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931

Row num:  100836


In [27]:
users = [1, 2, 5]

items = []
for user in users:
    tmp = getRecommendedItems(df, user)
    items.extend([x[0] for x in tmp])

print(items)

[68954, 903, 3476, 3244, 8641, 3255, 3948, 33493, 314, 7, 3972, 36850, 112515, 54910, 76093, 80126, 122926, 3249, 74789, 3253, 56715, 112454, 1262, 1945, 6874, 32, 1921, 2336, 1961, 2542]


In [30]:
def addUsersPred(df, users, items):
    for user in users:
        for item in items:
            uRating = df[(df['userId'] == user) & (df['movieId'] == item)]['rating']
            
            if uRating.empty:
                newRow = {'userId': user, 'movieId': item, 'rating': basePred(df, user, item),  'timestamp': 0}
                df.loc[len(df)] = newRow
            
addUsersPred(df, users, items)

In [33]:
def getGroupAveragePred(df, items, users, k=10):
    item2pred = []
    for item in items:
        tmp = 0
        for user in users:
            uRating = df[(df['userId'] == user) & (df['movieId'] == item)]['rating']

            if uRating.empty:
                uRating = basePred(df, user, item)
            else:
                uRating = uRating.values[0]
            
            tmp += uRating
            
        item2pred.append((item, tmp/len(users)))

    return sorted(item2pred, key=lambda x: x[1], reverse=True)[:k]

print(getGroupAveragePred(df, items, users))

[(112454, 5.329314613733847), (56715, 5.11767718221981), (3972, 4.537117716356514), (36850, 4.534014779459512), (1945, 4.5163416674705035), (1262, 4.420983459165439), (1921, 4.407946913577528), (2542, 4.370779275621489), (32, 4.350258170495184), (1961, 4.341392606069912)]


In [23]:
def getGroupLeastMiseryPred(df, items, users, k=10):
    item2pred = []
    for item in items:
        min = np.inf
        for user in users:
            uRating = df[(df['userId'] == user) & (df['movieId'] == item)]['rating']

            if uRating.empty:
                uRating = basePred(df, user, item)
            else:
                uRating = uRating.values[0]
                
            if uRating < min:
                min = uRating
            
        item2pred.append((item, min))

    return sorted(item2pred, key=lambda x: x[1], reverse=True)[:k]

print(getGroupLeastMiseryPred(df, items, users))

[(1123, 4.15314887120955), (55820, 4.0623997056210746), (3741, 4.0389594139800105), (1875, 3.962164894182067), (58559, 3.9576849345604836), (1193, 3.9389130081484947), (2324, 3.9277534171701634), (2935, 3.9013178774048307), (7028, 3.8851539270938322), (965, 3.8792055374038625)]


In [24]:
def getUserPred(df, user, items, k=10):
    userPred = []

    for item in items:
        uRating = df[(df['userId'] == user) & (df['movieId'] == item)]['rating']

        if uRating.empty:
            uRating = basePred(df, user, item)
        else:
            uRating = uRating.values[0]
    
        userPred.append(uRating)
    
    return sorted(userPred, key=lambda x: x, reverse=True)[:k]

user = 1
print(getUserPred(df, user, items))        

[5.30070336765371, 5.238236814032719, 5.037678205866246, 4.976355757272191, 4.859246031860111, 4.818156938145612, 4.771934586838677, 4.750960654687634, 4.74253557651834, 4.73058850775497]


In [25]:
def getSatisfaction(df, items, user):
    den = sum(getUserPred(df, user, items, len(items)))
    
    for item in items:
        num = 0
            
        uRating = df[(df['userId'] == user) & (df['movieId'] == item)]['rating']

        if uRating.empty:
            uRating = basePred(df, user, item)
        else:
            uRating = uRating.values[0]
            
        num += uRating
    
    return num / den
        
user = 1
print(getSatisfaction(df, items, user))

0.0335263091404797


In [26]:
def getSequentialRecommendations(df, items, users, k=10):
    items = [x[0] for x in getGroupAveragePred(df, items, users, int(len(items)/2))]
    candidateSet = [items.pop(0)]

    for _ in range(0, k):
        min = np.inf
        bestItem = None

        for item in items:
            satisfaction = 0
            tmp = candidateSet.copy()
            tmp.append(item)
            
            for i in range(0, len(users)):
                for j in range(i+1, len(users)):
                    satisfaction += abs(getSatisfaction(df, tmp, users[i]) - getSatisfaction(df, tmp, users[j]))

            if satisfaction < min:
                min = satisfaction
                bestItem = item

        items.remove(bestItem)
        candidateSet.append(bestItem)
    
    return candidateSet

print(getSequentialRecommendations(df, items, users, k=10))



[70862, 3451, 2935, 2324, 84152, 1193, 7028, 3741, 55820, 965, 1123]
