# Implementation of the first model: time-weighted feedback model

Let's suppose we have a Matrix C such that each value r_ij is the rating of item j by user i (i.e. if user i clicked / viewed / bought the item j)

Let's suppose we have a Matrix T such that each value t_ij is the time of item j by user i (i.e. the last time that user i did the action r_ij on the item j)

n: number of closest items to a specific item to compute

K: number of clusters

l: number of items to recommend

### Ratings: 

r_ij = 0 iff user i never bought nor viewed nor clicked the item j

r_ij = 1 iff user i clicked the item j

r_ij = 2 iff user i viewed the item j

r_ij = 3 iff user i bought the item j

In [1]:
import numpy as np
import pandas as pd

## Variables

In [2]:
df_ratings = pd.DataFrame(data = {'item0': [0, 2, 1],  'item1': [0, 3, 1], 'item2': [1, 0, 2],  'item3': [2, 3, 0]}, index=['user0', 'user1', 'user2'])
df_ratings

Unnamed: 0,item0,item1,item2,item3
user0,0,0,1,2
user1,2,3,0,3
user2,1,1,2,0


In [3]:
df_time = pd.DataFrame(data = {'item0': [658, 2350, 15232],  'item1': [735, 3606, 14787], 'item2': [500, 4000, 15986],  'item3': [800, 2006, 14642]}, index=['user0', 'user1', 'user2'])
df_time

Unnamed: 0,item0,item1,item2,item3
user0,658,735,500,800
user1,2350,3606,4000,2006
user2,15232,14787,15986,14642


In [4]:
C = df_ratings.values
T = df_time.values
n = 2
K = 2
l = 2

In [5]:
C.dtype

dtype('int64')

## Extraction of variables

In [6]:
C = np.load('rating_matrix.npy')
T = np.load('time_matrix.npy')
n = 2
K = 2
l = 2

In [7]:
item_id = pd.read_csv('item_index.csv', header=None)

In [8]:
item_id

Unnamed: 0,0,1
0,40156,0
1,15632,1
2,36956,2
3,34157,3
4,4953,4
...,...,...
2995,2253,2995
2996,2406,2996
2997,2463,2997
2998,2556,2998


In [9]:
int(item_id[item_id[1] == 4][0])

4953

In [10]:
list(item_id[0])

[40156,
 15632,
 36956,
 34157,
 4953,
 14614,
 32394,
 32769,
 31013,
 14889,
 9186,
 14179,
 4099,
 14784,
 6576,
 43548,
 49272,
 1049,
 7017,
 6445,
 1364,
 2843,
 6909,
 375755,
 6249,
 10858,
 75457,
 332,
 8758,
 11986,
 13831,
 30340,
 32757,
 67334,
 35015,
 36462,
 375069,
 4453,
 6327,
 7101,
 60992,
 9654,
 10274,
 4954,
 8047,
 11542,
 60638,
 131151,
 5285,
 10573,
 14515,
 25769,
 35889,
 67792,
 103581,
 12970,
 30711,
 34952,
 36121,
 36351,
 82984,
 128733,
 129045,
 7771,
 15394,
 30440,
 32138,
 64884,
 3511,
 7258,
 33073,
 35383,
 81684,
 2905,
 6828,
 14520,
 35311,
 36083,
 36780,
 73719,
 76663,
 88855,
 89764,
 381164,
 594,
 768,
 918,
 7107,
 8960,
 14096,
 53294,
 68131,
 79141,
 2866,
 3113,
 7096,
 7492,
 22046,
 34797,
 34972,
 35378,
 35734,
 49920,
 69276,
 70368,
 181750,
 1255,
 1702,
 2322,
 2453,
 5275,
 5400,
 7268,
 7589,
 9582,
 30720,
 35106,
 36992,
 74782,
 90068,
 90884,
 124306,
 126492,
 45,
 488,
 1189,
 1253,
 2633,
 4523,
 6961,
 9460,


## Functions

### Compute item similarity

In [11]:
from scipy import spatial, stats

In [25]:
def cosine_sim(item1, item2, j):
    '''
    Compute cosine similarity between item1 and item2
    '''
    return 1 - spatial.distance.cosine(item1, item2), int(item_id[item_id[1] == j][0])

In [13]:
def pearson(item1, item2):
    '''
    Compute pearson correlation coefficient between item1 and item2
    '''
    sim, _ = stats.pearsonr(item1, item2)
    return sim

In [14]:
def prob(item1, item2):
    '''
    Compute conditional probability-based similarity between item1 and item2
    Probability of purchasing item2 given that item1 has been purchased
    '''
    freq1_2 = len([i for i, j in zip(item1, item2) if i == 3 and j == 3])
    freq1 = list(item1).count(3)
    if freq1 == 0:
        return 0
    return freq1_2 / freq1

In [34]:
def compute_item_similarity(C,n,app='cos'):
    '''
    Compute n closest items for each item in C
    '''
    closest = {}
    for i, item1 in enumerate(C.T):
        sim, sim_id = [], []
        if app=='pearson':
            sim = [pearson(item1,item2) if i!=j else -1 for j, item2 in enumerate(C.T)]
        elif app=='prob':
            sim = [prob(item1,item2) if i!=j else -1 for j, item2 in enumerate(C.T)]
        else:
            sim = [cosine_sim(item1,item2,j) if i!=j else (-1,j) for j, item2 in enumerate(C.T)]
        closest[int(item_id[item_id[1] == i][0])] = [t2 for t1, t2 in sorted(sim, key=lambda x: x[0])[:n]]
        #closest[df_ratings.columns[i]] = [x for _, x in sorted(zip(sim, range(len(df_ratings.columns))), reverse=True)][:n]
    return closest

In [36]:
M = compute_item_similarity(C,n,'cos')
M

KeyboardInterrupt: 

In [None]:
#compute_item_similarity(C,n,'pearson')

In [17]:
#compute_item_similarity(C,n,'prob')

### Learning parameters

In [17]:
import math

In [18]:
def f(t, l):
    try:
        return math.exp(-l*t)
    except:
        return -1 

In [19]:
def pred_time(user, item1, i, j, t):
    '''
    Predict opinion of user i on item j
    '''
    s1, s2 = 0, 0
    for c in M['item'+ str(j)]:
        p = C[i][c] # pred(user, item2, i, c, t)
        q = cosine_sim(item1, C.T[c])*f(T[i][c], 1/t)
        s1 += p*q
        s2 += q
    return s1 / s2

In [20]:
def pred(user, item1, i, j):
    '''
    Predict opinion of user i on item j
    '''
    s1, s2 = 0, 0
    for c in M['item'+ str(j)]:
        p = C[i][c] # pred(user, item2, i, c, t)
        q = cosine_sim(item1, C.T[c])
        s1 += p*q
        s2 += q
    return s1 / s2

In [23]:
M[0]

KeyError: 0

In [39]:
def find_T02(user, i):
    s, m, t_0 = 0, 0, 100
    for j, item in enumerate(C.T):
        pred_ij = pred(user, item, i, j, t_0)
        q_ij = C[i][j]
        s += abs(pred_ij - q_ij) / len(C.T)
    return t_0

In [40]:
def find_T0(user, i):
    s, m, t_0 = 0, 564124554, 1
    for t in range(-1000, 1000):
        if t == 0:
            continue
        s = 0
        for j, item in enumerate(C.T):
            pred_ij = pred(user, item, i, j, t)
            q_ij = C[i][j]
            s += abs(pred_ij - q_ij) / len(C.T)
        if s < m:
            m = s
            t_0 = t
    return t_0

In [41]:
def learning_parameters(C,M,T,n,K,l):
    '''
    Search T0 for each user such that the error is minimum
    '''
    parameters = {}
    for i, user in enumerate(C):
        print(i)
        parameters[df_ratings.index[i]] = find_T0(user, i)
    return parameters

In [33]:
learning_parameters(C,M,T,n,K,l)

0
1


  return s1 / s2


2


{'user0': 1, 'user1': -3, 'user2': -22}

In [44]:
def n_max(l, n):
    maximums = []
    for i in range(n):
        m = max(l)
        maximums.append(m)
        l.remove(m)
    return maximums

In [52]:
def predict_items(C, n, l):
    M = compute_item_similarity(C,n,'cos')
    predictions = {}
    for i, user in enumerate(C):
        pred_i = []
        for j, item in enumerate(C.T):
            pred_i.append(pred(user, item, i, j))
        predictions[df_ratings.index[i]] = [x for _, x in sorted(zip(pred_i, range(len(df_ratings.columns))), reverse=True)][:n]
    return predictions

In [53]:
predict_items(C, n, l)

{'user0': [1, 0], 'user1': [0, 3], 'user2': [3, 2]}