# Implementation of the first model: time-weighted feedback model

Let's suppose we have a Matrix C such that each value r_ij is the rating of item j by user i (i.e. if user i clicked / viewed / bought the item j)

Let's suppose we have a Matrix T such that each value t_ij is the time of item j by user i (i.e. the last time that user i did the action r_ij on the item j)

n: number of closest items to a specific item to compute

K: number of clusters

l: number of items to recommend

### Ratings: 

r_ij = 0 iff user i never bought nor viewed nor clicked the item j

r_ij = 1 iff user i clicked the item j

r_ij = 2 iff user i viewed the item j

r_ij = 3 iff user i bought the item j

In [25]:
import numpy as np
import pandas as pd

## Variables

In [26]:
df_ratings = pd.DataFrame(data = {'item0': [0, 2, 1],  'item1': [0, 3, 1], 'item2': [1, 0, 2],  'item3': [2, 3, 0]}, index=['user0', 'user1', 'user2'])
df_ratings

Unnamed: 0,item0,item1,item2,item3
user0,0,0,1,2
user1,2,3,0,3
user2,1,1,2,0


In [27]:
df_time = pd.DataFrame(data = {'item0': [658, 2350, 15232],  'item1': [735, 3606, 14787], 'item2': [500, 4000, 15986],  'item3': [800, 2006, 14642]}, index=['user0', 'user1', 'user2'])
df_time

Unnamed: 0,item0,item1,item2,item3
user0,658,735,500,800
user1,2350,3606,4000,2006
user2,15232,14787,15986,14642


In [28]:
C = df_ratings.values
T = df_time.values
n = 2
K = 2
l = 2

In [29]:
C.dtype

dtype('int64')

## Extraction of variables

In [30]:
C = np.load('rating_matrix.npy')
T = np.load('time_matrix.npy')
n = 2
K = 2
l = 2

In [31]:
item_id = pd.read_csv('item_index.csv', header=None)

In [32]:
item_id

Unnamed: 0,0,1
0,40156,0
1,15632,1
2,36956,2
3,34157,3
4,4953,4
...,...,...
2995,2253,2995
2996,2406,2996
2997,2463,2997
2998,2556,2998


In [33]:
int(item_id[item_id[1] == 4][0])

4953

In [34]:
list(item_id[0])

[40156,
 15632,
 36956,
 34157,
 4953,
 14614,
 32394,
 32769,
 31013,
 14889,
 9186,
 14179,
 4099,
 14784,
 6576,
 43548,
 49272,
 1049,
 7017,
 6445,
 1364,
 2843,
 6909,
 375755,
 6249,
 10858,
 75457,
 332,
 8758,
 11986,
 13831,
 30340,
 32757,
 67334,
 35015,
 36462,
 375069,
 4453,
 6327,
 7101,
 60992,
 9654,
 10274,
 4954,
 8047,
 11542,
 60638,
 131151,
 5285,
 10573,
 14515,
 25769,
 35889,
 67792,
 103581,
 12970,
 30711,
 34952,
 36121,
 36351,
 82984,
 128733,
 129045,
 7771,
 15394,
 30440,
 32138,
 64884,
 3511,
 7258,
 33073,
 35383,
 81684,
 2905,
 6828,
 14520,
 35311,
 36083,
 36780,
 73719,
 76663,
 88855,
 89764,
 381164,
 594,
 768,
 918,
 7107,
 8960,
 14096,
 53294,
 68131,
 79141,
 2866,
 3113,
 7096,
 7492,
 22046,
 34797,
 34972,
 35378,
 35734,
 49920,
 69276,
 70368,
 181750,
 1255,
 1702,
 2322,
 2453,
 5275,
 5400,
 7268,
 7589,
 9582,
 30720,
 35106,
 36992,
 74782,
 90068,
 90884,
 124306,
 126492,
 45,
 488,
 1189,
 1253,
 2633,
 4523,
 6961,
 9460,


In [35]:
df = pd.DataFrame(C, columns = list(item_id[0]))

In [38]:
user_id = pd.read_csv('user_index.csv', header=None)

In [39]:
user_id

Unnamed: 0,0,1
0,17732,0
1,4094,1
2,20346,2
3,13255,3
4,24034,4
...,...,...
995,57354,995
996,57538,996
997,57676,997
998,70472,998


In [40]:
df.index = list(user_id[0])

In [41]:
df

Unnamed: 0,40156,15632,36956,34157,4953,14614,32394,32769,31013,14889,...,1904,1932,1942,2034,2250,2253,2406,2463,2556,2666
17732,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4094,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20346,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13255,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24034,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57354,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
57538,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
57676,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
70472,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Functions

### Compute item similarity

In [11]:
from scipy import spatial, stats

In [12]:
def cosine_sim(item1, item2, j):
    '''
    Compute cosine similarity between item1 and item2
    '''
    return 1 - spatial.distance.cosine(item1, item2), int(item_id[item_id[1] == j][0])

In [13]:
def pearson(item1, item2):
    '''
    Compute pearson correlation coefficient between item1 and item2
    '''
    sim, _ = stats.pearsonr(item1, item2)
    return sim

In [14]:
def prob(item1, item2):
    '''
    Compute conditional probability-based similarity between item1 and item2
    Probability of purchasing item2 given that item1 has been purchased
    '''
    freq1_2 = len([i for i, j in zip(item1, item2) if i == 3 and j == 3])
    freq1 = list(item1).count(3)
    if freq1 == 0:
        return 0
    return freq1_2 / freq1

In [24]:
def compute_item_similarity(C,n,app='cos'):
    '''
    Compute n closest items for each item in C
    '''
    closest = {}
    for i, item1 in enumerate(C.T):
        print(f'{i}/{len(C.T)}')
        sim = [cosine_sim(item1,item2,j) if i!=j else (-1,j) for j, item2 in enumerate(C.T)]
        #closest[int(item_id[item_id[1] == i][0])] = [t2 for t1, t2 in sorted(sim, key=lambda x: x[0])[:n]]
        #closest[df.columns[i]] = [x for _, x in sorted(zip(sim, range(len(df.columns))), reverse=True)][:n]
        closest[df.columns[i]] = [x for _, x in sorted(zip(sim, df.columns), reverse=True)][:n]
    return closest

In [25]:
M = compute_item_similarity(C,n,'cos')
M

0/3000
1/3000
2/3000
3/3000
4/3000
5/3000
6/3000
7/3000
8/3000
9/3000
10/3000
11/3000
12/3000
13/3000
14/3000
15/3000
16/3000
17/3000
18/3000
19/3000
20/3000
21/3000
22/3000
23/3000
24/3000
25/3000
26/3000
27/3000
28/3000
29/3000
30/3000
31/3000
32/3000
33/3000
34/3000
35/3000
36/3000
37/3000
38/3000
39/3000
40/3000
41/3000
42/3000
43/3000
44/3000
45/3000
46/3000
47/3000
48/3000
49/3000
50/3000
51/3000
52/3000
53/3000
54/3000
55/3000
56/3000
57/3000
58/3000
59/3000
60/3000
61/3000
62/3000
63/3000
64/3000
65/3000
66/3000
67/3000
68/3000
69/3000
70/3000
71/3000
72/3000
73/3000
74/3000
75/3000
76/3000
77/3000
78/3000
79/3000
80/3000
81/3000
82/3000
83/3000
84/3000
85/3000
86/3000
87/3000
88/3000
89/3000
90/3000
91/3000
92/3000
93/3000
94/3000
95/3000
96/3000
97/3000
98/3000
99/3000
100/3000
101/3000
102/3000
103/3000
104/3000
105/3000
106/3000
107/3000
108/3000
109/3000
110/3000
111/3000
112/3000
113/3000
114/3000
115/3000
116/3000
117/3000
118/3000
119/3000
120/3000
121/3000
122/3000
123

923/3000
924/3000
925/3000
926/3000
927/3000
928/3000
929/3000
930/3000
931/3000
932/3000
933/3000
934/3000
935/3000
936/3000
937/3000
938/3000
939/3000
940/3000
941/3000
942/3000
943/3000
944/3000
945/3000
946/3000
947/3000
948/3000
949/3000
950/3000
951/3000
952/3000
953/3000
954/3000
955/3000
956/3000
957/3000
958/3000
959/3000
960/3000
961/3000
962/3000
963/3000
964/3000
965/3000
966/3000
967/3000
968/3000
969/3000
970/3000
971/3000
972/3000
973/3000
974/3000
975/3000
976/3000
977/3000
978/3000
979/3000
980/3000
981/3000
982/3000
983/3000
984/3000
985/3000
986/3000
987/3000
988/3000
989/3000
990/3000
991/3000
992/3000
993/3000
994/3000
995/3000
996/3000
997/3000
998/3000
999/3000
1000/3000
1001/3000
1002/3000
1003/3000
1004/3000
1005/3000
1006/3000
1007/3000
1008/3000
1009/3000
1010/3000
1011/3000
1012/3000
1013/3000
1014/3000
1015/3000
1016/3000
1017/3000
1018/3000
1019/3000
1020/3000
1021/3000
1022/3000
1023/3000
1024/3000
1025/3000
1026/3000
1027/3000
1028/3000
1029/3000
1030/30

1750/3000
1751/3000
1752/3000
1753/3000
1754/3000
1755/3000
1756/3000
1757/3000
1758/3000
1759/3000
1760/3000
1761/3000
1762/3000
1763/3000
1764/3000
1765/3000
1766/3000
1767/3000
1768/3000
1769/3000
1770/3000
1771/3000
1772/3000
1773/3000
1774/3000
1775/3000
1776/3000
1777/3000
1778/3000
1779/3000
1780/3000
1781/3000
1782/3000
1783/3000
1784/3000
1785/3000
1786/3000
1787/3000
1788/3000
1789/3000
1790/3000
1791/3000
1792/3000
1793/3000
1794/3000
1795/3000
1796/3000
1797/3000
1798/3000
1799/3000
1800/3000
1801/3000
1802/3000
1803/3000
1804/3000
1805/3000
1806/3000
1807/3000
1808/3000
1809/3000
1810/3000
1811/3000
1812/3000
1813/3000
1814/3000
1815/3000
1816/3000
1817/3000
1818/3000
1819/3000
1820/3000
1821/3000
1822/3000
1823/3000
1824/3000
1825/3000
1826/3000
1827/3000
1828/3000
1829/3000
1830/3000
1831/3000
1832/3000
1833/3000
1834/3000
1835/3000
1836/3000
1837/3000
1838/3000
1839/3000
1840/3000
1841/3000
1842/3000
1843/3000
1844/3000
1845/3000
1846/3000
1847/3000
1848/3000
1849/3000


2570/3000
2571/3000
2572/3000
2573/3000
2574/3000
2575/3000
2576/3000
2577/3000
2578/3000
2579/3000
2580/3000
2581/3000
2582/3000
2583/3000
2584/3000
2585/3000
2586/3000
2587/3000
2588/3000
2589/3000
2590/3000
2591/3000
2592/3000
2593/3000
2594/3000
2595/3000
2596/3000
2597/3000
2598/3000
2599/3000
2600/3000
2601/3000
2602/3000
2603/3000
2604/3000
2605/3000
2606/3000
2607/3000
2608/3000
2609/3000
2610/3000
2611/3000
2612/3000
2613/3000
2614/3000
2615/3000
2616/3000
2617/3000
2618/3000
2619/3000
2620/3000
2621/3000
2622/3000
2623/3000
2624/3000
2625/3000
2626/3000
2627/3000
2628/3000
2629/3000
2630/3000
2631/3000
2632/3000
2633/3000
2634/3000
2635/3000
2636/3000
2637/3000
2638/3000
2639/3000
2640/3000
2641/3000
2642/3000
2643/3000
2644/3000
2645/3000
2646/3000
2647/3000
2648/3000
2649/3000
2650/3000
2651/3000
2652/3000
2653/3000
2654/3000
2655/3000
2656/3000
2657/3000
2658/3000
2659/3000
2660/3000
2661/3000
2662/3000
2663/3000
2664/3000
2665/3000
2666/3000
2667/3000
2668/3000
2669/3000


{40156: [118410, 30725],
 15632: [725, 10130],
 36956: [76534, 14784],
 34157: [171245, 67698],
 4953: [67334, 33070],
 14614: [79117, 135708],
 32394: [8758, 8759],
 32769: [2866, 6445],
 31013: [9992, 3093],
 14889: [14890, 31981],
 9186: [16652, 4540],
 14179: [8047, 7025],
 4099: [47534, 9739],
 14784: [42775, 1281],
 6576: [36463, 19906],
 43548: [53307, 135708],
 49272: [35458, 4515],
 1049: [3451, 58958],
 7017: [46871, 7015],
 6445: [5352, 32769],
 1364: [31083, 31579],
 2843: [32756, 10951],
 6909: [375205, 66175],
 375755: [130822, 20484],
 6249: [14000, 128966],
 10858: [36602, 34911],
 75457: [70487, 83667],
 332: [34915, 37940],
 8758: [9867, 32394],
 11986: [41310, 375205],
 13831: [6327, 2863],
 30340: [64100, 38133],
 32757: [10957, 10274],
 67334: [4953, 40588],
 35015: [768, 81684],
 36462: [58181, 68986],
 375069: [7270, 34797],
 4453: [34968, 70797],
 6327: [1055, 13831],
 7101: [12445, 83190],
 60992: [146397, 79582],
 9654: [30792, 36642],
 10274: [3148, 74297],
 

In [26]:
M

{40156: [118410, 30725],
 15632: [725, 10130],
 36956: [76534, 14784],
 34157: [171245, 67698],
 4953: [67334, 33070],
 14614: [79117, 135708],
 32394: [8758, 8759],
 32769: [2866, 6445],
 31013: [9992, 3093],
 14889: [14890, 31981],
 9186: [16652, 4540],
 14179: [8047, 7025],
 4099: [47534, 9739],
 14784: [42775, 1281],
 6576: [36463, 19906],
 43548: [53307, 135708],
 49272: [35458, 4515],
 1049: [3451, 58958],
 7017: [46871, 7015],
 6445: [5352, 32769],
 1364: [31083, 31579],
 2843: [32756, 10951],
 6909: [375205, 66175],
 375755: [130822, 20484],
 6249: [14000, 128966],
 10858: [36602, 34911],
 75457: [70487, 83667],
 332: [34915, 37940],
 8758: [9867, 32394],
 11986: [41310, 375205],
 13831: [6327, 2863],
 30340: [64100, 38133],
 32757: [10957, 10274],
 67334: [4953, 40588],
 35015: [768, 81684],
 36462: [58181, 68986],
 375069: [7270, 34797],
 4453: [34968, 70797],
 6327: [1055, 13831],
 7101: [12445, 83190],
 60992: [146397, 79582],
 9654: [30792, 36642],
 10274: [3148, 74297],
 

### Learning parameters

In [27]:
import math

In [28]:
def f(t, l):
    try:
        return math.exp(-l*t)
    except:
        return -1 

In [123]:
def pred_time(user, item1, i, j, t):
    '''
    Predict opinion of user i on item j
    '''
    
    s1, s2 = 0, 0
    for c in M[df.columns[j]]:
        p = float(C[i][item_id[item_id[0] == c][1]]) # pred(user, item2, i, c, t)
        q, _ = cosine_sim(item1, C.T[item_id[item_id[0] == c][1]], j)*f(T[i][c], 1/t)
        s1 += p*q
        s2 += q
    return s1 / s2

In [130]:
def pred(user, item1, i, j):
    '''
    Predict opinion of user i on item j
    '''
    s1, s2 = 0, 0
    for c in M[df.columns[j]]:
        p = float(C[i][item_id[item_id[0] == c][1]]) # pred(user, item2, i, c, t)
        q, _ = cosine_sim(item1, C.T[item_id[item_id[0] == c][1]], j)
        s1 += p*q
        s2 += q
    return s1 / s2

In [131]:
M[0]

KeyError: 0

In [132]:
def find_T02(user, i):
    s, m, t_0 = 0, 0, 100
    for j, item in enumerate(C.T):
        pred_ij = pred(user, item, i, j, t_0)
        q_ij = C[i][item_id[item_id[0] == j][1]]
        s += abs(pred_ij - q_ij) / len(C.T)
    return t_0

In [133]:
def find_T0(user, i):
    s, m, t_0 = 0, 564124554, 1
    for t in range(-1000, 1000):
        if t == 0:
            continue
        s = 0
        for j, item in enumerate(C.T):
            pred_ij = pred(user, item, i, j, t)
            q_ij = C[i][j]
            s += abs(pred_ij - q_ij) / len(C.T)
        if s < m:
            m = s
            t_0 = t
    return t_0

In [45]:
def learning_parameters(C,M,T,n,K,l):
    '''
    Search T0 for each user such that the error is minimum
    '''
    parameters = {}
    for i, user in enumerate(C):
        parameters[df.index[i]] = find_T0(user, i)
    return parameters

In [135]:
learning_parameters(C,M,T,n,K,l)

0


TypeError: pred() takes 4 positional arguments but 5 were given

In [136]:
def n_max(l, n):
    maximums = []
    for i in range(n):
        m = max(l)
        maximums.append(m)
        l.remove(m)
    return maximums

In [None]:
users_df = 

In [43]:
df.index[0]

17732

In [44]:
def predict_items(C, n, l):
    #M = compute_item_similarity(C,n,'cos')
    predictions = {}
    for i, user in enumerate(C):
        print(f'{i}/{len(C)}')
        pred_i = []
        for j, item in enumerate(C.T):
            pred_i.append(pred(user, item, i, j))
        
        predictions[df.index[i]] = [x for _, x in sorted(zip(pred_i, df.columns), reverse=True)][:n]
    return predictions

In [140]:
predictions = predict_items(C, n, l)

0/1000


  return s1 / s2


1/1000
2/1000
3/1000
4/1000
5/1000
6/1000
7/1000
8/1000
9/1000
10/1000
11/1000
12/1000
13/1000
14/1000
15/1000
16/1000
17/1000
18/1000
19/1000
20/1000
21/1000
22/1000
23/1000
24/1000
25/1000
26/1000
27/1000
28/1000
29/1000
30/1000
31/1000
32/1000
33/1000
34/1000
35/1000
36/1000
37/1000
38/1000
39/1000
40/1000
41/1000
42/1000
43/1000
44/1000
45/1000
46/1000
47/1000
48/1000
49/1000
50/1000
51/1000
52/1000
53/1000
54/1000
55/1000
56/1000
57/1000
58/1000
59/1000
60/1000
61/1000
62/1000
63/1000
64/1000
65/1000
66/1000
67/1000
68/1000
69/1000
70/1000
71/1000
72/1000
73/1000
74/1000
75/1000
76/1000
77/1000
78/1000
79/1000
80/1000
81/1000
82/1000
83/1000
84/1000
85/1000
86/1000
87/1000
88/1000
89/1000
90/1000
91/1000
92/1000
93/1000
94/1000
95/1000
96/1000
97/1000
98/1000
99/1000
100/1000
101/1000
102/1000
103/1000
104/1000
105/1000
106/1000
107/1000
108/1000
109/1000
110/1000
111/1000
112/1000
113/1000
114/1000
115/1000
116/1000
117/1000
118/1000
119/1000
120/1000
121/1000
122/1000
123/1000
1

924/1000
925/1000
926/1000
927/1000
928/1000
929/1000
930/1000
931/1000
932/1000
933/1000
934/1000
935/1000
936/1000
937/1000
938/1000
939/1000
940/1000
941/1000
942/1000
943/1000
944/1000
945/1000
946/1000
947/1000
948/1000
949/1000
950/1000
951/1000
952/1000
953/1000
954/1000
955/1000
956/1000
957/1000
958/1000
959/1000
960/1000
961/1000
962/1000
963/1000
964/1000
965/1000
966/1000
967/1000
968/1000
969/1000
970/1000
971/1000
972/1000
973/1000
974/1000
975/1000
976/1000
977/1000
978/1000
979/1000
980/1000
981/1000
982/1000
983/1000
984/1000
985/1000
986/1000
987/1000
988/1000
989/1000
990/1000
991/1000
992/1000
993/1000
994/1000
995/1000
996/1000
997/1000
998/1000
999/1000


In [143]:
predictions

{40156: [10588, 6366],
 15632: [87141, 79218],
 36956: [379857, 376542],
 34157: [198, 5595],
 4953: [73711, 58114],
 14614: [5582, 6848],
 32394: [92881, 6076],
 32769: [133052, 197344],
 31013: [68986, 51459],
 14889: [375966, 35369],
 9186: [136360, 76002],
 14179: [40078, 31160],
 4099: [36195, 8397],
 14784: [22615, 28201],
 6576: [2873, 94524],
 43548: [392143, 374973],
 49272: [69013, 6639],
 1049: [35338, 23016],
 7017: [177170, 58579],
 6445: [196226, 79995],
 1364: [36884, 58638],
 2843: [378237, 179386],
 6909: [206093, 142463],
 375755: [67846, 42000],
 6249: [45493, 8328],
 10858: [11815, 9037],
 75457: [402831, 13656],
 332: [35980, 32166],
 8758: [179616, 14311],
 11986: [31331, 71417],
 13831: [424039, 415713],
 30340: [94811, 81976],
 32757: [319850, 133077],
 67334: [47864, 14683],
 35015: [44544, 67698],
 36462: [388617, 383035],
 375069: [14784, 36956],
 4453: [9186, 4540],
 6327: [136155, 128833],
 7101: [169914, 37082],
 60992: [424039, 418039],
 9654: [19636, 447

In [3]:
with open('')

NameError: name 'predictions' is not defined

In [8]:
import json

In [16]:
with open("restored.json") as f:
    predictions = json.load(f)

In [17]:
rating = np.load("Intelligent-systems-and-recommendations/rating_matrix.npy")
itemID = pd.read_csv("Intelligent-systems-and-recommendations/item_index.csv", header=None)
userID = pd.read_csv("Intelligent-systems-and-recommendations/user_index.csv", header=None)

In [18]:
df = pd.DataFrame(rating, columns = list(itemID[0]))
df.index = list(userID[0])

In [19]:
items = list(itemID[0])
items_df = []
users_df = []

In [20]:
for user in df.index:        # loop over rows of df (users)
    for index, rating in enumerate(list(df.loc[user])):   # loop over columns (items) and remember index
        if rating == 3:         # compare rating
            item = items[index]    # get itemID in list of items, at the index observed
            items_df.append(item)  # append the list of items
            users_df.append(user)  # append the list of users
            
gt = pd.DataFrame(list(zip(users_df, items_df, [1]*len(items_df))), columns =["User", "Item", "Relevance"])

In [21]:
users = []
items = []
for user in predictions.keys():
    users.extend([user]*len(predictions[user]))
    items.extend(predictions[user])

In [22]:
df_results = pd.DataFrame(list(zip(users, items)), columns =["User", "Item"])

In [23]:
labeled_results = gt.merge(df_results, how='left', on=['User', 'Relevance']).fillna(0)

relevances_rank = labeled_results.groupby(['User', 'Relevance'])['rank'].min()

ranks = relevances_rank.loc[:, 1]

reciprocal_ranks = 1 / (ranks)

reciprocal_ranks.mean()

KeyError: 'Relevance'