-
Notifications
You must be signed in to change notification settings - Fork 404
/
qmath.py
163 lines (144 loc) · 4.23 KB
/
qmath.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from numpy.linalg import norm
from math import sqrt,exp
from numba import jit
def l1(x):
    """Return the L1 (Manhattan) norm of vector x."""
    return norm(x, ord=1)
def l2(x):
    """Return the L2 (Euclidean) norm of vector x."""
    return norm(x)
def common(x1, x2):
    """Return the entries of x1 and x2 that are rated (nonzero) in both vectors."""
    mask = (x1 != 0) & (x2 != 0)
    return x1[mask], x2[mask]
def cosine_sp(x1, x2):
    """Cosine similarity for sparse vectors stored as dicts.

    Only keys present in both dicts contribute. Returns 0 when the
    denominator is zero (no overlap, or all overlapping values are 0).
    """
    dot = 0
    sq1 = 0
    sq2 = 0
    try:
        for key, val in x1.items():
            if key in x2:
                other = x2[key]
                dot += val * other
                sq1 += val * val
                sq2 += other * other
        return dot / (sqrt(sq1) * sqrt(sq2))
    except ZeroDivisionError:
        return 0
def euclidean_sp(x1, x2):
    """Inverse Euclidean-distance similarity for sparse dict vectors.

    x1, x2: dicts mapping key -> rating (sparse representation); only
    keys present in both dicts contribute.
    Returns 1/distance, or 0 when the distance is zero (identical
    overlapping values) or there is no overlap at all.
    """
    total = 0
    for k in x1:
        if k in x2:
            # Bug fix: the squared distance is the squared difference
            # (x1[k]-x2[k])**2, not the difference of squares
            # x1[k]**2 - x2[k]**2 (which can even go negative).
            total += (x1[k] - x2[k]) ** 2
    try:
        # sqrt added so the result matches the dense euclidean() helper
        # (1 / L2 distance) instead of 1 / squared distance.
        return 1 / sqrt(total)
    except ZeroDivisionError:
        return 0
def cosine(x1, x2):
    """Cosine similarity between two dense vectors; 0 on a zero denominator."""
    numerator = x1.dot(x2)
    denominator = sqrt(x1.dot(x1) * x2.dot(x2))
    try:
        return numerator / denominator
    except ZeroDivisionError:
        return 0
def pearson_sp(x1, x2):
    """Pearson correlation for sparse vectors stored as dicts.

    Means are taken over each full dict; only keys present in both
    contribute to the correlation. On a zero denominator: returns 1 if
    the vectors overlapped (zero variance on the overlap), else 0
    (empty input or no overlap).
    """
    overlapped = False
    num = 0
    var1 = 0
    var2 = 0
    try:
        mean1 = sum(x1.values()) / len(x1)
        mean2 = sum(x2.values()) / len(x2)
        for key, val in x1.items():
            if key in x2:
                d1 = val - mean1
                d2 = x2[key] - mean2
                num += d1 * d2
                var1 += d1 * d1
                var2 += d2 * d2
                overlapped = True
        return num / (sqrt(var1) * sqrt(var2))
    except ZeroDivisionError:
        return 1 if overlapped else 0
def euclidean(x1, x2):
    """Inverse Euclidean distance over entries rated (nonzero) in both vectors.

    Returns 0 when the vectors are identical on the shared entries
    (distance zero).
    """
    # Inlined common(): keep only entries nonzero in both vectors.
    mask = (x1 != 0) & (x2 != 0)
    gap = x1[mask] - x2[mask]
    try:
        return 1 / sqrt(gap.dot(gap))
    except ZeroDivisionError:
        return 0
def pearson(x1, x2):
    """Pearson correlation between two dense vectors; 0 on division error."""
    try:
        # center both vectors on their means
        c1 = x1 - x1.sum() / len(x1)
        c2 = x2 - x2.sum() / len(x2)
        numerator = c1.dot(c2)
        denominator = sqrt(c1.dot(c1) * c2.dot(c2))
        return numerator / denominator
    except ZeroDivisionError:
        return 0
def similarity(x1, x2, sim):
    """Dispatch to a sparse similarity measure by name.

    sim: 'pcc' -> pearson_sp, 'euclidean' -> euclidean_sp,
    anything else -> cosine_sp.
    """
    if sim == 'pcc':
        return pearson_sp(x1, x2)
    elif sim == 'euclidean':
        return euclidean_sp(x1, x2)
    return cosine_sp(x1, x2)
def normalize(vec, maxVal, minVal):
    """Min-max normalize vec into [0, 1] given the value range.

    Falls back to vec/maxVal when maxVal == minVal; raises
    ArithmeticError when maxVal < minVal.
    """
    if maxVal < minVal:
        print('error... maximum value is less than minimum value.')
        raise ArithmeticError
    if maxVal == minVal:
        return vec / maxVal
    return (vec - minVal) / (maxVal - minVal)
def sigmoid(val):
    """Numerically stable logistic function 1 / (1 + e^-val).

    Bug fix: the naive form exp(-val) raises OverflowError for
    val < ~-709 (math.exp overflows). Branching on the sign keeps the
    exponent non-positive, so exp never overflows; results are
    unchanged where the naive form succeeded.
    """
    if val >= 0:
        return 1 / (1 + exp(-val))
    z = exp(val)
    return z / (1 + z)
def denormalize(vec, maxVal, minVal):
    """Map a normalized value back to [minVal, maxVal].

    The 0.01 offset mirrors the original normalization scheme's shift.
    """
    span = maxVal - minVal
    return minVal + (vec - 0.01) * span
@jit(nopython=True)
def find_k_largest(K,candidates):
    """Return (ids, scores) for the K largest values in `candidates`.

    K: number of top entries to keep.
    candidates: sequence of scores; the index into the sequence is used
    as the item id.
    Returns two parallel lists (ids, scores) in descending score order.

    NOTE(review): the second loop scans ALL candidates, including the
    first K that seeded the list, so a seed item whose score beats the
    current K-th score appears to be insertable a second time —
    confirm whether duplicate ids are acceptable to callers.
    """
    # Seed the top-K list with the first K candidates, then sort it by
    # descending score so index 0 holds the largest kept score.
    n_candidates = []
    for iid,score in enumerate(candidates[:K]):
        n_candidates.append((iid, score))
    n_candidates.sort(key=lambda d: d[1], reverse=True)
    k_largest_scores = [item[1] for item in n_candidates]
    ids = [item[0] for item in n_candidates]
    # find the N biggest scores
    for iid,score in enumerate(candidates):
        ind = K
        l = 0
        r = K - 1
        # Only consider scores beating the current K-th (smallest kept) score.
        if k_largest_scores[r] < score:
            # Binary search over the descending list for the slot after
            # which `score` belongs; `ind` ends as that insertion anchor.
            while r >= l:
                mid = int((r - l) / 2) + l
                if k_largest_scores[mid] >= score:
                    l = mid + 1
                elif k_largest_scores[mid] < score:
                    r = mid - 1
                if r < l:
                    ind = r
                    break
            # move the items backwards
            # Shift entries after the anchor one slot right (dropping the
            # smallest), then write the new score/id just after the anchor.
            if ind < K - 2:
                k_largest_scores[ind + 2:] = k_largest_scores[ind + 1:-1]
                ids[ind + 2:] = ids[ind + 1:-1]
            if ind < K - 1:
                k_largest_scores[ind + 1] = score
                ids[ind + 1] = iid
    return ids,k_largest_scores