Skip to content

Commit

Permalink
reduce time complexity of selecting k neighbors using heapq (#90)
Browse files Browse the repository at this point in the history
* reduce time complexity of selecting k neighbors from O(n log n) to O(n log k)

* heap by key
  • Loading branch information
hengji-liu authored and NicolasHug committed Sep 29, 2017
1 parent 6de4942 commit 49c33fa
Showing 1 changed file with 10 additions and 17 deletions.
27 changes: 10 additions & 17 deletions surprise/prediction_algorithms/knns.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
unicode_literals)
import numpy as np
from six import iteritems
import heapq

from .predictions import PredictionImpossible
from .algo_base import AlgoBase
Expand Down Expand Up @@ -98,14 +99,12 @@ def estimate(self, u, i):

x, y = self.switch(u, i)

neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]

# sort neighbors by similarity
neighbors = sorted(neighbors, key=lambda tple: tple[1], reverse=True)
neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
k_neighbors = heapq.nlargest(self.k, neighbors)

# compute weighted average
sum_sim = sum_ratings = actual_k = 0
for (_, sim, r) in neighbors[:self.k]:
for (sim, r) in k_neighbors:
if sim > 0:
sum_sim += sim
sum_ratings += sim * r
Expand Down Expand Up @@ -179,15 +178,13 @@ def estimate(self, u, i):
x, y = self.switch(u, i)

neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]

# sort neighbors by similarity
neighbors = sorted(neighbors, key=lambda tple: tple[1], reverse=True)
k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[1])

est = self.means[x]

# compute weighted average
sum_sim = sum_ratings = actual_k = 0
for (nb, sim, r) in neighbors[:self.k]:
for (nb, sim, r) in k_neighbors:
if sim > 0:
sum_sim += sim
sum_ratings += sim * (r - self.means[nb])
Expand Down Expand Up @@ -280,13 +277,11 @@ def estimate(self, u, i):
return est

neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]

# sort neighbors by similarity
neighbors = sorted(neighbors, key=lambda tple: tple[1], reverse=True)
k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[1])

# compute weighted average
sum_sim = sum_ratings = actual_k = 0
for (nb, sim, r) in neighbors[:self.k]:
for (nb, sim, r) in k_neighbors:
if sim > 0:
sum_sim += sim
nb_bsl = self.trainset.global_mean + self.bx[nb] + self.by[y]
Expand Down Expand Up @@ -373,15 +368,13 @@ def estimate(self, u, i):
x, y = self.switch(u, i)

neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]

# sort neighbors by similarity
neighbors = sorted(neighbors, key=lambda tple: tple[1], reverse=True)
k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[1])

est = self.means[x]

# compute weighted average
sum_sim = sum_ratings = actual_k = 0
for (nb, sim, r) in neighbors[:self.k]:
for (nb, sim, r) in k_neighbors:
if sim > 0:
sum_sim += sim
sum_ratings += sim * (r - self.means[nb]) / self.sigmas[nb]
Expand Down

0 comments on commit 49c33fa

Please sign in to comment.