lethain / bossarray
- Source
- Commits
- Network (0)
- Issues (0)
- Downloads (0)
- Wiki (1)
- Graphs
-
Branch:
master
bossarray / boss_array.py
| 9513cf96 » | lethain | 2008-07-27 | 1 | from yos.boss import ysearch | |
| 2 | from yos.yql import db | ||||
| 3 | |||||
class BossArray(object):
    """Lazy, read-only, sequence-like view over the search results for a term.

    Results are fetched on demand through ``ysearch.search`` and remembered
    in ``self.retrieved`` so that each position is only requested once.

    Attributes:
        term: the search term passed to ``ysearch.search``.
        retrieved: list of ``(position, result)`` tuples for every result
            fetched so far, kept ordered by position.
        length: total number of hits reported by the last search response,
            or ``None`` until a request has been made.
    """

    # Maximum number of results that may be asked for in a single request.
    MAX_PER_REQUEST = 50

    def __init__(self, term):
        self.term = term
        self.retrieved = []
        self.length = None

    def _sort_retrieved(self):
        """Order the cache by position only.

        Sorting on the position alone avoids ever comparing two result
        objects with each other, which is not guaranteed to be possible.
        """
        self.retrieved.sort(key=lambda entry: entry[0])

    def _cache(self, start, results, sort=True):
        """Remember ``results`` as occupying positions ``start``, ``start+1``, ...

        Pass ``sort=False`` when several batches are cached in a row and the
        caller will re-sort ``self.retrieved`` once at the end.
        """
        self.retrieved.extend(
            (start + offset, result) for offset, result in enumerate(results))
        if sort:
            self._sort_retrieved()

    def _cached_portion(self, start, count):
        """Split positions ``start .. start+count-1`` by cache membership.

        Returns:
            ``(cached, not_cached)`` where ``cached`` is a list of
            ``(position, result)`` tuples and ``not_cached`` is a list of
            positions, both in ascending order.  A position whose cached
            value is ``None`` is reported as not cached.
        """
        stop = start + count
        # Index the relevant part of the cache once instead of rescanning
        # the whole list for every requested position.  The first entry for
        # a position wins, matching a front-to-back scan of the list.
        found = {}
        for pos, val in self.retrieved:
            if start <= pos < stop and pos not in found:
                found[pos] = val

        cached = []
        not_cached = []
        for pos in range(start, stop):
            val = found.get(pos)
            if val is not None:
                cached.append((pos, val))
            else:
                not_cached.append(pos)
        return cached, not_cached

    def _fetch_page(self, start, count):
        """Issue one search request.

        Returns:
            ``(totalhits, rows)`` for ``count`` results beginning at
            ``start``; ``totalhits`` is taken verbatim from the response.
        """
        data = ysearch.search(self.term, start=start, count=count)
        total = data['ysearchresponse']['totalhits']
        return total, db.create(data=data).rows

    def _download(self, start, count, sort=True):
        """Fetch ``count`` results beginning at ``start`` and cache them.

        The request is split into pages of at most ``MAX_PER_REQUEST``
        results.  ``self.length`` is refreshed from the last response.

        Returns:
            The list of rows fetched, in order.  It is shorter than
            ``count`` when the search ran out of results.
        """
        rows = []
        total = None
        # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
        for offset in range(0, count, self.MAX_PER_REQUEST):
            page_size = min(count - offset, self.MAX_PER_REQUEST)
            total, page = self._fetch_page(start + offset, page_size)
            page = list(page)
            rows.extend(page)
            if len(page) < page_size:
                # A short page means there is nothing further to fetch;
                # continuing would also misalign the cached positions.
                break
        if total is not None:
            # Only touch the length when a request was actually made, so a
            # zero-sized download cannot reset it.
            self.length = int(total)
        self._cache(start, rows, sort=sort)
        return rows

    @staticmethod
    def _group_runs(positions):
        """Collapse ascending positions into ``(start, count)`` runs.

        Each run covers a maximal stretch of consecutive positions, e.g.
        ``[0, 1, 2, 4]`` becomes ``[(0, 3), (4, 1)]``.
        """
        runs = []
        for pos in positions:
            if runs and pos == runs[-1][0] + runs[-1][1]:
                run_start, run_count = runs[-1]
                runs[-1] = (run_start, run_count + 1)
            else:
                runs.append((pos, 1))
        return runs

    def _get_item(self, index):
        """Return the single result at ``index``, downloading it if needed.

        Raises:
            TypeError: ``index`` is not an integer.
            IndexError: ``index`` lies outside the available results.
        """
        try:
            index = index.__index__()
        except AttributeError:
            raise TypeError("BossArray indices must be integers or slices")
        if index < 0:
            # Negative indices count back from the end, as for lists.
            index += len(self)
            if index < 0:
                raise IndexError("BossArray index out of range")
        cached, _ = self._cached_portion(index, 1)
        if cached:
            return cached[0][1]  # [(index, value)]
        rows = self._download(index, 1)
        if not rows:
            raise IndexError("BossArray index out of range")
        return rows[0]

    def _get_slice(self, key):
        """Return a tuple of the results selected by the slice ``key``.

        Only positions missing from the cache are downloaded, one download
        per run of consecutive missing positions.

        Raises:
            ValueError: the slice step is zero or negative.
        """
        step = 1 if key.step is None else key.step
        if step < 1:
            raise ValueError("BossArray slices require a positive step")
        start = 0 if key.start is None else key.start
        stop = key.stop
        if stop is None or start < 0 or stop < 0:
            # Open-ended or negative bounds can only be resolved against
            # the total length; plain bounds skip that extra request.
            start, stop, _ = slice(start, stop).indices(len(self))

        cached, not_cached = self._cached_portion(start, max(stop - start, 0))
        downloaded = []
        for run_start, run_count in self._group_runs(not_cached):
            rows = self._download(run_start, run_count, sort=False)
            # zip() stops at the shorter input, so a short download simply
            # yields fewer (position, result) pairs.
            downloaded.extend(
                zip(range(run_start, run_start + run_count), rows))
        if downloaded:
            # Sorting was turned off while downloading the runs so that the
            # cache is sorted once for the whole batch.
            self._sort_retrieved()

        found = cached + downloaded
        found.sort(key=lambda entry: entry[0])
        return tuple(val for pos, val in found if (pos - start) % step == 0)

    def __getitem__(self, i):
        """Return one result for an integer ``i`` or a tuple for a slice."""
        if isinstance(i, slice):
            return self._get_slice(i)
        return self._get_item(i)

    def __len__(self):
        """Return the total number of hits, fetching one result if unknown."""
        if self.length is None:
            self._download(0, 1)
        return self.length
| 115 | |||||
| 116 | |||||
