This repository is private.
All pages are served over SSL and all pushing and pulling is done over SSH.
No one may fork, clone, or view it unless they are added as a member.
Every repository with this icon (icon not shown) is private.
This repository is public.
Anyone may fork, clone, or view it.
Every repository with this icon (icon not shown) is public.
Ville Tuulos (author)
Sun Oct 04 22:10:52 -0700 2009
| b2f159bc » | Taneli Mielikäinen | 2008-09-29 | 1 | import disco | |
| 2 | |||||
def init_map(e, params):
    """Assign a record to a uniformly random cluster.

    Used to build the initial centers when the caller supplies none.

    e      -- input record; only ``e[1]`` (the record's value) is used and it
              is passed through unchanged.
    params -- object exposing ``k``, the number of clusters.

    Returns a one-element list ``[(cluster_index, e[1])]`` where
    ``cluster_index`` is drawn from ``0 .. params.k - 1`` inclusive.
    """
    # Kept as a function-local import, as in the original.
    # NOTE(review): presumably required because the function is shipped to
    # worker processes on its own -- confirm before hoisting to module level.
    import random

    cluster = random.randint(0, params.k - 1)
    return [(cluster, e[1])]
| 6 | |||||
| 7 | |||||
def estimate_map(e, params):
    """Emit a record's point keyed by the index of its nearest center.

    e      -- input record; ``e[1]`` is a space-separated string of numbers.
    params -- object exposing ``centers`` (a sequence of centers) and
              ``dist`` (a callable ``dist(center, point)``).

    Returns a one-element list ``[(nearest_index, e[1])]``.  Ties are broken
    in favour of the lowest center index.
    """
    # Parse once, into a real list: the original re-parsed the point for
    # every center and, on Python 3, handed ``dist`` a one-shot ``map``
    # iterator that cannot be indexed.
    point = [float(tok) for tok in e[1].split(' ')]

    # Comparing (distance, index) tuples makes min() prefer the lower index
    # when two centers are equally close.
    nearest = min((params.dist(c, point), i)
                  for (i, c) in enumerate(params.centers))[1]
    return [(nearest, e[1])]
| 10 | |||||
| 11 | |||||
def estimate_combiner(k, v, centers, done, params):
    """Accumulate per-cluster coordinate sums and point counts.

    k       -- cluster key of the incoming point (ignored when ``done``).
    v       -- the point as a space-separated string of numbers (ignored
               when ``done``).
    centers -- dict used as the accumulator; maps a cluster key to a list
               holding the running coordinate sums followed by the number
               of points seen.  It is mutated in place.
    done    -- when true, flush the accumulator instead of adding a point.
    params  -- unused.

    Returns ``None`` while accumulating.  When ``done`` is true, returns a
    list of ``(key, "sum_1 ... sum_n count")`` pairs, one per cluster.
    """
    if done:
        # items() instead of iteritems(): works on both Python 2 and 3.
        return [(i, ' '.join(map(repr, c))) for (i, c) in centers.items()]

    # A real list is needed because len() is taken below; on Python 3
    # map() would return an iterator.
    v = [float(tok) for tok in v.split(' ')]
    if k not in centers:
        # One slot per coordinate plus a trailing integer point counter.
        centers[k] = [0.0] * len(v) + [0]

    acc = centers[k]
    for i, coord in enumerate(v):
        acc[i] += coord
    acc[len(v)] += 1
    return None
| 21 | |||||
| 22 | |||||
def estimate_reduce(iter, out, params):
    """Merge partial sums per cluster and emit each cluster's mean.

    iter   -- iterable of ``(key, value)`` pairs, where ``value`` is a
              space-separated string of coordinate sums followed by a point
              count.
    out    -- output object; ``out.add(key, value)`` is called once per
              cluster.
    params -- unused.

    For every key the fields are summed element-wise across all its values;
    every field except the last is then divided by the last (the total
    count).  The emitted value is ``"mean_1 ... mean_n count"``.
    """
    totals = {}
    for k, v in iter:
        fields = [float(tok) for tok in v.split(' ')]
        if k not in totals:
            totals[k] = fields
        else:
            # Fix: the original looped ``for i in y`` and used the float
            # *values* as list indices, which raises TypeError as soon as a
            # key is seen twice.  Accumulate by position instead.
            acc = totals[k]
            for i, value in enumerate(fields):
                acc[i] += value

    for k, v in totals.items():
        count = v[-1]
        # Turn the coordinate sums into means; the trailing count is kept.
        for i in range(len(v) - 1):
            v[i] /= count
        out.add(k, ' '.join(map(repr, v)))
| 35 | |||||
| 36 | |||||
def predict_map(e, params):
    """Label a record with the index of its nearest center.

    e      -- input record; ``e[0]`` is passed through as the output key and
              ``e[1]`` is a space-separated string of numbers.
    params -- object exposing ``centers`` (a sequence of centers) and
              ``dist`` (a callable ``dist(center, point)``).

    Returns a one-element list ``[(e[0], nearest_index)]``.  Ties are broken
    in favour of the lowest center index.
    """
    # Parse once into a list.  The original applied map(float, ...) twice
    # and re-parsed for every center; on Python 3 the resulting iterator
    # also cannot be indexed by ``dist``.
    point = [float(tok) for tok in e[1].split(' ')]

    nearest = min((params.dist(c, point), i)
                  for (i, c) in enumerate(params.centers))[1]
    return [(e[0], nearest)]
| 39 | |||||
| 40 | |||||
def d2(x, y):
    """Return the squared Euclidean distance between ``x`` and ``y``.

    Only the first ``len(x)`` coordinates are compared, so ``y`` may be
    longer than ``x``; a shorter ``y`` raises ``IndexError``.
    """
    # Index-based on purpose: zip() would silently truncate to the shorter
    # sequence instead of raising when ``y`` is too short.
    return sum((x[i] - y[i]) ** 2 for i in range(len(x)))
| 42 | |||||
| 43 | |||||
def _centers_from_results(results, k, previous=None):
    """Rebuild the list of ``k`` centers from a finished job's results.

    Each result value is a space-separated list of floats whose last field
    is the cluster's point count; the remaining fields are its coordinates.
    Clusters that do not appear in ``results`` keep their entry from
    ``previous`` (or stay ``None`` when there is no previous list).
    """
    centers = list(previous) if previous is not None else [None] * k
    for key, value in disco.result_iterator(results):
        fields = [float(tok) for tok in value.split(' ')]
        centers[int(key)] = fields[:-1]
    return centers


def estimate(input, centers, k, iterations=10, host="disco://localhost", map_reader=disco.chain_reader, nr_reduces=None):
    """Run k-means on Disco and return the estimated centers.

    input      -- passed to ``disco.job`` as ``input_files``.
    centers    -- initial centers (a sequence of coordinate lists), or
                  ``None`` to initialise by assigning every point to a
                  random cluster in a ``kmeans_init`` job.
    k          -- number of clusters; replaced by ``len(centers)`` when
                  ``centers`` is given.
    iterations -- number of ``kmeans_iterate_<i>`` jobs to run.
    host       -- Disco master address.
    map_reader -- reader passed to every job.
    nr_reduces -- number of reducers; defaults to ``k``.

    Returns a list of ``k`` centers, each a list of floats, reflecting the
    output of the last job that was run.
    """
    if centers is not None:
        k = len(centers)
    if nr_reduces is None:
        nr_reduces = k

    results = None
    if centers is None:
        results = disco.job(host, name='kmeans_init',
                            input_files=input,
                            map_reader=map_reader,
                            fun_map=init_map,
                            combiner=estimate_combiner,
                            reduce=estimate_reduce,
                            nr_reduces=nr_reduces,
                            params=disco.Params(k=k),
                            sort=False, clean=True)

    for i in range(iterations):
        if results is not None:
            # Passing the current centers lets a cluster that received no
            # points keep its old position instead of becoming None, which
            # would make the distance function fail in the next map phase.
            centers = _centers_from_results(results, k, centers)

        results = disco.job(host, name='kmeans_iterate_' + str(i),
                            input_files=input,
                            map_reader=map_reader,
                            fun_map=estimate_map,
                            combiner=estimate_combiner,
                            reduce=estimate_reduce,
                            nr_reduces=nr_reduces,
                            params=disco.Params(centers=centers, dist=d2),
                            sort=False, clean=True)

    # Fix: fold in the output of the last job.  The original returned the
    # centers that were fed *into* the final iteration, discarding its
    # result (with iterations=1 and explicit centers it returned the input
    # unchanged).
    if results is not None:
        centers = _centers_from_results(results, k, centers)

    return centers
| 80 | |||||
| 81 | |||||
def predict(input, centers, host="disco://localhost", map_reader=disco.chain_reader, nr_reduces=None):
    """Run a Disco job that labels every input record with its nearest center.

    input      -- passed to ``disco.job`` as ``input_files``.
    centers    -- sequence of centers, handed to the map function through
                  ``disco.Params``.
    host       -- Disco master address.
    map_reader -- reader passed to the job.
    nr_reduces -- number of reducers; defaults to ``len(centers)``.

    Returns whatever ``disco.job`` returns for the ``kmeans_output`` job.
    The job runs ``predict_map`` only; no combiner or reduce function is
    supplied.
    """
    if nr_reduces is None:
        nr_reduces = len(centers)

    results = disco.job(host, name='kmeans_output',
                        input_files=input,
                        map_reader=map_reader,
                        fun_map=predict_map,
                        nr_reduces=nr_reduces,
                        params=disco.Params(centers=centers, dist=d2),
                        sort=False, clean=True)

    return results







