"""Perceptron training and prediction expressed as disco map/reduce jobs.

Each record handed to the map functions is indexable: ``e[1]`` is a string of
space-separated floats, one of which (at index ``y_id``) is the label, and
``e[0]`` is passed through as the record key by ``predict_map``.

NOTE(review): the map/combine/reduce functions below deliberately use only
builtins and their own arguments.  Presumably disco ships each function to
the workers on its own, so shared module-level helpers may not be available
there -- confirm before extracting any.
"""
import sys

import disco


def estimate_map(e, params):
    """Emit the update contribution of one training record.

    ``params.y_id`` is the index of the label inside the parsed row and
    ``params.w`` is the current weight vector (or ``None`` before the first
    pass).

    Returns an empty list when weights exist and ``y * (w . x) > 0``;
    otherwise returns ``[('', y * x)]`` where ``x`` is the row with the label
    removed.
    """
    # Build a real list: the row is indexed and mutated with ``del`` below,
    # which a lazy ``map`` object does not support.
    x = [float(token) for token in e[1].split(' ')]
    y = x[params.y_id]
    del x[params.y_id]
    if params.w is not None:
        score = sum(x[i] * w_i for i, w_i in enumerate(params.w))
        if y * score > 0:
            return []
    return [('', [y * a for a in x])]


def estimate_combiner(k, v, w, done, params):
    """Accumulate the element-wise sum of map outputs in the buffer ``w``.

    ``w`` is a dict used as the running buffer; the sum is kept under the
    key ``''``.  When ``done`` is true the buffered vector is returned as a
    single ``('', 'f1 f2 ...')`` pair (or an empty list if nothing was
    buffered); otherwise the function only updates ``w`` and returns ``None``.
    """
    if done:
        if not w:
            return []
        return [('', ' '.join(map(repr, w[''])))]
    if not w:
        w[''] = v
    else:
        acc = w['']
        w[''] = [acc[i] + v_i for i, v_i in enumerate(v)]


def estimate_reduce(iter, out, params):
    """Sum all incoming vectors, scaled by ``params.learning_rate``.

    ``iter`` yields ``(key, value)`` pairs whose values are space-separated
    floats.  If at least one pair was seen, the scaled sum is written with
    ``out.add('', 'f1 f2 ...')``; otherwise nothing is emitted.
    """
    w = None
    for key, value in iter:
        v = [float(token) for token in value.split(' ')]
        if w is None:
            w = [params.learning_rate * a for a in v]
        else:
            w = [w[i] + params.learning_rate * v_i for i, v_i in enumerate(v)]
    if w is not None:
        out.add('', ' '.join(map(repr, w)))


def predict_map(e, params):
    """Score one record against ``params.w``.

    The column at ``params.y_id`` is dropped from the parsed row before the
    dot product is taken.  Returns ``[(e[0], score)]``.
    """
    # A list is required here because the label column is removed in place.
    x = [float(token) for token in e[1].split(' ')]
    del x[params.y_id]
    return [(e[0], sum(x[i] * w_i for i, w_i in enumerate(params.w)))]


def estimate(input, y_id, w=None, learning_rate=1.0, iterations=10,
             host="disco://localhost", map_reader=disco.chain_reader):
    """Run ``iterations`` estimation jobs and return the final weights.

    Each pass starts a disco job named ``perceptron_estimate_<i>`` with the
    current ``w``, then adds every vector found in the job results to ``w``
    (the first vector becomes ``w`` when it is still ``None``).  The weights
    are written to stderr as a progress trace.

    Returns ``w``, which is still ``None`` if no job produced any result and
    no initial weights were given.
    """
    for i in range(iterations):
        results = disco.job(host,
                            name='perceptron_estimate_' + str(i),
                            input_files=input,
                            map_reader=map_reader,
                            fun_map=estimate_map,
                            combiner=estimate_combiner,
                            reduce=estimate_reduce,
                            params=disco.Params(w=w,
                                                learning_rate=learning_rate,
                                                y_id=y_id),
                            sort=False,
                            clean=True)
        for key, value in disco.result_iterator(results):
            v = [float(token) for token in value.split(' ')]
            if w is None:
                w = v
            else:
                w = [w_i + v[j] for j, w_i in enumerate(w)]
        # NOTE(review): the original line breaks were lost; the trace is
        # assumed to be emitted once per iteration -- confirm.
        # ``write`` replaces the Python-2-only ``print >>`` statement and
        # produces the same text on both major Python versions.
        sys.stderr.write('%s\n' % (w,))
    return w


def predict(input, y_id, w, host="disco://localhost",
            map_reader=disco.chain_reader):
    """Start the ``perceptron_predict`` job and return what ``disco.job`` returns.

    The job maps every record through ``predict_map`` with the given weights
    ``w`` and label index ``y_id``; no combiner or reduce step is configured.
    """
    results = disco.job(host,
                        name='perceptron_predict',
                        input_files=input,
                        map_reader=map_reader,
                        fun_map=predict_map,
                        params=disco.Params(w=w, y_id=y_id),
                        sort=False,
                        clean=False)
    return results