<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array">
    <added>
      <filename>README.markdown</filename>
    </added>
  </added>
  <modified type="array">
    <modified>
      <diff>@@ -8,7 +8,7 @@ require 'gisting'
 SERVERS = [
   [&quot;127.0.0.1&quot;, 9081, Gisting::MapServer],
   [&quot;127.0.0.1&quot;, 9082, Gisting::MapServer],
-  # [&quot;127.0.0.1&quot;, 9083, Gisting::MapServer],
+  [&quot;127.0.0.1&quot;, 9083, Gisting::MapServer],
 
   [&quot;127.0.0.1&quot;, 9091, Gisting::ReduceServer],
   [&quot;127.0.0.1&quot;, 9092, Gisting::ReduceServer]</diff>
      <filename>bin/server</filename>
    </modified>
    <modified>
      <diff>@@ -5,9 +5,9 @@ require 'gisting'
 
 # TODO: Grab this from ARGV
 def args
-  # args = [&quot;/Users/mchung/Public/datasets/sample.data&quot;, &quot;/Users/mchung/Public/datasets/sample.data&quot;]
+  args = [&quot;/Users/mchung/Public/datasets/sample.data&quot;, &quot;/Users/mchung/Public/datasets/sample.data&quot;]
   # args = [&quot;/Users/mchung/Public/datasets/sample.data&quot;, &quot;/Users/mchung/Public/datasets/sample.data&quot;, &quot;/Users/mchung/Public/datasets/sample.data&quot;]
-  args = [&quot;/Users/mchung/Public/datasets/aoldb_dev.txt&quot;, &quot;/Users/mchung/Public/datasets/aoldb_dev.txt&quot;]
+  # args = [&quot;/Users/mchung/Public/datasets/aoldb_dev.txt&quot;, &quot;/Users/mchung/Public/datasets/aoldb_dev.txt&quot;]
   # args = [&quot;/Users/mchung/Public/datasets/sample.data&quot;, &quot;/Users/mchung/Public/datasets/aoldb_dev.txt&quot;]
   args
 end
@@ -26,7 +26,7 @@ if __FILE__ == $0
   end
   output = spec.output
   output.filebase = &quot;/Users/mchung/Public/datasets/output&quot;
-  output.num_tasks = 2
+  output.num_tasks = 1
   output.reduce do |reduce_input|
     count = 0
     reduce_input.each do |value|</diff>
      <filename>examples/term_count.rb</filename>
    </modified>
    <modified>
      <diff>@@ -19,19 +19,6 @@ module Gisting
     def receive_data(output_data)
       @result.recv_map_data!(output_data)
     end
-    
-    # def unbind
-    #   puts &quot;unbinding&quot;
-    #   if error?
-    #     puts &quot;An error occurred&quot;
-    #   else 
-    #     puts &quot;Completed successfully&quot;
-    #   end
-    # end
-    # 
-    # def connection_completed
-    #   puts &quot;..connection_completed called&quot;
-    # end
 
     protected
 </diff>
      <filename>lib/gisting/map_client.rb</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,6 @@ module Gisting
   class MapRunner
 
     attr_accessor :data_source, :map_proc
-    attr_reader :output
 
     def initialize(input)
       pp input</diff>
      <filename>lib/gisting/map_runner.rb</filename>
    </modified>
    <modified>
      <diff>@@ -13,20 +13,22 @@ module Gisting
     end
 
     def post_init
+      # puts &quot;post init&quot;
       send_task_if_available
     end
 
     def receive_data(output_result)
+      # puts &quot;recv data #{output_result}&quot;
       recv_task(output_result)
       send_task_if_available
     end
 
     protected
 
-
     def send_task_if_available
       next_task = @result.next_available_map_result
       if next_task
+        # pp [@output.filebase, next_task]
         send_data([@output, next_task].to_yaml)
         @result.sent_reduce_data!
       end</diff>
      <filename>lib/gisting/reduce_client.rb</filename>
    </modified>
    <modified>
      <diff>@@ -4,10 +4,10 @@ module Gisting
     attr_accessor :map_data_input, :red_proc
 
     def initialize(output, input)
-      @map_data_input = input
       @histogram = {}
       @red_proc = output.reduce_proc
       @output_file = output.filebase
+      @map_data_input = input
       setup_emit
     end
 
@@ -21,7 +21,6 @@ module Gisting
 
     def reduce!
       begin
-        puts &quot;reducing&quot;
         File.open(@map_data_input).each do |line|
           @key, val = line.strip.split(&quot;:&quot;)
           apply([val.strip])
@@ -45,19 +44,22 @@ module Gisting
     end
 
     def setup_emit
+      # puts &quot;setting up emit&quot;
       if File.exists?(@output_file)
+        # puts &quot;#{@output_file} exists... loading into memory&quot;
         File.open(@output_file).each do |line|
-          key, val = line.strip.split(&quot;:&quot;)
-          @histogram[key] = val
+          @key, val = line.strip.split(&quot;:&quot;)
+          apply([val.strip])
         end
       else
+        # puts &quot;#{@output_file} does not exist.. creating for first time&quot;
         FileUtils.touch(@output_file)
       end
     end
 
     def reduce_completed!
     	out = Tempfile.new(&quot;tempfile&quot;)
-    	pp out.path
+      # pp out.path
       @histogram.each_pair do |key, val|
         out.puts(&quot;#{key}: #{val}&quot;)
       end
@@ -65,96 +67,5 @@ module Gisting
     	FileUtils.mv(out.path, @output_file)
     end
 
-    # attr_accessor :data_source, :map_proc
-    # attr_reader :output
-    # 
-    # def initialize(map_input)
-    #   pp map_input
-    #   @data_source = map_input.file_pattern
-    #   @map_proc = map_input.map_proc
-    #   setup_emit
-    # end
-    # 
-    # def Emit(key, value)
-    #   @emit.store(key, value)
-    # end
-    # 
-    # def map!
-    #   # TODO Abstract away file data source
-    #   File.read(self.data_source).each do |line|
-    #     apply(line)
-    #   end
-    #   map_completed!
-    # end
-    # 
-    # def output
-    #   @intermediate_output
-    # end
-    # 
-    # protected
-    # 
-    # def apply(data_item)
-    #   # pp data_item
-    #   @proc ||= eval(self.map_proc)
-    #   @proc.call(data_item)
-    # end
-    # 
-    # def setup_emit
-    #   # TODO Abstract away file data source
-    #   @intermediate_output = make_intermediate_output
-    #   @output = File.new(@intermediate_output, &quot;w&quot;)
-    #   @emit = FileEmit.new(@output)
-    # end
-    # 
-    # def make_intermediate_output
-    #   # TODO Abstract away file data source
-    #   basedir = File.dirname(@data_source)
-    #   filename = File.basename(@data_source)
-    #   old_ext = File.extname(filename)
-    #   filename_no_ext = File.basename(filename, old_ext)
-    #   new_ext = rand(100).to_s
-    #   intermediate_filename = &quot;#{filename_no_ext}.#{new_ext}#{old_ext}&quot;
-    #   
-    #   File.join(basedir, &quot;results&quot;, intermediate_filename)
-    # end
-    # 
-    # def map_completed!
-    #   # TODO Abstract away file data source
-    #   @output.flush
-    #   @output.close
-    # end
-
-    # def giest_old(spec, result)
-    # 
-    #   # Map. two data sources.
-    #   EM::run {
-    #     EM::connect &quot;127.0.0.1&quot;, 8081, Gisting::Conductor, spec.map_inputs[0], result
-    #     EM::connect &quot;127.0.0.1&quot;, 8082, Gisting::Conductor, spec.map_inputs[1], result
-    #   }
-    # 
-    #   # Reduce is hacked for now.
-    #   # pp result
-    # 
-    #   data = String.new
-    #   result.responses.each do |file|
-    #     data += File.read(file)
-    #   end
-    #   # puts data
-    # 
-    #   # data = data.sort{|a, b| a &lt;=&gt; b} # need to sort?
-    #   
-    #   # red_proc ||= eval(spec.output.reduce_proc)
-    #   # data.each do |key|
-    #   #   key, val = key.strip.split(&quot;:&quot;)
-    #   #   @key = key
-    #   #   red_proc.call(val)
-    #   # end
-    #   # print_term_freq(@histogram)
-    # 
-    #   term_count(data)
-    # 
-    # 
-    # end
-
   end
 end
\ No newline at end of file</diff>
      <filename>lib/gisting/reduce_runner.rb</filename>
    </modified>
    <modified>
      <diff>@@ -16,7 +16,7 @@ module Gisting
         output, input = YAML::load(output_data)
         runner = ReduceRunner.new(output, input)
         runner.reduce!
-        pp runner.output
+        # pp [&quot;output&quot;, runner.output]
         send_data(runner.output)
         # rescue  =&gt; e
         #   e.backtrace.each do |x|</diff>
      <filename>lib/gisting/reduce_server.rb</filename>
    </modified>
    <modified>
      <diff>@@ -27,19 +27,18 @@ module Gisting
       @sent_count += 1
       if @sent_count == @spec.map_input_count
         puts &quot;Maps distributed&quot;
-      else
-        puts &quot;More maps to distribute&quot;
+      # else
+      #   puts &quot;More maps to distribute&quot;
       end
     end
 
     def recv_reduce_data!(output_result)
-      puts &quot;asdf&quot;
       @reduce_responses &lt;&lt; output_result
-      if @reduce_responses.size == @spec.reduce_output_count
+      if @reduce_responses.size == @spec.map_input_count
         puts &quot;Stopping Reduce phase&quot;
         @spec.stop!
       else
-        puts &quot;Got Reduce result data #{output_result}. #{@spec.reduce_output_count - @reduce_responses.size} remaining.&quot;
+        puts &quot;Got Reduce result data #{output_result}. #{@spec.map_input_count - @reduce_responses.size} remaining.&quot;
       end
     end
 </diff>
      <filename>lib/gisting/result.rb</filename>
    </modified>
    <modified>
      <diff>@@ -20,20 +20,20 @@ module Gisting
     def output
       @map_output ||= Output.new
     end
-    
+
     def map_input_count
       @map_inputs.size
     end
-    
+
     def reduce_output_count
       @map_output.num_tasks
     end
-    
+
     def stop!
       EM::stop_event_loop
     end
 
-    # TODO Round-robin assign available servers with jobs
+    # TODO Round-robin assign available servers with jobs. Need to map M jobs to N servers
     def run_map!(result)
       EM::run do
         # One for every map input
@@ -43,24 +43,15 @@ module Gisting
       end
     end
 
-    # TODO Round-robin assign available servers with job
+    # TODO Round-robin assign available servers with job. Need to map M jobs to N servers
     def run_reduce!(result)
       reduce = result.setup_reduce_stage(@map_output)
-      EM::run do
+      EM::run do 
         # One for every output#num_task
         EM::connect &quot;127.0.0.1&quot;, 9091, Gisting::ReduceClient, reduce[0], result
         # EM::connect &quot;127.0.0.1&quot;, 9092, Gisting::ReduceClient, reduce[1], result
       end
     end
 
-    ## should have a spec.output_task which when called, marks the object for queue and sends it to the reduce server.
-    ## reduce conductor should keep doing this until we're done with the data sets
-
-    # Reduce should initiate a call for every num_task there is, and creating a file name that's predictable based on filebase
-    # Should also modify &quot;result&quot; object to sync against the connects. for instance, creating the files from the filebase, but for each map file, dispatch against them. then unblock in similar fashion
-    # Should be able to dispatch multiple Reduce tasks round robin to only a fixed number of machines
-    # probably need a reduce conductor.. delgates calls to reduceserver. uses results to share items across #connects
-
-
   end
 end
\ No newline at end of file</diff>
      <filename>lib/gisting/spec.rb</filename>
    </modified>
  </modified>
  <removed type="array">
    <removed>
      <filename>README</filename>
    </removed>
  </removed>
  <parents type="array">
    <parent>
      <id>18d354e7e7ad97490b9d09a4130afc702cc77326</id>
    </parent>
  </parents>
  <author>
    <name>Marc Chung</name>
    <email>mchung@gmail.com</email>
  </author>
  <url>http://github.com/mchung/gisting/commit/8197df0a730daf063a37db626afd0cafea837bd8</url>
  <id>8197df0a730daf063a37db626afd0cafea837bd8</id>
  <committed-date>2008-10-28T12:18:08-07:00</committed-date>
  <authored-date>2008-10-28T12:18:08-07:00</authored-date>
  <message>Code clean up</message>
  <tree>c9fc20361b676ead885dbf1e5e709b3cf978e1b7</tree>
  <committer>
    <name>Marc Chung</name>
    <email>mchung@gmail.com</email>
  </committer>
</commit>
