Merge branch 'store_sample_sig' into file_ident

Author: SuperCowPowers · Date: Aug 3, 2014
Commit: fff7d47
Parents (2): cc0c456 + 0442fbc
Show file tree

Hide file tree

Showing 65 changed files with 86 additions and 86 deletions.
diff --git a/workbench/clients/log_meta_stream.py b/workbench/clients/log_meta_stream.py
@@ -25,7 +25,7 @@ def run():
             base_name = os.path.basename(filename)
             if base_name == '.DS_Store': continue
 
-            md5 = workbench.store_sample(base_name, f.read(), 'log')
+            md5 = workbench.store_sample(f.read(), base_name, 'log')
             results = workbench.work_request('view_log_meta', md5)
             print 'Filename: %s\n' % (base_name)
             pprint.pprint(results)

diff --git a/workbench/clients/pcap_bro_indexer.py b/workbench/clients/pcap_bro_indexer.py
@@ -26,7 +26,7 @@ def run():
 
         with open(filename, 'rb') as pcap_file:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, pcap_file.read(), 'pcap')
+            md5 = workbench.store_sample(pcap_file.read(), base_name, 'pcap')
 
             # Index the view_pcap output (notice we can ask for any worker output)
             # Also (super important) it all happens on the server side.

diff --git a/workbench/clients/pcap_bro_raw.py b/workbench/clients/pcap_bro_raw.py
@@ -27,7 +27,7 @@ def run():
 
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'pcap')
+            md5 = workbench.store_sample(f.read(), base_name, 'pcap')
             results = workbench.work_request('pcap_bro', md5)
 
             # Results is just a dictionary of Bro log file names and their MD5s in workbench

diff --git a/workbench/clients/pcap_bro_urls.py b/workbench/clients/pcap_bro_urls.py
@@ -26,7 +26,7 @@ def run():
 
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            pcap_md5 = workbench.store_sample(base_name, f.read(), 'pcap')
+            pcap_md5 = workbench.store_sample(f.read(), base_name, 'pcap')
             results = workbench.work_request('pcap_bro', pcap_md5)
 
             # Just grab the http log

diff --git a/workbench/clients/pcap_bro_view.py b/workbench/clients/pcap_bro_view.py
@@ -28,7 +28,7 @@ def run():
         # Process the pcap file
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'pcap')
+            md5 = workbench.store_sample(f.read(), base_name, 'pcap')
             results = workbench.work_request('view_pcap', md5)
             print '\n<<< %s >>>' % base_name
             pprint.pprint(results)

diff --git a/workbench/clients/pcap_meta.py b/workbench/clients/pcap_meta.py
@@ -25,7 +25,7 @@ def run():
 
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'pcap')
+            md5 = workbench.store_sample(f.read(), base_name, 'pcap')
             results = workbench.work_request('view_pcap', md5)
             print 'Filename: %s results:' % (base_name)
             pprint.pprint(results)

diff --git a/workbench/clients/pcap_meta_indexer.py b/workbench/clients/pcap_meta_indexer.py
@@ -23,7 +23,7 @@ def run():
 
         with open(filename,'rb') as pcap_file:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, pcap_file.read(), 'pcap')
+            md5 = workbench.store_sample(pcap_file.read(), base_name, 'pcap')
 
             # Index the view_pcap output (notice we can ask for any worker output)
             # Also (super important) it all happens on the server side.

diff --git a/workbench/clients/pcap_report.py b/workbench/clients/pcap_report.py
@@ -38,7 +38,7 @@ def run():
 
         # Process the pcap file
         with open(filename,'rb') as f:
-            md5 = WORKBENCH.store_sample(filename, f.read(), 'pcap')
+            md5 = WORKBENCH.store_sample(f.read(), filename, 'pcap')
             result = WORKBENCH.work_request('view_pcap', md5)
             result.update(WORKBENCH.work_request('meta', result['view_pcap']['md5']))
             results.append(result)

diff --git a/workbench/clients/pe_indexer.py b/workbench/clients/pe_indexer.py
@@ -25,7 +25,7 @@ def run():
 
         with open(filename, 'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'exe')
+            md5 = workbench.store_sample(f.read(), base_name, 'exe')
 
             # Index the strings and features output (notice we can ask for any worker output)
             # Also (super important) it all happens on the server side.

diff --git a/workbench/clients/pe_peid.py b/workbench/clients/pe_peid.py
@@ -27,7 +27,7 @@ def run():
 
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'exe')
+            md5 = workbench.store_sample(f.read(), base_name, 'exe')
             results = workbench.work_request('pe_peid', md5)
             pprint.pprint(results)
 

diff --git a/workbench/clients/pe_sim_graph.py b/workbench/clients/pe_sim_graph.py
@@ -21,7 +21,7 @@ def add_it(workbench, file_list, labels):
         if filename != '.DS_Store':
             with open(filename, 'rb') as pe_file:
                 base_name = os.path.basename(filename)
-                md5 = workbench.store_sample(base_name,  pe_file.read(), 'exe')
+                md5 = workbench.store_sample(pe_file.read(), base_name, 'exe')
                 workbench.add_node(md5, md5[:6], labels)
                 md5s.append(md5)
     return md5s

diff --git a/workbench/clients/short_md5s.py b/workbench/clients/short_md5s.py
@@ -27,7 +27,7 @@ def run():
 
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'exe')
+            md5 = workbench.store_sample(f.read(), base_name, 'exe')
             results = workbench.work_request('meta', md5[:6])
             pprint.pprint(results)
 

diff --git a/workbench/clients/timeout_corner/stress_test.py b/workbench/clients/timeout_corner/stress_test.py
@@ -42,7 +42,7 @@ def process_files(path):
         for filename in file_list:
             with open(filename, 'rb') as f:
                 base_name = os.path.basename(filename)
-                md5 = workbench.store_sample(base_name, f.read(), type_tag)
+                md5 = workbench.store_sample(f.read(), base_name, type_tag)
                 workbench.work_request('view', md5)
                 print 'Filename: %s' % (base_name)
         total_files += num_files

diff --git a/workbench/clients/upload_file.py b/workbench/clients/upload_file.py
@@ -23,7 +23,7 @@ def run():
         # Throw file into workbench
         filename = os.path.basename(my_file)
         raw_bytes = f.read()
-        md5 = workbench.store_sample(filename, raw_bytes, 'exe')
+        md5 = workbench.store_sample(raw_bytes, filename, 'exe')
         results = workbench.work_request('view', md5)
         print 'Filename: %s' % filename
         pprint.pprint(results)

diff --git a/workbench/clients/upload_file_chunks.py b/workbench/clients/upload_file_chunks.py
@@ -31,11 +31,11 @@ def run():
         raw_bytes = f.read()
         md5_list = []
         for chunk in chunks(raw_bytes, 1024*1024):
-            md5_list.append(workbench.store_sample(filename, chunk, 'exe'))
+            md5_list.append(workbench.store_sample(chunk, filename, 'exe'))
 
         # Now we just ask Workbench to combine these
         combined_md5 = workbench.combine_samples(md5_list, filename, 'exe')
-        real_md5 = workbench.store_sample(filename, raw_bytes, 'exe')
+        real_md5 = workbench.store_sample(raw_bytes, filename, 'exe')
         assert(combined_md5 == real_md5)
 
 def test():

diff --git a/workbench/clients/workbench_shell.py b/workbench/clients/workbench_shell.py
@@ -173,7 +173,7 @@ def file_chunker(self, filename, raw_bytes, type_tag):
         chunk_size = 1*mb # 1 MB
         total_bytes = len(raw_bytes)
         for chunk in self.chunks(raw_bytes, chunk_size):
-            md5_list.append(self.workbench.store_sample(filename, chunk, type_tag))
+            md5_list.append(self.workbench.store_sample(chunk, filename, type_tag))
             sent_bytes += chunk_size
             self.progress_print(sent_bytes, total_bytes)
             # print '\t%s- Sending %.1f MB (%.1f MB)...%s' % (F.YELLOW, sent_bytes/mb, total_bytes/mb, F.RESET)

diff --git a/workbench/clients/zip_file_extraction.py b/workbench/clients/zip_file_extraction.py
@@ -21,7 +21,7 @@ def run():
     for filename in file_list:
         with open(filename,'rb') as f:
             base_name = os.path.basename(filename)
-            md5 = workbench.store_sample(base_name, f.read(), 'zip')
+            md5 = workbench.store_sample(f.read(), base_name, 'zip')
             results = workbench.work_request('view', md5)
             print 'Filename: %s ' % (base_name)
             pprint.pprint(results)

diff --git a/workbench/notebooks/Adding_Worker.ipynb b/workbench/notebooks/Adding_Worker.ipynb
@@ -172,7 +172,7 @@
       "# Okay lets load up a file, and see what this silly meta thing gives back\n",
       "filename = '../data/pe/bad/9e42ff1e6f75ae3e60b24e48367c8f26'\n",
       "with open(filename,'rb') as f:\n",
-      "    my_md5 = c.store_sample(filename, f.read(),'exe')\n",
+      "    my_md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "output = c.work_request('meta', my_md5)\n",
       "output"
      ],
@@ -808,7 +808,7 @@
       "working_set = []\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5 = c.store_sample(filename, f.read(), 'exe')\n",
+      "        md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "        working_set.append(md5)"
      ],
      "language": "python",

diff --git a/workbench/notebooks/Generator_Pipelines.ipynb b/workbench/notebooks/Generator_Pipelines.ipynb
@@ -71,7 +71,7 @@
       "md5_list = []\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5_list.append(c.store_sample(filename, f.read(), 'exe'))\n",
+      "        md5_list.append(c.store_sample(f.read(), filename, 'exe'))\n",
       "print 'Files loaded: %d' % len(md5_list)\n",
       "md5_list[:5]"
      ],

diff --git a/workbench/notebooks/PCAP_DriveBy.ipynb b/workbench/notebooks/PCAP_DriveBy.ipynb
@@ -92,7 +92,7 @@
       "# Load in the PCAP file\n",
       "filename = '../data/pcap/kitchen_boss.pcap'\n",
       "with open(filename,'rb') as f:\n",
-      "    pcap_md5 = c.store_sample(filename, f.read(), 'pcap')"
+      "    pcap_md5 = c.store_sample(f.read(), filename, 'pcap')"
      ],
      "language": "python",
      "metadata": {},

diff --git a/workbench/notebooks/PCAP_to_Dataframe.ipynb b/workbench/notebooks/PCAP_to_Dataframe.ipynb
@@ -78,7 +78,7 @@
      "input": [
       "# Load in the PCAP file\n",
       "with open('../data/pcap/gold_xxx.pcap','rb') as f:\n",
-      "    pcap_md5 = c.store_sample('gold_xxx', f.read(), 'pcap')"
+      "    pcap_md5 = c.store_sample(f.read(), 'gold_xxx', 'pcap')"
      ],
      "language": "python",
      "metadata": {},

diff --git a/workbench/notebooks/PCAP_to_Graph.ipynb b/workbench/notebooks/PCAP_to_Graph.ipynb
@@ -75,7 +75,7 @@
      "input": [
       "# Load in the PCAP file\n",
       "with open('../data/pcap/gold_xxx.pcap','rb') as f:\n",
-      "    pcap_md5 = c.store_sample('gold_xxx', f.read(), 'pcap')"
+      "    pcap_md5 = c.store_sample(f.read(), 'gold_xxx', 'pcap')"
      ],
      "language": "python",
      "metadata": {},

diff --git a/workbench/notebooks/PE_SimGraph.ipynb b/workbench/notebooks/PE_SimGraph.ipynb
@@ -67,7 +67,7 @@
       "    md5_list = []\n",
       "    for filename in file_list:\n",
       "        with open(filename,'rb') as f:\n",
-      "            md5_list.append(c.store_sample(filename, f.read(), 'exe'))\n",
+      "            md5_list.append(c.store_sample(f.read(), filename, 'exe'))\n",
       "    print 'Files loaded: %d' % len(md5_list)\n",
       "    return md5_list\n",
       "\n",

diff --git a/workbench/notebooks/PE_Static_Analysis.ipynb b/workbench/notebooks/PE_Static_Analysis.ipynb
@@ -154,7 +154,7 @@
         "\tindex_sample(md5, index_name)\n",
         "\tindex_worker_output(worker_class, md5, index_name, subfield)\n",
         "\tsearch(index_name, query)\n",
-        "\tstore_sample(filename, input_bytes, type_tag)\n",
+        "\tstore_sample(input_bytes, filename, type_tag)\n",
         "\tstore_sample_set(md5_list)\n",
         "\twork_request(worker_class, md5, subkeys=None)\n"
        ]
@@ -178,7 +178,7 @@
        "stream": "stdout",
        "text": [
         "\n",
-        " Command: store_sample(filename, input_bytes, type_tag) \n",
+        " Command: store_sample(input_bytes, filename, type_tag) \n",
         " Store a sample into the DataStore.\n",
         "            Args:\n",
         "                filename: name of the file (used purely as meta data not for lookup)\n",
@@ -290,7 +290,7 @@
       "# Okay when we load up a file, we get the md5 back\n",
       "filename = '../data/pe/bad/0cb9aa6fb9c4aa3afad7a303e21ac0f3'\n",
       "with open(filename,'rb') as f:\n",
-      "    my_md5 = c.store_sample(filename, f.read(),'exe')\n",
+      "    my_md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "print my_md5"
      ],
      "language": "python",
@@ -417,7 +417,7 @@
       "working_set = []\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5 = c.store_sample(filename, f.read(), 'exe')\n",
+      "        md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "        working_set.append(md5)\n",
       "print working_set[:5]"
      ],

diff --git a/workbench/notebooks/Rekall_to_Dataframe.ipynb b/workbench/notebooks/Rekall_to_Dataframe.ipynb
@@ -85,7 +85,7 @@
      "input": [
       "# Load in the Memory Image file\n",
       "with open('../data/mem_images/exemplar4.vmem','rb') as f:\n",
-      "    mem_md5 = c.store_sample('exemplar4.vmem', f.read(), 'mem')"
+      "    mem_md5 = c.store_sample(f.read(), 'exemplar4.vmem', 'mem')"
      ],
      "language": "python",
      "metadata": {},

diff --git a/workbench/notebooks/Workbench_Demo.ipynb b/workbench/notebooks/Workbench_Demo.ipynb
@@ -30,7 +30,7 @@
       "    c = zerorpc.Client()\n",
       "    c.connect(\"tcp://127.0.0.1:4242\")\n",
       "    with open('evil.pcap','rb') as f:\n",
-      "        md5 = c.store_sample('evil.pcap', f.read())\n",
+      "        md5 = c.store_sample(f.read(), 'evil.pcap', 'pcap')\n",
       "    print c.work_request('pcap_meta', md5)\n",
       "    </pre>\n",
       "    - Output from above 'client':\n",
@@ -183,7 +183,7 @@
         "\tindex_sample(md5, index_name)\n",
         "\tindex_worker_output(worker_class, md5, index_name, subfield)\n",
         "\tsearch(index_name, query)\n",
-        "\tstore_sample(filename, input_bytes, type_tag)\n",
+        "\tstore_sample(input_bytes, filename, type_tag)\n",
         "\tstore_sample_set(md5_list)\n",
         "\twork_request(worker_class, md5, subkeys=None)\n"
        ]
@@ -207,7 +207,7 @@
        "stream": "stdout",
        "text": [
         "\n",
-        " Command: store_sample(filename, input_bytes, type_tag) \n",
+        " Command: store_sample(input_bytes, filename, type_tag) \n",
         " Store a sample into the DataStore.\n",
         "            Args:\n",
         "                filename: name of the file (used purely as meta data not for lookup)\n",
@@ -313,7 +313,7 @@
       "# Okay when we load up a file, we get the md5 back\n",
       "filename = '../data/pe/bad/0cb9aa6fb9c4aa3afad7a303e21ac0f3'\n",
       "with open(filename,'rb') as f:\n",
-      "    my_md5 = c.store_sample(filename, f.read(),'exe')\n",
+      "    my_md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "print my_md5"
      ],
      "language": "python",
@@ -472,7 +472,7 @@
       "working_set = []\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5 = c.store_sample(filename, f.read(), 'exe')\n",
+      "        md5 = c.store_sample(f.read(), filename, 'exe')\n",
       "        results = c.work_request('pe_classifier', md5)\n",
       "        working_set.append(md5)\n",
       "        print 'Results: %s' % (results)"
@@ -1016,7 +1016,7 @@
       "file_list = [os.path.join('../data/pdf/bad', child) for child in os.listdir('../data/pdf/bad')]\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5 = c.store_sample(filename, f.read(), 'pdf')\n",
+      "        md5 = c.store_sample(f.read(), filename, 'pdf')\n",
       "        working_set.append(md5)"
      ],
      "language": "python",
@@ -1249,7 +1249,7 @@
       "results = []\n",
       "for filename in file_list:\n",
       "    with open(filename,'rb') as f:\n",
-      "        md5 = c.store_sample(os.path.basename(filename), f.read(), tag_type(filename))\n",
+      "        md5 = c.store_sample(f.read(), os.path.basename(filename), tag_type(filename))\n",
       "        results.append(c.work_request('view', md5))\n",
       "pprint.pprint(results[:5])"
      ],

diff --git a/workbench/server/workbench_server.py b/workbench/server/workbench_server.py
@@ -191,7 +191,7 @@ def combine_samples(self, md5_list, filename, type_tag):
             self.remove_sample(md5)
 
         # Store it
-        return self.store_sample(filename, total_bytes, type_tag)
+        return self.store_sample(total_bytes, filename, type_tag)
 
     def remove_sample(self, md5):
         """Remove the sample from the data store"""

diff --git a/workbench/utils/pcap_streamer.py b/workbench/utils/pcap_streamer.py
@@ -91,7 +91,7 @@ def store_file(self, filename):
         storage_name = "streaming_pcap" + str(self.pcap_index)
         print filename, storage_name
         with open(filename,'rb') as f:
-            self.workbench.store_sample(storage_name, f.read(), 'pcap')
+            self.workbench.store_sample(f.read(), storage_name, 'pcap')
         self.pcap_index += 1
 
         # Close workbench client

diff --git a/workbench/workers/json_meta.py b/workbench/workers/json_meta.py
@@ -39,7 +39,7 @@ def test():
     # Generate input for the worker
     import os
     data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/json/generated.json')
-    md5 = workbench.store_sample('unknown.json', open(data_path, 'rb').read(), 'json')
+    md5 = workbench.store_sample( open(data_path, 'rb').read(), 'unknown.json', 'json')
     input_data = workbench.get_sample(md5)
     input_data.update(workbench.work_request('meta', md5))
 

diff --git a/workbench/workers/log_meta.py b/workbench/workers/log_meta.py
@@ -31,7 +31,7 @@ def test():
     # Generate input for the worker
     import os
     data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/log/system.log')
-    md5 = workbench.store_sample('system.log', open(data_path, 'rb').read(), 'log')
+    md5 = workbench.store_sample(open(data_path, 'rb').read(), 'system.log', 'log')
     input_data = workbench.get_sample(md5)
     input_data.update(workbench.work_request('meta', md5))
 

diff --git a/workbench/workers/mem_base.py b/workbench/workers/mem_base.py
@@ -69,7 +69,7 @@ def test():
         exit(1)
 
     # Store the sample
-    md5 = c.store_sample('exemplar4.vmem', open(data_path, 'rb').read(), 'mem')
+    md5 = c.store_sample(open(data_path, 'rb').read(), 'exemplar4.vmem', 'mem')
 
     # Unit test stuff
     input_data = c.get_sample(md5)

diff --git a/workbench/workers/mem_connscan.py b/workbench/workers/mem_connscan.py
@@ -43,7 +43,7 @@ def test():
         raw_bytes = mem_file.read()
         md5 = hashlib.md5(raw_bytes).hexdigest()
         if not workbench.has_sample(md5):
-            md5 = workbench.store_sample('exemplar4.vmem', open(data_path, 'rb').read(), 'mem')
+            md5 = workbench.store_sample(open(data_path, 'rb').read(), 'exemplar4.vmem', 'mem')
 
     # Execute the worker (unit test)
     worker = MemoryImageConnScan()

diff --git a/workbench/workers/mem_dlllist.py b/workbench/workers/mem_dlllist.py
@@ -58,7 +58,7 @@ def test():
         raw_bytes = mem_file.read()
         md5 = hashlib.md5(raw_bytes).hexdigest()
         if not workbench.has_sample(md5):
-            md5 = workbench.store_sample('exemplar4.vmem', open(data_path, 'rb').read(), 'mem')
+            md5 = workbench.store_sample(open(data_path, 'rb').read(), 'exemplar4.vmem', 'mem')
 
     # Execute the worker (unit test)
     worker = MemoryImageDllList()