This repository has been archived by the owner on Jul 21, 2022. It is now read-only.

Feature/ct 194/memcached uploader urls #192

Open · wants to merge 8 commits into base: master
39 changes: 32 additions & 7 deletions bin/conductor
@@ -255,6 +255,11 @@ def parse_args():
description=uploader_parser_desciption,
formatter_class=argparse.RawTextHelpFormatter)

uploader_parser.add_argument("--database_filepath",
help=("The filepath to the local md5 caching database. If no filepath "
"is specified, the database will be created in a temp directory. "
"Note that this flag is only active when --local_upload is True."))

uploader_parser.add_argument("--location",
help=('An optional string to indicate which location this uploader '
'executable should register as. This option is only relevant '
@@ -280,10 +285,25 @@ def parse_args():
"everyday, while storing the last 7 days "
"of logs"))

uploader_parser.add_argument("--md5_caching",
help=("Use cached md5s. This can dramatically improve the uploading "
"times, as md5 checking can be very time consuming. Caching md5s "
"allows subsequent uploads (of the same files) to skip the "
"md5 generation process (if the files appear to not have been "
"modified since the last time they were submitted). The cache is "
"stored locally and uses a file's modification time and file size "
"to intelligently guess whether the file has changed. Set this "
"flag to False if there is concern that files may not be getting "
"re-uploaded properly. "
"Note that this flag is only active when --local_upload is True."),
choices=[False, True],
type=cast_to_bool,
default=None)

uploader_parser.add_argument("--thread_count",
type=int,
default=conductor.CONFIG.get("thread_count"),
help=('The number of threads that should download simultaneously'))
help=('The number of threads that should upload simultaneously'))

uploader_parser.add_argument("--alt",
help=('Run an alternative version of the downloader'),
@@ -427,13 +447,18 @@ def run_submit(args):


def run_uploader(args):
'''
Run the Uploader
If the user has opted to use the alternative uploader (and the system is not on Windows),
then run the alternative uploader. Otherwise, run the standard uploader.
'''
args_dict = vars(args)
if sys.platform == "win32":
uploader.run_uploader(args)
if args_dict.get("alt"):
uploader_v2.run_uploader(args)
else:
uploader.run_uploader(args)
use_alt = bool(args_dict.pop("alt", False))

if use_alt and sys.platform != "win32":
return uploader_v2.run_uploader(args)

return uploader.run_uploader(args)


def run_downloader(args):
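The --md5_caching help text above describes reusing a cached md5 when a file's modification time and size are unchanged. For illustration, a minimal sketch of that heuristic follows; the cache layout and the cached_md5 helper are hypothetical, not the uploader's actual implementation:

import hashlib
import os

def cached_md5(filepath, cache):
    '''
    cache: dict mapping filepath -> {"md5": str, "mtime": float, "size": int}

    Reuse the stored md5 only if the file's mtime and size are unchanged;
    otherwise re-hash the file and refresh the cache entry.
    '''
    stat = os.stat(filepath)
    entry = cache.get(filepath)
    if entry and entry["mtime"] == stat.st_mtime and entry["size"] == stat.st_size:
        return entry["md5"]
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            md5.update(chunk)
    digest = md5.hexdigest()
    cache[filepath] = {"md5": digest, "mtime": stat.st_mtime, "size": stat.st_size}
    return digest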
13 changes: 12 additions & 1 deletion conductor/lib/api_client.py
@@ -1,7 +1,9 @@
import gzip
import json
import logging
import os
import requests
import StringIO
import time
import urlparse
import jwt
@@ -57,7 +59,7 @@ def _make_request(self, verb, conductor_url, headers, params, data, raise_on_err

def make_request(self, uri_path="/", headers=None, params=None, data=None,
verb=None, conductor_url=None, raise_on_error=True, tries=5,
use_api_key=False):
compress=False, use_api_key=False):
'''
verb: PUT, POST, GET, DELETE, HEAD, PATCH
'''
@@ -91,6 +93,15 @@ def make_request(self, uri_path="/", headers=None, params=None, data=None,

assert verb in self.http_verbs, "Invalid http verb: %s" % verb

# GZip Compress the content of the request
if compress:
    headers["Content-Encoding"] = "gzip"
    logger.debug("gzipping content...")
    out_file = StringIO.StringIO()
    with gzip.GzipFile(fileobj=out_file, mode="wb") as gzipper:
        gzipper.write(data)
    data = out_file.getvalue()

# Create a retry wrapper function
retry_wrapper = common.DecRetry(retry_exceptions=CONNECTION_EXCEPTIONS,
tries=tries)
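The compression step above uses Python 2's StringIO module. For reference only (not part of this diff), a rough Python 3 equivalent of the same gzip step, using io.BytesIO, might look like this:

import gzip
import io
import json

def gzip_body(data):
    # Compress an already-serialized request body, mirroring the
    # Content-Encoding: gzip path in make_request.
    if isinstance(data, str):
        data = data.encode("utf-8")
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode="wb") as gzipper:
        gzipper.write(data)
    return buf.getvalue()

# Round-trip check: the receiving side transparently gunzips the body.
payload = gzip_body(json.dumps({"jobs": []}))
assert gzip.decompress(payload) == b'{"jobs": []}'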
20 changes: 20 additions & 0 deletions conductor/lib/common.py
@@ -7,6 +7,7 @@
import multiprocessing
import os
import platform
from pprint import pformat
Contributor:
from x import y imports should go after plain import x statements.

E.g., this is what isort gives me:

import base64
import datetime
import functools
import hashlib
import json
import logging
import multiprocessing
import os
import platform
import random
import signal
import subprocess
import sys
import time
import traceback
from pprint import pformat

import yaml

Contributor (Author):
We should probably decide as a team on how we want to sort imports. I haven't seen much guidance in the Google style guide or PEP 8 (aside from grouping imports by builtin, third-party, and internal).
Personally, I sort imports alphabetically, considering the uppermost namespace (parent packages) when evaluating order, and I don't distinguish between from-imports and plain imports. I find it easier to look something up when there is one continuous alphabetical ordering, rather than multiple sections each with their own ordering. But we're all weird humans, thinking in weird ways.

Also, I noticed that isort has several options that make dramatic differences (--no-sections, --order-by-type, --project, --thirdparty, etc.). It might be worth playing around with these to find a workflow that we all like (dislike ;) ).

Contributor:
Good call, let's make this a team decision.

import random
import signal
import subprocess
@@ -671,3 +672,22 @@ class TmpLoader(loader):

with open(filepath) as f:
return yaml.load(f, loader) # nosec (ignore bandit static analysis warning for not using safe_load [B506:yaml_load] )


def sstr(object_, char_count=1000, pretty=True):
    '''
    Return a string representation of the given object, shortened to the given
    char_count. This can be useful when printing/logging data for debugging
    purposes without producing an overwhelming wall of text to scroll through.

    pretty: bool. If True, pretty-print the object.
    '''

    try:
        s_str = pformat(object_) if pretty else str(object_)
    except Exception:
        s_str = "<object cannot be cast to string (%s)>" % type(object_)

    if len(s_str) > char_count:
        s_str = s_str[:char_count] + "...<TRUNCATED>"
    return s_str
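A possible use of sstr when logging large submit payloads (illustrative only; this call site is not part of the diff, and the example payload is made up):

import logging

from conductor.lib.common import sstr

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# A payload with thousands of entries would normally flood the log;
# sstr truncates it to a readable preview.
submit_dict = {"upload_files": ["/tmp/frame_%04d.exr" % i for i in range(5000)]}
logger.debug("submit_dict:\n%s", sstr(submit_dict, char_count=2000))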
2 changes: 1 addition & 1 deletion conductor/lib/conductor_submit.py
@@ -303,7 +303,6 @@ def validate_args(self):
if self.gpu_config.get("type") not in supported_gpu_types:
raise BadArgumentError("GPU type %s is not one of %s" % (self.gpu_config.get("type"), supported_gpu_types))


def send_job(self, upload_files, upload_size):
'''
Construct args for two different cases:
@@ -378,6 +377,7 @@ def send_job(self, upload_files, upload_size):
logger.info("Sending Job...")
response, response_code = self.api_client.make_request(uri_path="jobs/",
data=json.dumps(submit_dict),
compress=True,
raise_on_error=False,
use_api_key=True)
if response_code not in [201, 204]: