From 1c51a6fa9fef24e14d5e24b66aeb88bd10ce2baf Mon Sep 17 00:00:00 2001
From: John Kranz
Date: Fri, 7 Jan 2022 08:48:50 -0700
Subject: [PATCH] RMS-6798: Add MD5 example to python SDK

---
 samples/puttingDataWithMD5.py | 105 ++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 samples/puttingDataWithMD5.py

diff --git a/samples/puttingDataWithMD5.py b/samples/puttingDataWithMD5.py
new file mode 100644
index 0000000..6751b25
--- /dev/null
+++ b/samples/puttingDataWithMD5.py
@@ -0,0 +1,105 @@
+# Copyright 2014-2022 Spectra Logic Corporation. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use
+# this file except in compliance with the License. A copy of the License is located at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the "license" file accompanying this file.
+# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from ds3 import ds3
+import os
+import time
+import base64
+import hashlib
+
+client = ds3.createClientFromEnv()
+
+bucketName = "books"
+
+# make sure the bucket that we will be sending objects to exists
+client.put_bucket(ds3.PutBucketRequest(bucketName))
+
+# create your list of objects that will be sent to DS3
+# this example assumes that these files exist on the file system
+
+fileList = ["beowulf.txt", "sherlock_holmes.txt", "tale_of_two_cities.txt", "ulysses.txt"]
+
+# this method maps a file path to a Ds3PutObject
+def fileNameToDs3PutObject(filePath, prefix=""):
+    size = os.stat(pathForResource(filePath)).st_size
+    return ds3.Ds3PutObject(prefix + filePath, size)
+
+# this method gets the OS-specific path for an object located in the resources folder
+def pathForResource(resourceName):
+    currentPath = os.path.dirname(str(__file__))
+    return os.path.join(currentPath, "resources", resourceName)
+
+# get the sizes for each file
+fileList = list(map(fileNameToDs3PutObject, fileList))
+
+# submit the put bulk request to DS3
+bulkResult = client.put_bulk_job_spectra_s3(ds3.PutBulkJobSpectraS3Request(bucketName, fileList))
+
+# the bulk request will split the files over several chunks if it needs to.
+# we then need to ask which chunks we can send, and send them while making
+# sure we don't resend the same chunks
+
+# create a set of the chunk ids which will be used to track
+# what chunks have not been sent
+chunkIds = set([x['ChunkId'] for x in bulkResult.result['ObjectsList']])
+
+# while we still have chunks to send
+while len(chunkIds) > 0:
+    # get a list of the available chunks that we can send
+    availableChunks = client.get_job_chunks_ready_for_client_processing_spectra_s3(
+        ds3.GetJobChunksReadyForClientProcessingSpectraS3Request(bulkResult.result['JobId']))
+
+    chunks = availableChunks.result['ObjectsList']
+
+    # check to make sure we got some chunks; if we did not,
+    # sleep and retry. An empty list could mean that the cache is full
+    if len(chunks) == 0:
+        time.sleep(60)
+        continue
+
+    # for each chunk that is available, check to make sure
+    # we have not sent it, and if not, send that object
+    for chunk in chunks:
+        if not chunk['ChunkId'] in chunkIds:
+            continue
+
+        chunkIds.remove(chunk['ChunkId'])
+        for obj in chunk['ObjectList']:
+            # if the program crashed and we are resending a chunk, some objects
+            # may already be in cache. Check that an object is not in cache
+            # before sending it to Spectra S3
+            if obj['InCache'] == 'false':
+                localFileName = "resources/" + obj['Name']
+                objectDataStream = open(localFileName, "rb")
+                objectDataStream.seek(int(obj['Offset']), 0)
+                objectChunk = objectDataStream.read(int(obj['Length']))
+                checksum = hashlib.md5(objectChunk)
+                encodedChecksum = base64.b64encode(checksum.digest()).decode('utf-8')
+                objectDataStream.seek(int(obj['Offset']), 0)
+                client.put_object(ds3.PutObjectRequest(bucketName,
+                                                       obj['Name'],
+                                                       obj['Length'],
+                                                       objectDataStream,
+                                                       offset=int(obj['Offset']),
+                                                       job=bulkResult.result['JobId'],
+                                                       headers={"Content-MD5": encodedChecksum}))
+
+# we now verify that all our objects have been sent to DS3
+bucketResponse = client.get_bucket(ds3.GetBucketRequest(bucketName))
+
+for obj in bucketResponse.result['ContentsList']:
+    print(obj['Key'])
+
+# delete the bucket by first deleting all the objects, and then deleting the bucket
+for obj in bucketResponse.result['ContentsList']:
+    client.delete_object(ds3.DeleteObjectRequest(bucketName, obj['Key']))
+
+client.delete_bucket(ds3.DeleteBucketRequest(bucketName))
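A note on the Content-MD5 header used above: its value is the base64 encoding
of the raw 16-byte MD5 digest of exactly the bytes being sent (not the hex
digest, and not the whole file when a chunk is sent at an offset). Below is a
minimal standalone sketch of the same computation; the file name, offset, and
length are placeholder values, not part of the patch:

    import base64
    import hashlib

    def content_md5_for_range(path, offset, length):
        # read only the byte range that will be sent, mirroring the
        # chunked put in the sample above
        with open(path, "rb") as stream:
            stream.seek(offset, 0)
            data = stream.read(length)
        # base64-encode the raw digest bytes, not hexdigest()
        return base64.b64encode(hashlib.md5(data).digest()).decode('utf-8')

    print(content_md5_for_range("resources/beowulf.txt", 0, 1024))

This also shows why the sample seeks back to obj['Offset'] after hashing:
reading the chunk to compute the digest advances the stream, so it must be
repositioned before put_object reads the same bytes again.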