initial commit
mco-gh committed Dec 5, 2012
1 parent 5ec76b6 commit 35af59d
Showing 3 changed files with 266 additions and 1 deletion.
42 changes: 41 additions & 1 deletion README.md
@@ -1,4 +1,44 @@
storage-file-tranfer-json-python
================================

Uploads and downloads files between Google Cloud Storage and the local
filesystem using the Google APIs Python Client Library.

api: storage
keywords: cmdline, media, oauth2

chunked_transfer.py
SETUP:
1. Download chunked_transfer.py and client_secrets.json to the same directory.
2. As of August 2012, the Google APIs interface to Google Cloud Storage
(a.k.a. the GCS JSON API) is in Limited Preview, so users must request access
from the API Console Services tab:
<https://code.google.com/apis/console/#:services>
3. Visit the API Console Access tab to create a client ID and secret for an
installed application:
<https://code.google.com/apis/console/#:access>
4. Add your client ID and secret to your client_secrets.json file (the
expected shape is shown just after this list).
5. The first time the script runs, it will open a browser window asking for
permission to access Google Cloud Storage on your behalf. After you've
granted permission, the script creates a credentials.json file, which stores
the access and refresh tokens acquired from the OAuth flow.
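
For reference (see step 4 above), the client_secrets.json template bundled
with this sample, shown in full at the end of this commit, has the following
shape; replace the placeholders with your own client ID and secret:

    {
      "installed": {
        "client_id": "INSERT CLIENT ID HERE",
        "client_secret": "INSERT CLIENT SECRET HERE",
        "redirect_uris": [],
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://accounts.google.com/o/oauth2/token"
      }
    }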

USAGE:
This script uploads and downloads files in chunks between Google Cloud
Storage and your local filesystem. It accepts two arguments: a file path and
a Cloud Storage resource name in "gs://<bucket>/<object>" format. The order
of these arguments dictates whether to upload or download (source first,
destination second).

For example, this command uploads the local file Desktop/file_1 to bucket
'foo' with object name 'bar':

$ python chunked_transfer.py Desktop/file_1 gs://foo/bar

and this command downloads that object back to a file on the desktop:

$ python chunked_transfer.py gs://foo/bar Desktop/file_2

When an upload concludes, the script prints the new object's JSON
representation.
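
The exact fields depend on the version of the Cloud Storage JSON API in use,
but as a rough, hypothetical illustration (values are placeholders and most
metadata fields are omitted), the output for the upload above might include:

    {
      "kind": "storage#object",
      "bucket": "foo",
      "name": "bar",
      ...
    }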

216 changes: 216 additions & 0 deletions chunked_transfer.py
@@ -0,0 +1,216 @@
# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Uploads or downloads files between Google Cloud Storage and the filesystem.
The file is transfered in CHUNKSIZE pieces, and the process can resume in case
of some failures.
Usage examples:
$ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
$ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
"""

import httplib2
import os
import random
import sys
import time

from apiclient.discovery import build as discovery_build
from apiclient.errors import HttpError
from apiclient.http import MediaFileUpload
from apiclient.http import MediaIoBaseDownload
from json import dumps as json_dumps
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage as CredentialStorage
from oauth2client.tools import run as run_oauth2


# CLIENT_SECRETS_FILE, name of a file containing the OAuth 2.0 information for
# this application, including client_id and client_secret. You can acquire an
# ID/secret pair from the API Access tab on the Google APIs Console
# <http://code.google.com/apis/console#access>
# For more information about using OAuth2 to access Google APIs, please visit:
# <https://developers.google.com/accounts/docs/OAuth2>
CLIENT_SECRETS_FILE = 'client_secrets.json'

# File where we will store authentication credentials after acquiring them.
CREDENTIALS_FILE = 'credentials.json'

# Message describing how to use the script.
USAGE = """
Usage examples:
$ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
$ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
"""

RW_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_write'
RO_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_only'

# Helpful message to display if the CLIENT_SECRETS_FILE is missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the APIs Console
<https://code.google.com/apis/console#access>.
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))

# Retry transport and file IO errors.
RETRYABLE_ERRORS = (httplib2.HttpLib2Error, IOError)

# HTTP status codes for which a failed chunk is retried; any other HTTP error
# aborts the transfer.
RETRYABLE_STATUS_CODES = (500, 502, 503, 504)

# Maximum number of consecutive progressless retries before giving up.
NUM_RETRIES = 5

# Number of bytes to send/receive in each request.
CHUNKSIZE = 2 * 1024 * 1024

# Mimetype to use if one can't be guessed from the file extension.
DEFAULT_MIMETYPE = 'application/octet-stream'


def get_authenticated_service(scope):
  print 'Authenticating...'
  flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=scope,
                                 message=MISSING_CLIENT_SECRETS_MESSAGE)

  credential_storage = CredentialStorage(CREDENTIALS_FILE)
  credentials = credential_storage.get()
  if credentials is None or credentials.invalid:
    credentials = run_oauth2(flow, credential_storage)

  print 'Constructing Google Cloud Storage service...'
  http = credentials.authorize(httplib2.Http())
  return discovery_build('storage', 'v1beta1', http=http)


def handle_progressless_iter(error, progressless_iters, num_retries):
  if progressless_iters > num_retries:
    print 'Failed to make progress for too many consecutive iterations.'
    raise error

  # Exponential backoff with jitter: sleep up to 2**progressless_iters seconds.
  sleeptime = random.random() * (2 ** progressless_iters)
  print ('Caught exception (%s). Sleeping for %s seconds before retry #%d.'
         % (str(error), sleeptime, progressless_iters))
  time.sleep(sleeptime)


def print_with_carriage_return(s):
  sys.stdout.write('\r' + s)
  sys.stdout.flush()


def upload(argv):
  filename = argv[1]
  bucket_name, object_name = argv[2][5:].split('/', 1)

  service = get_authenticated_service(RW_SCOPE)

  print 'Building upload request...'
  media = MediaFileUpload(filename, chunksize=CHUNKSIZE, resumable=True)
  if not media.mimetype():
    media = MediaFileUpload(filename, DEFAULT_MIMETYPE, resumable=True)
  request = service.objects().insert(bucket=bucket_name, name=object_name,
                                     media_body=media)

  print 'Uploading file: %s to bucket: %s object: %s' % (filename, bucket_name,
                                                         object_name)

  progressless_iters = 0
  response = None
  while response is None:
    try:
      error = None
      progress, response = request.next_chunk()
      if progress:
        print_with_carriage_return('Upload %d%%' % (100 * progress.progress()))
      progressless_iters = 0
    except HttpError, err:
      error = err
      if err.resp.status not in RETRYABLE_STATUS_CODES:
        raise
    except RETRYABLE_ERRORS, err:
      error = err

    if error is not None:
      progressless_iters += 1
      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
    else:
      progressless_iters = 0

  print '\nUpload complete!'

  print 'Uploaded Object:'
  print json_dumps(response, indent=2)


def download(argv):
  bucket_name, object_name = argv[1][5:].split('/', 1)
  filename = argv[2]

  service = get_authenticated_service(RO_SCOPE)

  print 'Building download request...'
  f = open(filename, 'wb')
  request = service.objects().get_media(bucket=bucket_name,
                                        object=object_name)
  media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)

  print 'Downloading bucket: %s object: %s to file: %s' % (bucket_name,
                                                           object_name,
                                                           filename)

  progressless_iters = 0
  done = False
  while not done:
    try:
      error = None
      progress, done = media.next_chunk()
      if progress:
        print_with_carriage_return('Download %d%%.'
                                   % int(progress.progress() * 100))
    except HttpError, err:
      error = err
      if err.resp.status not in RETRYABLE_STATUS_CODES:
        raise
    except RETRYABLE_ERRORS, err:
      error = err

    if error is not None:
      progressless_iters += 1
      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
    else:
      progressless_iters = 0

  # Flush and close the local file once the download finishes.
  f.close()
  print '\nDownload complete!'


if __name__ == '__main__':
  if len(sys.argv) < 3:
    print 'Too few arguments.'
    print USAGE
    sys.exit(1)
  if sys.argv[2].startswith('gs://'):
    upload(sys.argv)
  elif sys.argv[1].startswith('gs://'):
    download(sys.argv)
  else:
    print USAGE
9 changes: 9 additions & 0 deletions client_secrets.json
@@ -0,0 +1,9 @@
{
"installed": {
"client_id": "INSERT CLIENT ID HERE",
"client_secret": "INSERT CLIENT SECRET HERE",
"redirect_uris": [],
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token"
}
}
