initial commit
mco-gh committed Dec 5, 2012
1 parent 5ec76b6 commit 35af59d
Showing 3 changed files with 266 additions and 1 deletion.
42 changes: 41 additions & 1 deletion README.md
@@ -1,4 +1,44 @@
storage-file-tranfer-json-python
================================

Uploads and downloads files between Google Cloud Storage and the local
filesystem using the Google APIs Python Client Library.

api: storage
keywords: cmdline, media, oauth2

chunked_transfer.py
SETUP:
1. Download chunked_transfer.py and client_secrets.json to the same directory.
2. As of August 2012, the Google APIs interface to Google Cloud Storage
(a.k.a. the GCS JSON API) is in Limited Preview, so users must request access
from the API Console Services tab:
<https://code.google.com/apis/console/#:services>
3. Visit the API Console Access tab to create a client ID and secret for an
installed application:
<https://code.google.com/apis/console/#:access>
4. Add your client ID and secret to your client_secrets.json file (the
expected shape is shown just after this list).
5. The first time the script runs, it will open a browser window asking for
permission to access Google Cloud Storage on your behalf. After you've
granted permission, the script creates a credentials.json file, which stores
the access and refresh tokens acquired from the OAuth flow.
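
For reference (see step 4 above), the client_secrets.json template bundled
with this sample, shown in full at the end of this commit, has the following
shape; replace the placeholders with your own client ID and secret:

    {
      "installed": {
        "client_id": "INSERT CLIENT ID HERE",
        "client_secret": "INSERT CLIENT SECRET HERE",
        "redirect_uris": [],
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://accounts.google.com/o/oauth2/token"
      }
    }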

USAGE:
This script uploads and downloads files in chunks between Google Cloud
Storage and your local filesystem. It accepts two arguments: a file path and
a Cloud Storage resource name in "gs://<bucket>/<object>" format. The order
of these arguments dictates whether to upload or download (source first,
destination second).

For example, this command uploads the local file Desktop/file_1 to bucket
'foo' with object name 'bar':

$ python chunked_transfer.py Desktop/file_1 gs://foo/bar

and this command downloads that object back to a file on the desktop:

$ python chunked_transfer.py gs://foo/bar Desktop/file_2

When an upload concludes, the script prints the new object's JSON
representation.
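
The exact fields depend on the version of the Cloud Storage JSON API in use,
but as a rough, hypothetical illustration (values are placeholders and most
metadata fields are omitted), the output for the upload above might include:

    {
      "kind": "storage#object",
      "bucket": "foo",
      "name": "bar",
      ...
    }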

216 changes: 216 additions & 0 deletions chunked_transfer.py
@@ -0,0 +1,216 @@
# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Uploads or downloads files between Google Cloud Storage and the filesystem.
The file is transfered in CHUNKSIZE pieces, and the process can resume in case
of some failures.
Usage examples:
$ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
$ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
"""

import httplib2
import os
import random
import sys
import time

from apiclient.discovery import build as discovery_build
from apiclient.errors import HttpError
from apiclient.http import MediaFileUpload
from apiclient.http import MediaIoBaseDownload
from json import dumps as json_dumps
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage as CredentialStorage
from oauth2client.tools import run as run_oauth2


# CLIENT_SECRETS_FILE, name of a file containing the OAuth 2.0 information for
# this application, including client_id and client_secret. You can acquire an
# ID/secret pair from the API Access tab on the Google APIs Console
# <http://code.google.com/apis/console#access>
# For more information about using OAuth2 to access Google APIs, please visit:
# <https://developers.google.com/accounts/docs/OAuth2>
CLIENT_SECRETS_FILE = 'client_secrets.json'

# File where we will store authentication credentials after acquiring them.
CREDENTIALS_FILE = 'credentials.json'

# Message describing how to use the script.
USAGE = """
Usage examples:
$ python chunked_transfer.py gs://bucket/object ~/Desktop/filename
$ python chunked_transfer.py ~/Desktop/filename gs://bucket/object
"""

RW_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_write'
RO_SCOPE = 'https://www.googleapis.com/auth/devstorage.read_only'

# Helpful message to display if the CLIENT_SECRETS_FILE is missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the APIs Console
<https://code.google.com/apis/console#access>.
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))

# Retry transport and file IO errors.
RETRYABLE_ERRORS = (httplib2.HttpLib2Error, IOError)

# HTTP status codes for which a failed chunk is retried; any other HTTP error
# aborts the transfer.
RETRYABLE_STATUS_CODES = (500, 502, 503, 504)

# Maximum number of consecutive progressless retries before giving up.
NUM_RETRIES = 5

# Number of bytes to send/receive in each request.
CHUNKSIZE = 2 * 1024 * 1024

# Mimetype to use if one can't be guessed from the file extension.
DEFAULT_MIMETYPE = 'application/octet-stream'


def get_authenticated_service(scope):
  print 'Authenticating...'
  flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=scope,
                                 message=MISSING_CLIENT_SECRETS_MESSAGE)

  credential_storage = CredentialStorage(CREDENTIALS_FILE)
  credentials = credential_storage.get()
  if credentials is None or credentials.invalid:
    credentials = run_oauth2(flow, credential_storage)

  print 'Constructing Google Cloud Storage service...'
  http = credentials.authorize(httplib2.Http())
  return discovery_build('storage', 'v1beta1', http=http)


def handle_progressless_iter(error, progressless_iters, num_retries):
  if progressless_iters > num_retries:
    print 'Failed to make progress for too many consecutive iterations.'
    raise error

  # Exponential backoff with jitter: sleep up to 2**progressless_iters seconds.
  sleeptime = random.random() * (2 ** progressless_iters)
  print ('Caught exception (%s). Sleeping for %s seconds before retry #%d.'
         % (str(error), sleeptime, progressless_iters))
  time.sleep(sleeptime)


def print_with_carriage_return(s):
  sys.stdout.write('\r' + s)
  sys.stdout.flush()


def upload(argv):
  filename = argv[1]
  bucket_name, object_name = argv[2][5:].split('/', 1)

  service = get_authenticated_service(RW_SCOPE)

  print 'Building upload request...'
  media = MediaFileUpload(filename, chunksize=CHUNKSIZE, resumable=True)
  if not media.mimetype():
    media = MediaFileUpload(filename, DEFAULT_MIMETYPE, resumable=True)
  request = service.objects().insert(bucket=bucket_name, name=object_name,
                                     media_body=media)

  print 'Uploading file: %s to bucket: %s object: %s' % (filename, bucket_name,
                                                         object_name)

  progressless_iters = 0
  response = None
  while response is None:
    try:
      error = None
      progress, response = request.next_chunk()
      if progress:
        print_with_carriage_return('Upload %d%%' % (100 * progress.progress()))
      progressless_iters = 0
    except HttpError, err:
      error = err
      if err.resp.status not in RETRYABLE_STATUS_CODES:
        raise
    except RETRYABLE_ERRORS, err:
      error = err

    if error is not None:
      progressless_iters += 1
      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
    else:
      progressless_iters = 0

  print '\nUpload complete!'

  print 'Uploaded Object:'
  print json_dumps(response, indent=2)


def download(argv):
  bucket_name, object_name = argv[1][5:].split('/', 1)
  filename = argv[2]

  service = get_authenticated_service(RO_SCOPE)

  print 'Building download request...'
  f = open(filename, 'wb')
  request = service.objects().get_media(bucket=bucket_name,
                                        object=object_name)
  media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)

  print 'Downloading bucket: %s object: %s to file: %s' % (bucket_name,
                                                           object_name,
                                                           filename)

  progressless_iters = 0
  done = False
  while not done:
    try:
      error = None
      progress, done = media.next_chunk()
      if progress:
        print_with_carriage_return('Download %d%%.'
                                   % int(progress.progress() * 100))
    except HttpError, err:
      error = err
      if err.resp.status not in RETRYABLE_STATUS_CODES:
        raise
    except RETRYABLE_ERRORS, err:
      error = err

    if error is not None:
      progressless_iters += 1
      handle_progressless_iter(error, progressless_iters, NUM_RETRIES)
    else:
      progressless_iters = 0

  # Flush and close the local file once the download finishes.
  f.close()
  print '\nDownload complete!'


if __name__ == '__main__':
  if len(sys.argv) < 3:
    print 'Too few arguments.'
    print USAGE
    sys.exit(1)
  if sys.argv[2].startswith('gs://'):
    upload(sys.argv)
  elif sys.argv[1].startswith('gs://'):
    download(sys.argv)
  else:
    print USAGE
9 changes: 9 additions & 0 deletions client_secrets.json
@@ -0,0 +1,9 @@
{
"installed": {
"client_id": "INSERT CLIENT ID HERE",
"client_secret": "INSERT CLIENT SECRET HERE",
"redirect_uris": [],
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token"
}
}
