Added doc strings to functions with ambiguous args #44

Merged · 2 commits · Jun 19, 2015
Changes from 1 commit
9 changes: 9 additions & 0 deletions bigquery/samples/export_data_to_cloud_storage.py
@@ -21,6 +21,15 @@
def export_table(service, cloud_storage_path,
                 projectId, datasetId, tableId,
                 num_retries=5):
    """
    service: initialized and authorized bigquery
Contributor (review comment on this line):
Something like this instead:

"""Exports a table from bigquery.

Arguments: (or Args:)
    service: ..
    cloud_storage_path: ...

Returns: A x instance representing..

"""

        google-api-client object,
    cloud_storage_path: fully qualified
        path to a Google Cloud Storage location,
        e.g. gs://mybucket/myfolder/
    returns: an extract job resource representing the
        job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
    """
    # Generate a unique job_id so retries
    # don't accidentally duplicate export
    job_data = {
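For context, a minimal usage sketch of export_table as documented above (not part of this diff): get_service() comes from bigquery/samples/utils.py later in this changeset, and the project, dataset, table, and bucket names are hypothetical placeholders.

service = get_service()  # from bigquery/samples/utils.py (below)
job = export_table(service, 'gs://mybucket/myfolder/export.csv',
                   projectId='my-project', datasetId='my_dataset',
                   tableId='my_table')
# The extract job resource carries the job id, per the v2 jobs reference
# linked in the docstring.
print(job['jobReference']['jobId'])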
7 changes: 7 additions & 0 deletions bigquery/samples/load_data_by_post.py
@@ -22,6 +22,13 @@

# [START make_post]
def make_post(http, schema, data, projectId, datasetId, tableId):
"""
http: an authorized httplib2 client,
schema: a valid bigquery schema,
see https://cloud.google.com/bigquery/docs/reference/v2/tables,
data: valid JSON to insert into the table
returns: an http.request object
"""
    url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
           projectId + '/jobs')
    # Create the body of the request, separated by a boundary of xxx
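For context, a hedged sketch of calling make_post (not part of this diff). The exact shapes of schema and data are not visible in this hunk, so a parsed fields list and a newline-delimited JSON string are assumed; project, dataset, and table names are hypothetical.

import httplib2
from oauth2client.client import GoogleCredentials

# Build an authorized httplib2 client from application default credentials.
credentials = GoogleCredentials.get_application_default()
http = credentials.authorize(httplib2.Http())

schema = [{'name': 'word', 'type': 'STRING'}]  # assumed shape
data = '{"word": "hello"}\n'                   # assumed shape
# httplib2's request() returns a (response, content) tuple, which is what
# the docstring's "http.request object" refers to.
response, content = make_post(http, schema, data,
                              projectId='my-project',
                              datasetId='my_dataset',
                              tableId='my_table')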
11 changes: 11 additions & 0 deletions bigquery/samples/load_data_from_csv.py
@@ -20,6 +20,17 @@
# [START load_table]
def load_table(service, source_schema, source_csv,
               projectId, datasetId, tableId, num_retries=5):
"""
service: an initialized and authorized bigquery
google-api-client object
source_schema: a valid bigquery schema,
see https://cloud.google.com/bigquery/docs/reference/v2/tables
source_csv: the fully qualified Google Cloud Storage location of
the data to load into your table
returns: a bigquery load job, see
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
"""

    # Generate a unique job_id so retries
    # don't accidentally duplicate query
    job_data = {
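For context, a minimal usage sketch of load_table (not part of this diff), assuming source_schema takes a parsed fields list matching the tables reference linked above; all names are hypothetical placeholders.

service = get_service()  # from bigquery/samples/utils.py (below)
schema = [{'name': 'word', 'type': 'STRING'},
          {'name': 'count', 'type': 'INTEGER'}]
job = load_table(service, schema, 'gs://mybucket/myfolder/data.csv',
                 projectId='my-project', datasetId='my_dataset',
                 tableId='my_table')
# poll_job from bigquery/samples/utils.py (below) can wait for completion.
poll_job(service, 'my-project', job['jobReference']['jobId'])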
7 changes: 7 additions & 0 deletions bigquery/samples/utils.py
@@ -15,8 +15,11 @@

# [START get_service]
def get_service():
"""returns an initialized and authorized bigquery client"""

    from googleapiclient.discovery import build
    from oauth2client.client import GoogleCredentials

    credentials = GoogleCredentials.get_application_default()
    if credentials.create_scoped_required():
        credentials = credentials.create_scoped(
@@ -27,6 +30,8 @@ def get_service():

# [START poll_job]
def poll_job(service, projectId, jobId, interval=5, num_retries=5):
"""checks the status of a job every *interval* seconds"""

    import time

    job_get = service.jobs().get(projectId=projectId, jobId=jobId)
@@ -44,6 +49,8 @@ def poll_job(service, projectId, jobId, interval=5, num_retries=5):

# [START paging]
def paging(service, request_func, num_retries=5, **kwargs):
"""pages though the results of an asynchronous job"""

    has_next = True
    while has_next:
        response = request_func(**kwargs).execute(num_retries=num_retries)
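For context, a hedged usage sketch of paging (not part of this diff). It assumes paging is a generator yielding each response page, consistent with the truncated loop above, and that the query job has already finished; ids are hypothetical.

service = get_service()
# request_func is a bound request builder; paging forwards **kwargs to it.
for page in paging(service, service.jobs().getQueryResults,
                   projectId='my-project', jobId='some-finished-job-id'):
    for row in page.get('rows', []):
        # Each row is {'f': [{'v': value}, ...]} in the v2 API.
        print([cell['v'] for cell in row['f']])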