In [None]:
import datalab.bigquery as bq
import seaborn as sns
import numpy as np
import shutil
import tensorflow as tf
import os
import pandas as pd
from pandas_gbq.gbq import GenericGBQException
import logging
print(tf.__version__)

In [None]:
%sql --module afewrecordsfordisplay
SELECT
  *
FROM
  [mkt-cloudml-jumpstart:datasets.events_4_attributes_dataprepaggregated]
LIMIT
  1000

In [None]:
# Run the `afewrecordsfordisplay` SQL module defined in the %sql cell and convert
# the result with to_dataframe().
# NOTE(review): `churn` is reassigned further down by the query_gbq(...) call, so
# this preview load is overwritten -- confirm both loads are still needed.
churn = bq.Query(afewrecordsfordisplay).to_dataframe()

In [None]:
def query_gbq(query, project_id, **bq_query_config_args):
    """Run ``query`` through ``pd.read_gbq`` and return the resulting DataFrame.

    The job is configured by ``get_bq_query_config`` so that results are
    written to a destination table. If the first attempt raises
    ``GenericGBQException`` the query is retried once with
    ``writeDisposition`` set to ``WRITE_TRUNCATE`` so that an existing
    destination table is overwritten.

    Args:
        query: SQL text passed to ``pd.read_gbq``.
        project_id: Project passed to ``pd.read_gbq`` and used for the
            destination table.
        **bq_query_config_args: Forwarded to ``get_bq_query_config``
            (``allowLargeResults``, ``datasetId``, ``tableId``).

    Returns:
        Whatever ``pd.read_gbq`` returns for the query.

    Raises:
        GenericGBQException: If the retry with ``WRITE_TRUNCATE`` also fails.
    """
    try:
        return pd.read_gbq(
            query, project_id,
            configuration=get_bq_query_config(project_id, **bq_query_config_args)
        )
    except GenericGBQException as exc:
        # The exception type is generic, so log its text: the "table already
        # exists" explanation is only the most likely cause, not the only one.
        logging.warning(
            "Generic GBQ exception detected (%s), probably because the intermediate data table "
            "already exists, attempting to overwrite...", exc)

    # Build a fresh configuration for the retry instead of reusing the dict
    # handed to the first call, in case read_gbq modified it.
    query_config = get_bq_query_config(project_id, **bq_query_config_args)
    query_config['query']["writeDisposition"] = "WRITE_TRUNCATE"  # tells BQ to overwrite
    return pd.read_gbq(query, project_id, configuration=query_config)

def get_bq_query_config(
    projectId, allowLargeResults=True, datasetId='query_intermediate_data', tableId='intermediate'
):
    """Build the query job configuration dict used with ``pd.read_gbq``.

    Args:
        projectId: Project that owns the destination table.
        allowLargeResults: Value stored under ``allowLargeResults``.
        datasetId: Dataset of the destination table.
        tableId: Name of the destination table.

    Returns:
        A new ``{"query": {...}}`` dict containing ``allowLargeResults`` and a
        ``destinationTable`` entry built from the three identifiers.
    """
    destination = dict(projectId=projectId, datasetId=datasetId, tableId=tableId)
    query_section = {
        "allowLargeResults": allowLargeResults,
        'destinationTable': destination,
    }
    return {"query": query_section}

# Load the aggregated events table through query_gbq; the very large LIMIT
# is effectively "all rows" unless the table exceeds it.
churn = query_gbq(
    query="SELECT * FROM datasets.events_4_attributes_dataprepaggregated LIMIT 100000000",
    project_id="mkt-cloudml-jumpstart"
)

In [None]:
# Preview the first rows of the loaded frame.
churn.head()

In [None]:
# Regression plot of sum_column_0 against ts_absoluteday, drawn without a
# confidence band (ci=None). Binding the result to `ax` keeps the axes
# object from being echoed as cell output.
ax = sns.regplot(
    data=churn,
    x="ts_absoluteday",
    y="sum_column_0",
    ci=None,
    truncate=True
)

In [None]:
# Summary statistics for the loaded frame.
churn.describe()

In [None]:
%bash
# Create the Cloud Storage bucket that the later cells grant access to and upload into.
# The name is hard-coded here and must match the BUCKET value set in a following cell.
gsutil mb gs://cloud_ml_test_dh #change to your own

In [None]:
# Notebook-wide settings. A later cell copies them into os.environ so the
# %bash cells can read them as $PROJECT, $BUCKET and $REGION.
PROJECT = 'mkt-cloudml-jumpstart' #change to your own
BUCKET = 'cloud_ml_test_dh' #change to your own
REGION = 'europe-west1' 

In [None]:
# Directory the later %bash cells read churn.csv from (via the REPO environment variable).
REPO = "/content/datalab/testing_ml_engine" #change to your own
# Listing the directory shows its contents and raises if the path does not exist.
os.listdir(REPO)

In [None]:
# Export the notebook settings as environment variables so the %bash cells
# can reference them as $PROJECT, $BUCKET, $REGION and $REPO.
os.environ.update({
    'PROJECT': PROJECT,
    'BUCKET': BUCKET,
    'REGION': REGION,
    'REPO': REPO,
})

In [None]:
%bash
# Point gcloud at the project and region exported from the notebook.
# Expansions are quoted so an empty or unusual value is passed as one argument.
gcloud config set project "$PROJECT"
gcloud config set compute/region "$REGION"

In [None]:
%bash
# Look up the Cloud ML service account for the project and grant it access
# to the bucket.
PROJECT_ID=$PROJECT
AUTH_TOKEN=$(gcloud auth print-access-token)
# getConfig returns JSON; the inline Python prints its 'serviceAccount' field.
# print(...) with parentheses works whether `python` is Python 2 or Python 3.
SVC_ACCOUNT=$(curl -X GET -H "Content-Type: application/json" \
    -H "Authorization: Bearer $AUTH_TOKEN" \
    "https://ml.googleapis.com/v1/projects/${PROJECT_ID}:getConfig" \
    | python -c "import json; import sys; response = json.load(sys.stdin); \
    print(response['serviceAccount'])")

# Without an account name the gsutil calls below would be issued with an
# empty grantee, so stop with a clear message instead.
if [ -z "$SVC_ACCOUNT" ]; then
    echo "Could not determine the Cloud ML service account for project $PROJECT_ID" >&2
    exit 1
fi

echo "Authorizing the Cloud ML Service account $SVC_ACCOUNT to access files in $BUCKET"
# Default object ACL for the bucket: read access for the service account.
gsutil -m defacl ch -u "${SVC_ACCOUNT}:R" "gs://$BUCKET"
# Recursive read access on what is already in the bucket.
gsutil -m acl ch -u "${SVC_ACCOUNT}:R" -r "gs://$BUCKET"  # error message (if bucket is empty) can be ignored
# Write access on the bucket itself.
gsutil -m acl ch -u "${SVC_ACCOUNT}:W" "gs://$BUCKET"

In [None]:
# Write the CSV into REPO, because the following %bash cells read
# $REPO/churn.csv; a bare relative path only lands there when the kernel's
# working directory happens to be REPO.
churn.to_csv(os.path.join(REPO, 'churn.csv'), sep=',')

In [None]:
%bash
# Print the first line (the header row) of the exported CSV in $REPO.
head -n 1 "$REPO/churn.csv"

In [None]:
%bash
# Show the target bucket, then copy the exported CSV to the bucket root.
# Expansions are quoted so each path is passed as a single argument.
echo "$BUCKET"
gsutil -m cp "${REPO}/churn.csv" "gs://${BUCKET}/"