### Pre-requisite Steps

Preparation
0. Install the required Python libraries
1. Create the data directory
2. Download the credit card fraud detection dataset to the local filesystem
3. Create a Google Cloud Storage bucket for the project
4. Create a Google BigQuery dataset
5. Upload the credit card fraud detection dataset to the BigQuery dataset using a load job with schema auto-detection
6. Create a Dataflow job that reads the data from BigQuery and writes it to the Cloud Storage bucket without any preprocessing

#### Install required pre-requisite libraries

In [None]:
!pip install google-cloud-storage

In [None]:
# Create the local ./data directory used by the download and export cells below
# (-p: no error if it already exists).
!mkdir -p data

#### Import required libraries

In [None]:
import pandas as pd
from google.cloud import storage
from google.cloud import bigquery

In [None]:
# Notebook-wide settings.
# Set PROJECT and BUCKET to your own project id and bucket name before running.

# Public location of the raw credit card fraud CSV.
URI = 'https://storage.googleapis.com/advanced-solutions-lab/fraud/creditcard.csv'

# Google Cloud project and Cloud Storage bucket.
PROJECT = 'google-cloud-project'
BUCKET = 'google-cloud-bucket-name'

# BigQuery destination dataset and table.
DATASET = 'credit_card_fraud_detection'
TABLE = 'creditcard'

# Cloud Storage object that gets loaded into the BigQuery table.
GS_URI = 'gs://{}/credit-card-fraud-detection/creditcard-schema.csv'.format(
    BUCKET)

In [None]:
import os

# Mirror the notebook settings into the process environment so that shell
# cells can reference them (e.g. ${BUCKET} in the gsutil cell).
os.environ.update({
    'PROJECT': PROJECT,
    'BUCKET': BUCKET,
    'DATASET': DATASET,
    'TABLE': TABLE,
    'URI': URI,
    'GS_URI': GS_URI,
})

In [None]:
def create_storage_bucket(bucket_name):
    """Create a Cloud Storage bucket and print a confirmation.

    Args:
        bucket_name: Name of the bucket to create.
    """
    new_bucket = storage.Client().create_bucket(bucket_name)
    print('Bucket {} created'.format(new_bucket.name))

In [None]:
# Create the project bucket named by the BUCKET setting.
create_storage_bucket(BUCKET)

#### Create BigQuery dataset

In [None]:
def create_bigquery_dataset(dataset_name):
    """Create a BigQuery dataset located in the US and print a confirmation.

    Args:
        dataset_name: ID of the dataset to create.

    Raises:
        google.api_core.exceptions.AlreadyExists: If the dataset already
            exists within the project.
    """
    client = bigquery.Client()

    # Build the full Dataset object from its reference and pin its location
    # before sending it to the API.
    new_dataset = bigquery.Dataset(client.dataset(dataset_name))
    new_dataset.location = 'US'

    client.create_dataset(new_dataset)  # API request
    print('Bigquery dataset {} created'.format(dataset_name))

In [None]:
# Create the BigQuery dataset named by the DATASET setting.
create_bigquery_dataset(DATASET)

#### Download credit card fraud detection dataset

In [None]:
# Download the raw CSV from URI into the local data directory.
!wget $URI -O ./data/creditcard-download.csv

In [None]:
# Preview the first lines of the downloaded file.
!head $PWD/data/creditcard-download.csv

In [None]:
# Load the downloaded comma-separated file into a pandas DataFrame.
df = pd.read_csv("./data/creditcard-download.csv", sep=",")

In [None]:
# Show the first rows of the DataFrame.
df.head()

In [None]:
# Show descriptive statistics for the DataFrame.
df.describe()

In [None]:
# Re-export the data as UTF-8 CSV without the DataFrame index column.
# The file name matches the object name used in GS_URI.
df.to_csv('./data/creditcard-schema.csv', sep=',', encoding='utf-8', index=False)

In [None]:
# Preview the first lines of the re-exported file.
!head $PWD/data/creditcard-schema.csv

In [None]:
%%bash
# Clear any previous upload under the prefix, then copy the local CSV files
# to the bucket. BUCKET comes from the environment exported earlier.
gsutil -m rm -rf gs://${BUCKET}/credit-card-fraud-detection/*
gsutil -m cp data/*.csv gs://${BUCKET}/credit-card-fraud-detection/

In [None]:
def load_bigquery_table_from_uri(dataset_name, table_name, uri):
    """Load a CSV file from a URI into a BigQuery table and report progress.

    The schema is auto-detected and the first row of the file is skipped.
    The call blocks until the load job has completed, then prints the row
    count of the destination table.

    Args:
        dataset_name: ID of the dataset that holds the destination table.
        table_name: ID of the destination table.
        uri: Source URI of the CSV file to load.
    """
    client = bigquery.Client()
    table_ref = client.dataset(dataset_name).table(table_name)

    config = bigquery.LoadJobConfig()
    # CSV is already the default source format; set explicitly for clarity.
    config.source_format = bigquery.SourceFormat.CSV
    config.skip_leading_rows = 1
    config.autodetect = True

    job = client.load_table_from_uri(uri, table_ref, job_config=config)  # API request
    print('Starting job {}'.format(job.job_id))

    job.result()  # Waits for table load to complete.
    print('Job finished.')

    loaded_table = client.get_table(table_ref)
    print('Loaded {} rows.'.format(loaded_table.num_rows))

In [None]:
# Load the CSV object at GS_URI into the DATASET.TABLE BigQuery table.
load_bigquery_table_from_uri(DATASET, TABLE, GS_URI)

<pre>
# Copyright 2018 Atos. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
</pre>