# Machine Learning APIs

## Classify Text into Categories with the Natural Language API

### Create a bucket with the Cloud Storage JSON/REST API

In [None]:
# Credentials and identifiers used by the two requests below.
# Replace every <PLACEHOLDER> (angle brackets included) before running.
export OAUTH2_TOKEN=<YOUR_TOKEN>
export PROJECT_ID=<YOUR_PROJECT_ID>
# Target bucket and local image file for the upload request; the original cell
# used $BUCKET_NAME and $OBJECT without defining them.
# NOTE(review): BUCKET_NAME presumably has to match the bucket described in
# values.json - confirm against that file.
export BUCKET_NAME=<YOUR_BUCKET_NAME>
export OBJECT=<PATH_TO_IMAGE_FILE>

# Create a Cloud Storage bucket; the request body is read from values.json,
# which must already exist in the working directory.
curl -X POST --data-binary @values.json \
    -H "Authorization: Bearer $OAUTH2_TOKEN" \
    -H "Content-Type: application/json" \
    "https://www.googleapis.com/storage/v1/b?project=$PROJECT_ID"

# Upload the demo image to the bucket as an object named demo-image.
# The @file argument is quoted so paths containing spaces are passed intact.
curl -X POST --data-binary "@$OBJECT" \
    -H "Authorization: Bearer $OAUTH2_TOKEN" \
    -H "Content-Type: image/png" \
    "https://www.googleapis.com/upload/storage/v1/b/$BUCKET_NAME/o?uploadType=media&name=demo-image"

### API key

In [None]:
%%writefile request.json
{
  "document": {
    "type": "PLAIN_TEXT",
    "content": "The sweater fits well and I love the material too. I’m buying one for my sister! "
  },
  "encodingType": "UTF8"
}

In [None]:
# API key appended to every request URL below; replace the placeholder
# (angle brackets included) before running.
export API_KEY=<YOUR_API_KEY>

# Classify the document described in request.json.
curl "https://language.googleapis.com/v1/documents:classifyText?key=${API_KEY}" \
  -s -X POST -H "Content-Type: application/json" --data-binary @request.json

# Translate the payload in translation-request.json and save the response.
# translation-request.json is not created anywhere in this notebook, so it
# must already exist in the working directory.
# The URL is quoted because it contains '?', which the shell would otherwise
# treat as a glob character.
curl -s -X POST -H "Content-Type: application/json" --data-binary @translation-request.json \
  "https://translation.googleapis.com/language/translate/v2?key=${API_KEY}" \
  -o translation-response.json

In [None]:
# Sentiment analysis: POST request.json to the analyzeSentiment endpoint and
# redirect the response into result.json.
curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data-binary @request.json \
  "https://language.googleapis.com/v1/documents:analyzeSentiment?key=${API_KEY}" \
  > result.json

In [None]:
# Entity sentiment analysis: POST request.json to the analyzeEntitySentiment
# endpoint; the response is printed to stdout.
curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data-binary @request.json \
  "https://language.googleapis.com/v1/documents:analyzeEntitySentiment?key=${API_KEY}"

In [None]:
# Syntactic analysis: POST request.json to the analyzeSyntax endpoint; the
# response is printed to stdout.
curl --silent --request POST \
  --header "Content-Type: application/json" \
  --data-binary @request.json \
  "https://language.googleapis.com/v1/documents:analyzeSyntax?key=${API_KEY}"

### Create a storage bucket

In [None]:
# The bucket is named after the project. BUCKET is exported here because the
# copy commands below reference it and nothing else in this notebook sets it.
export BUCKET=$PROJECT_ID

# Create the bucket (standard storage class, us-central1).
gsutil mb -p $PROJECT_ID \
-c standard    \
-l us-central1 \
gs://${BUCKET}

# Copy training images
# The source wildcard is quoted so gsutil, not the local shell, expands it.
gsutil -m cp -r "gs://spls/gsp223/images/*" gs://${BUCKET}
gsutil cp gs://spls/gsp223/data.csv gs://${BUCKET}

### Create service account

In [None]:
# NOTE(review): $PROJECT is not set in this cell - it is assumed to be
# exported already in the current shell; confirm before running.

# Create the service account.
gcloud iam service-accounts create my-account \
  --display-name my-account

# Grant it the BigQuery admin role on the project.
gcloud projects add-iam-policy-binding $PROJECT \
  --member=serviceAccount:my-account@$PROJECT.iam.gserviceaccount.com \
  --role=roles/bigquery.admin

# Create a key for the account and write it to key.json.
gcloud iam service-accounts keys create key.json \
  --iam-account=my-account@$PROJECT.iam.gserviceaccount.com

# Point GOOGLE_APPLICATION_CREDENTIALS at the key file (relative path).
export GOOGLE_APPLICATION_CREDENTIALS=key.json

### Google Cloud Python client library

In [None]:
from google.cloud import storage, language, bigquery

# Set up our GCS, NL, and BigQuery clients
storage_client = storage.Client()
nl_client = language.LanguageServiceClient()
# TODO: replace YOUR_PROJECT with your project name below
bq_client = bigquery.Client(project='YOUR_PROJECT')

# Look up the destination table; it must already exist because get_table()
# fetches it from BigQuery.
dataset_ref = bq_client.dataset('news_classification_dataset')
dataset = bigquery.Dataset(dataset_ref)
table_ref = dataset.table('article_data')
table = bq_client.get_table(table_ref)


def classify_text(article):
    """Send article text to the NL API's classifyText method.

    Args:
        article: The article content, submitted as a PLAIN_TEXT document.

    Returns:
        The response object returned by nl_client.classify_text().
    """
    return nl_client.classify_text(
        document=language.Document(
            content=article,
            type_=language.Document.Type.PLAIN_TEXT,
        )
    )


rows_for_bq = []
files = storage_client.bucket('qwiklabs-test-bucket-gsp063').list_blobs()
print("Got article files from GCS, sending them to the NL API (this will take ~2 minutes)...")

# Send files to the NL API and save the result to send to BigQuery
for file in files:
    if not file.name.endswith('txt'):
        continue
    # Decode the downloaded bytes once. Calling str() on the raw bytes would
    # store the "b'...'" repr in BigQuery instead of the article text.
    # NOTE(review): assumes the article files are UTF-8 encoded - confirm.
    article_text = file.download_as_bytes().decode('utf-8')
    nl_response = classify_text(article_text)
    if len(nl_response.categories) > 0:
        # Only the first category returned by the API is recorded.
        top_category = nl_response.categories[0]
        rows_for_bq.append((article_text, top_category.name, top_category.confidence))

print("Writing NL API article data to BigQuery...")
# Write article text + category data to BQ; skip the request when nothing was
# classified so no empty insert is sent.
if rows_for_bq:
    errors = bq_client.insert_rows(table, rows_for_bq)
    assert errors == [], errors
else:
    print("No classified articles to write.")

## Cloud Vision API from a Kubernetes Cluster

### Create a Kubernetes Engine cluster

In [None]:
# Set the compute zone in the gcloud configuration.
gcloud config set compute/zone us-central1-a

# Create a two-node cluster named awwvision with the cloud-platform scope.
gcloud container clusters create awwvision --num-nodes=2 --scopes=cloud-platform

# Fetch the cluster's credentials.
gcloud container clusters get-credentials awwvision

# Print the cluster information.
kubectl cluster-info

### Python virtual environment

In [None]:
# Upgrade pip, then install or upgrade virtualenv.
sudo pip3 install --upgrade pip
sudo pip3 install --upgrade virtualenv

# Create a virtual environment named venv that uses python3.
virtualenv --python python3 venv

# Activate the virtual environment in the current shell.
. venv/bin/activate

## Image text detection, translation

### Configure a service account to access the Machine Learning APIs, BigQuery, and Cloud Storage

In [None]:
# Use the project that gcloud is currently configured for, rather than a
# hard-coded ID from one temporary lab session.
export PROJECT="$(gcloud config get-value project 2>/dev/null)"

# Create the service account.
gcloud iam service-accounts create my-account --display-name my-account

# Grant it the BigQuery admin role on the project.
gcloud projects add-iam-policy-binding $PROJECT \
--member=serviceAccount:my-account@$PROJECT.iam.gserviceaccount.com \
--role=roles/bigquery.admin

# Grant it the Cloud Storage object admin role on the project.
gcloud projects add-iam-policy-binding $PROJECT \
--member=serviceAccount:my-account@$PROJECT.iam.gserviceaccount.com \
--role=roles/storage.objectAdmin

### Create and download a credential file for your Service Account

In [None]:
# Create a key for the service account and write it to key.json in the
# current directory.
gcloud iam service-accounts keys create key.json \
  --iam-account my-account@$PROJECT.iam.gserviceaccount.com

# Point GOOGLE_APPLICATION_CREDENTIALS at the key file (relative path).
export GOOGLE_APPLICATION_CREDENTIALS=key.json

### Modify the Python script to extract text from image files

In [None]:
# Copy analyze-images.py from the bucket named after the project into the
# current directory.
gsutil cp gs://${PROJECT}/analyze-images.py ./

In [None]:
%%file analyze-images.py
# Dataset: image_classification_dataset
# Table name: image_text_detail
import os
import sys

# Import Google Cloud Library modules
from google.cloud import storage, bigquery, language, vision, translate_v2

if ('GOOGLE_APPLICATION_CREDENTIALS' in os.environ):
    if (not os.path.exists(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])):
        print ("The GOOGLE_APPLICATION_CREDENTIALS file does not exist.\n")
        exit()
else:
    print ("The GOOGLE_APPLICATION_CREDENTIALS environment variable is not defined.\n")
    exit()

if len(sys.argv)<3:
    print('You must provide parameters for the Google Cloud project ID and Storage bucket')
    print ('python3 '+sys.argv[0]+ '[PROJECT_NAME] [BUCKET_NAME]')
    exit()

project_name = sys.argv[1]
bucket_name = sys.argv[2]

# Set up our GCS, BigQuery, and Natural Language clients
storage_client = storage.Client()
bq_client = bigquery.Client(project=project_name)
nl_client = language.LanguageServiceClient()

# Set up client objects for the vision and translate_v2 API Libraries
vision_client = vision.ImageAnnotatorClient()
translate_client = translate_v2.Client()

# Setup the BigQuery dataset and table objects
dataset_ref = bq_client.dataset('image_classification_dataset')
dataset = bigquery.Dataset(dataset_ref)
table_ref = dataset.table('image_text_detail')
table = bq_client.get_table(table_ref)

# Create an array to store results data to be inserted into the BigQuery table
rows_for_bq = []

# Get a list of the files in the Cloud Storage Bucket
files = storage_client.bucket(bucket_name).list_blobs()
bucket = storage_client.bucket(bucket_name)
print('Processing image files from GCS. This will take a few minutes..')

# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
    if file.name.endswith('jpg') or file.name.endswith('png'):
        file_content = file.download_as_string()

        # TBD: Create a Vision API image object called image_object
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/types.html#google.cloud.vision_v1.types.Image
        image_object = vision.Image(content=file_content)
        
        # TBD: Detect text in the image and save the response data into an object called response
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/api.html#google.cloud.vision_v1.ImageAnnotatorClient.document_text_detection
        response = vision_client.text_detection(image=image_object)
        
        # Save the text content found by the vision API into a variable called text_data
        text_data = response.text_annotations[0].description

        # Save the text detection response data in <filename>.txt to cloud storage
        file_name = file.name.split('.')[0] + '.txt'
        blob = bucket.blob(file_name)
        # Upload the contents of the text_data string variable to the Cloud Storage file
        blob.upload_from_string(text_data, content_type='text/plain')
        
        # Extract the description and locale data from the response file
        # into variables called desc and locale
        # using response object properties e.g. response.text_annotations[0].description
        desc = response.text_annotations[0].description
        locale = response.text_annotations[0].locale
        
        # if the locale is English (en) save the description as the translated_txt
        if locale == 'en':
            translated_text = desc
        else:
            # TBD: For non EN locales pass the description data to the translation API
            # ref: https://googleapis.dev/python/translation/latest/client.html#google.cloud.translate_v2.client.Client.translate
            # Set the target_language locale to 'en')
            translation = translate_client.translate(desc, target_language='en')
            translated_text = translation['translatedText']
        print(translated_text)
        
        # if there is response data save the original text read from the image,
        # the locale, translated text, and filename
        if len(response.text_annotations) > 0:
            rows_for_bq.append((desc, locale, translated_text, file.name))

print('Writing Vision API image data to BigQuery...')
# Write original text, locale and translated text to BQ
# TBD: When the script is working uncomment the next line to upload results to BigQuery
errors = bq_client.insert_rows(table, rows_for_bq)
assert errors == []

In [None]:
# Run the script; the first argument is the project name and the second is the
# bucket name (here both are the Cloud Shell project ID).
python3 analyze-images.py ${DEVSHELL_PROJECT_ID} ${DEVSHELL_PROJECT_ID}

In [None]:
-- Count the rows recorded per locale, largest count first.
SELECT
  locale,
  COUNT(locale) AS lcount
FROM
  image_classification_dataset.image_text_detail
GROUP BY
  locale
ORDER BY
  lcount DESC

## Training with pre-built ML models using AutoML Vision

In [None]:
%%bash
# The %%bash cell magic must be the first line of the cell; with a comment
# above it IPython does not recognise it as a cell magic.

# Activate cloud shell
gcloud auth list
gcloud config list project

# Upload training images to Google Cloud Storage
gsutil mb -p $DEVSHELL_PROJECT_ID \
    -c regional \
    -l us-central1 \
    gs://$DEVSHELL_PROJECT_ID-vcm/

gsutil -m cp -r gs://cloud-training/automl-lab-clouds/* gs://$DEVSHELL_PROJECT_ID-vcm/
gsutil ls gs://$DEVSHELL_PROJECT_ID-vcm/

In [None]:
%%bash
# The %%bash cell magic must be the first line of the cell; with comments
# above it IPython does not recognise it as a cell magic.

# Create a dataset
# Create a CSV file where each row contains a URL to a training image
# and the associated label for that image.
gsutil cp gs://cloud-training/automl-lab-clouds/data.csv .
head --lines=10 data.csv
# Replace the literal "placeholder" in the CSV with this project's bucket name.
sed -i -e "s/placeholder/$DEVSHELL_PROJECT_ID-vcm/g" ./data.csv
head --lines=10 data.csv
gsutil cp ./data.csv gs://$DEVSHELL_PROJECT_ID-vcm/
gsutil ls gs://$DEVSHELL_PROJECT_ID-vcm/*