# Google Cloud Connection

Set the Google Cloud project ID used throughout the notebook

In [None]:
# Google Cloud project id. Later cells also use it as the Cloud Storage bucket
# name (gs://{PROJECT_ID}) and as the prefix of the BigQuery table ids.
PROJECT_ID = 'bdcc-project1-417811'

Authenticating in Google Cloud

In [None]:
from google.colab import auth
# Run Colab's user-authentication step before any Google Cloud call is made.
auth.authenticate_user()
# Point the gcloud CLI at this project (updates the core/project property).
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


Bucket creation

In [None]:
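# The bucket has already been created, so the command below stays commented out.
# Uncomment it only when running against a project that has no bucket yet:
# !gsutil mb gs://{PROJECT_ID}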

# BigQuery Client

In [None]:
import google.cloud.bigquery as bq

# BigQuery client bound to PROJECT_ID; the dataset, table and load cells below
# all go through this single client object.
client = bq.Client(project=PROJECT_ID)

# Data Preparation


Import the libraries and copy the CSV files from the bucket

In [None]:
import pandas as pd
import time

# Copy the three CSV inputs from the gs://{PROJECT_ID} bucket into the current
# working directory so pandas can read them as local files.
!gsutil cp gs://{PROJECT_ID}/classes.csv .
!gsutil cp gs://{PROJECT_ID}/relations.csv .
!gsutil cp gs://{PROJECT_ID}/image-labels.csv .

Copying gs://bdcc-project1-417811/classes.csv...
/ [0 files][    0.0 B/ 11.8 KiB]                                                / [1 files][ 11.8 KiB/ 11.8 KiB]                                                
Operation completed over 1 objects/11.8 KiB.                                     
Copying gs://bdcc-project1-417811/relations.csv...
/ [1 files][113.5 KiB/113.5 KiB]                                                
Operation completed over 1 objects/113.5 KiB.                                    
Copying gs://bdcc-project1-417811/image-labels.csv...
/ [1 files][ 10.9 MiB/ 10.9 MiB]                                                
Operation completed over 1 objects/10.9 MiB.                                     


Extraction of data from csv files (classes.csv, relations.csv, image-labels.csv)

In [None]:
# Read the three local CSV files, in order, into one DataFrame each.
classes, relations, imageLabel = (
    pd.read_csv(csv_path)
    for csv_path in ("classes.csv", "relations.csv", "image-labels.csv")
)

# Show the classes DataFrame as the cell's output.
classes

Unnamed: 0,Label,Description
0,/m/011k07,Tortoise
1,/m/011q46kg,Container
2,/m/012074,Magpie
3,/m/0120dh,Sea turtle
4,/m/01226z,Football
...,...,...
596,/m/0qmmr,Wheelchair
597,/m/0wdt60w,Rugby ball
598,/m/0xfy,Armadillo
599,/m/0xzly,Maracas


Creation of Images Dataset in BigQuery

In [None]:
# Create the 'openimages' dataset that holds all tables built below.
# exists_ok=True is passed so that re-running the cell does not fail when the
# dataset is already there.
dataset = client.create_dataset('openimages', exists_ok=True)

Creation of the corresponding tables in BigQuery

Classes table

In [None]:
# Fully-qualified id (project.dataset.table) of the classes table.
table_name = f'{PROJECT_ID}.openimages.classes'
print(f'Creating table {table_name}')

# Drop any table left over from an earlier run so this cell can be re-executed.
client.delete_table(table_name, not_found_ok=True)

# Describe the table (two STRING columns) and create it in BigQuery.
classesTable = bq.Table(
    table_name,
    schema=[
        bq.SchemaField('Label', 'STRING'),
        bq.SchemaField('Description', 'STRING'),
    ],
)
client.create_table(classesTable)

Creating table bdcc-project1-417811.openimages.classes


Table(TableReference(DatasetReference('bdcc-project1-417811', 'openimages'), 'classes'))

In [None]:
# Start a BigQuery load job that uploads the `classes` DataFrame into the
# classes table defined in the previous cell.
print('Loading data into ' + table_name)
load_job = client.load_table_from_dataframe(classes, classesTable)

# Poll once per second until the job reports that it is no longer running.
while load_job.running():
  print('waiting for the load job to complete')
  time.sleep(1)

# None is a singleton, so test it by identity (`is None`) rather than equality.
# Any other value is the job's error report and is printed as-is.
if load_job.errors is None:
  print('Load complete!')
else:
  print(load_job.errors)

Loading data into bdcc-project1-417811.openimages.classes
waiting for the load job to complete
waiting for the load job to complete
Load complete!


Relations table

In [None]:
# Fully-qualified id (project.dataset.table) of the relations table.
table_name = f'{PROJECT_ID}.openimages.relations'
print(f'Creating table {table_name}')

# Drop any table left over from an earlier run so this cell can be re-executed.
client.delete_table(table_name, not_found_ok=True)

# Describe the table (four STRING columns) and create it in BigQuery.
relationsTable = bq.Table(
    table_name,
    schema=[
        bq.SchemaField('ImageId', 'STRING'),
        bq.SchemaField('Label1', 'STRING'),
        bq.SchemaField('Relation', 'STRING'),
        bq.SchemaField('Label2', 'STRING'),
    ],
)
client.create_table(relationsTable)

Creating table bdcc-project1-417811.openimages.relations


Table(TableReference(DatasetReference('bdcc-project1-417811', 'openimages'), 'relations'))

In [None]:
# Start a BigQuery load job that uploads the `relations` DataFrame into the
# relations table defined in the previous cell.
print('Loading data into ' + table_name)
load_job = client.load_table_from_dataframe(relations, relationsTable)

# Poll once per second until the job reports that it is no longer running.
while load_job.running():
  print('waiting for the load job to complete')
  time.sleep(1)

# None is a singleton, so test it by identity (`is None`) rather than equality.
# Any other value is the job's error report and is printed as-is.
if load_job.errors is None:
  print('Load complete!')
else:
  print(load_job.errors)

Loading data into bdcc-project1-417811.openimages.relations
waiting for the load job to complete
waiting for the load job to complete
Load complete!


Image Labels Table

In [None]:
# Fully-qualified id (project.dataset.table) of the image_labels table.
table_name = f'{PROJECT_ID}.openimages.image_labels'
print(f'Creating table {table_name}')

# Drop any table left over from an earlier run so this cell can be re-executed.
client.delete_table(table_name, not_found_ok=True)

# Describe the table (two STRING columns) and create it in BigQuery.
imageLabelTable = bq.Table(
    table_name,
    schema=[
        bq.SchemaField('ImageId', 'STRING'),
        bq.SchemaField('Label', 'STRING'),
    ],
)
client.create_table(imageLabelTable)

Creating table bdcc-project1-417811.openimages.image_labels


Table(TableReference(DatasetReference('bdcc-project1-417811', 'openimages'), 'image_labels'))

Upload data onto BigQuery

In [None]:
# Start a BigQuery load job that uploads the `imageLabel` DataFrame into the
# image_labels table defined in the previous cell.
print('Loading data into ' + table_name)
load_job = client.load_table_from_dataframe(imageLabel, imageLabelTable)

# Poll once per second until the job reports that it is no longer running.
while load_job.running():
  print('waiting for the load job to complete')
  time.sleep(1)

# None is a singleton, so test it by identity (`is None`) rather than equality.
# Any other value is the job's error report and is printed as-is.
if load_job.errors is None:
  print('Load complete!')
else:
  print(load_job.errors)

Loading data into bdcc-project1-417811.openimages.image_labels
waiting for the load job to complete
waiting for the load job to complete
waiting for the load job to complete
Load complete!
