# Guide Google Cloud Platform API

Afin d'automatiser certains processus sur la plateforme Google Cloud Platform, il est possible d'utiliser l'API avec Python ou d'autres langages.

## BigQuery

BigQuery est un outil de GCP permettant de stocker et d'interroger des données avec une très grande rapidité. Tout comme la plupart des produits GCP, il est possible d'interagir avec l'outil grâce à l'API.

#### Importer les résultats d'une requête dans un DataFrame

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Ten-row sample of the table; the backticks belong to the SQL text and quote
# the fully qualified table name.
sql = "SELECT * FROM `Project_id.dataset_id.Table_id` LIMIT 10"

# Submit the query and convert its result into a DataFrame.
data = client.query(sql).to_dataframe()

# Print the shape attribute of the resulting DataFrame.
print(data.shape)

(10, 28)


##### Autre méthode

In [0]:
%%bigquery df
-- Ten-row sample of the table. NOTE(review): the result is presumably stored in the variable named after the magic (df) - confirm.
SELECT 
    *
FROM `Project_id.dataset_id.Table_id`
LIMIT 10

#### Création d'une table BigQuery

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Column definitions of the new table. The type names are written in upper
# case for all three fields so that the schema follows a single convention.
schema = [
    bigquery.SchemaField("nom", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("prenom", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
]

# Identifier in the form project.dataset.table.
table_id = "Project_id.dataset_id.Table_id"

# Local description of the table (identifier + schema).
table = bigquery.Table(table_id, schema=schema)

# Ask the service to create the table described above.
client.create_table(table)

#### Créer une table à partir des résultats d'une requête

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Table that receives the query result.
table_id = "Project_id.dataset_id.Table_id"
table_ref = bigquery.Table(table_id)

# Job configuration whose destination is the table above.
job_config = bigquery.QueryJobConfig(destination=table_ref)

sql = """
    SELECT *
    FROM `Project_id.dataset_id.Table_id`
    LIMIT 10
"""

# Submit the query in the EU location with the destination configured above.
query_job = client.query(sql, location="EU", job_config=job_config)

# Wait for the job to finish.
query_job.result()

<google.cloud.bigquery.table.RowIterator at 0x12e89fd0>

#### Obtenir des informations sur une table

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# The identifier has to be a Python string literal: backticks are SQL quoting
# and are not valid Python syntax.
table_id = "Project_id.dataset_id.Table_id"

# Fetch the table object for that identifier.
table = client.get_table(table_id)

# Attributes that can be read from the table object include: project,
# dataset_id, table_id, schema, description, num_rows, expires, labels,
# num_bytes, ...
print("Nn Ligne : {}".format(table.num_rows))

Nn Ligne : 42467785


#### Insérer des données dans une table BigQuery

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# The identifier has to be a Python string literal: backticks are SQL quoting
# and are not valid Python syntax.
table_id = "Project_id.dataset_id.Table_id"
table = client.get_table(table_id)

# One tuple per row to insert.
# NOTE(review): values are presumably matched to the table columns by
# position - confirm against the table schema.
rows_to_insert = [
    ("Nom", "Prenom", 23),
]

# The value returned by insert_rows is left as the cell result.
client.insert_rows(table, rows_to_insert)

[]

#### Requête paramétrée

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# @filtre_parametre is a named placeholder; its value is supplied through the
# job configuration below rather than being pasted into the SQL text.
sql = """
    SELECT *
    FROM  `Project_id.dataset_id.Table_id`
    WHERE parametre = @filtre_parametre
"""

# Bind the placeholder to a STRING value.
query_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter(
            "filtre_parametre",
            "STRING",
            "parametre-123",
        ),
    ],
)

# Run the query with the bound parameter and load the result into a DataFrame.
df = client.query(sql, job_config=query_config).to_dataframe()

#### Charger un DataFrame dans une table BigQuery

In [0]:
import pandas as pd
from google.cloud import bigquery

client = bigquery.Client()

# Three sample rows with the columns prenom, nom and age.
df = pd.DataFrame(
    {
        "prenom": ["prenom", "prenom2", "prenom3"],
        "nom": ["nom", "nom2", "nom3"],
        "age": [22, 23, 24],
    }
)

# Reference to the destination table, built from the dataset reference.
dataset_ref = client.dataset("dataset_id")
table_ref = dataset_ref.table("table_id")

# Start the load job for the DataFrame and wait for its result.
client.load_table_from_dataframe(df, table_ref).result()

##### Autre méthode

In [0]:
# Import under the `pd` alias: the code below refers to the module as `pd`,
# so a bare `import pandas` leaves that name undefined in a fresh session.
import pandas as pd

# Three sample rows with the columns prenom, nom and age.
df = pd.DataFrame(
    {
        'prenom': ['prenom', 'prenom2', 'prenom3'],
        'nom': ['nom', 'nom2', 'nom3'],
        'age': [22, 23, 24],
    }
)

# Destination given as dataset.table, with the project passed separately.
full_table_id = 'dataset_id.table_id'
project_id = 'project_id'

# Write the DataFrame to the destination table.
df.to_gbq(full_table_id, project_id=project_id)

1it [00:03,  3.58s/it]


#### Supprimer une table BigQuery

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Identifier in the form project.dataset.table.
table_id = "project_id.dataset_id.table_id"

# Delete the table; not_found_ok=True is passed so that a missing table is
# tolerated (NOTE(review): per the argument name - confirm in the client docs).
client.delete_table(table_id, not_found_ok=True)

#### Mettre à jour une table BigQuery

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Reference to the table, then the table object itself.
table_ref = (
    client
    .dataset("dataset_id", project="project_id")
    .table("table_id")
)
table = client.get_table(table_ref)

# Change the description locally...
table.description = "Ajoutez une description"

# ...then submit the update, naming "description" as the field to update.
table = client.update_table(table, ["description"])

#### Copier une table BigQuery

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# Reference to the table to copy from.
source_dataset = client.dataset("dataset_id", project="project_id")
source_table_ref = source_dataset.table("table_id")

# Reference to the table to copy to.
dest_table_ref = (
    client
    .dataset("dataset_id", project="project_id")
    .table("table_id")
)

# Submit the copy job in the EU location.
job = client.copy_table(source_table_ref, dest_table_ref, location="EU")

# Wait for the copy job to finish.
job.result()

<google.cloud.bigquery.job.CopyJob at 0x1470ee48>

#### Exécuter une simple requête BQ

In [0]:
from google.cloud import bigquery

client = bigquery.Client()

# DDL statement: (re)creates a table, partitioned by `date` and clustered by
# variable1 and variable2, from a 100-row SELECT.
sql = """
    CREATE OR REPLACE TABLE `project_id.dataset_id.table_id`
    PARTITION BY date
    CLUSTER BY variable1,variable2  
    as
    
    SELECT *
    FROM `project_id.dataset_id.table_id`
    LIMIT 100
"""

# client.query() returns a job object; calling result() waits for the job so
# that a failing statement raises here instead of going unnoticed.
query_job = client.query(sql)
query_job.result()

# Leave the job object as the cell's value.
query_job

<google.cloud.bigquery.job.QueryJob at 0x143d5828>

## Google Cloud Storage

Google Cloud Storage est un outil de GCP qui permet de stocker tout type de fichier sur la plateforme.

#### Créer un Bucket

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Name of the bucket to create.
bucket_name = "bucket_name"

# Create the bucket and keep the returned object.
bucket = storage_client.create_bucket(bucket_name)

#### Supprimer un Bucket

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# Delete the bucket that was just fetched.
bucket.delete()

#### Télécharger un fichier

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# Blob handle for the object name inside the bucket.
source_blob_name = "sous_bucket/file_name.csv"
blob = bucket.blob(source_blob_name)

# Download the blob to the given local file name.
blob.download_to_filename("local_path")

#### Importer un fichier

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# Blob handle for the object name inside the bucket.
source_blob_name = "sous_bucket/file_name.csv"
blob = bucket.blob(source_blob_name)

# Upload the given local file to the blob.
blob.upload_from_filename("local_path")

#### Supprimer un fichier

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# Blob handle for the object name inside the bucket.
source_blob_name = "sous_bucket/file_name.csv"
blob = bucket.blob(source_blob_name)

# Delete the blob.
blob.delete()

#### Mettre à jour un fichier

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# Blob handle for the object name inside the bucket.
source_blob_name = "sous_bucket/file_name.csv"
blob = bucket.blob(source_blob_name)

# Local file whose content is uploaded to the blob. The name has to be
# defined in this cell; without it the upload call raises NameError.
source_file_name = "local_path"

# Upload the local file to the blob.
blob.upload_from_filename(source_file_name)

#### Liste des blobs dans un bucket 

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# The listing call's return value is left as the cell result.
bucket.list_blobs()

#### Liste des blobs commençant par un préfixe

In [0]:
from google.cloud import storage

storage_client = storage.Client()

# Look the bucket up by name.
bucket_name = "bucket_name"
bucket = storage_client.get_bucket(bucket_name)

# List the blobs of the bucket, passing the prefix as a filter.
# NOTE: a delimiter="/" argument could be passed as well; it is not used here.
prefix = "data"
blobs = bucket.list_blobs(prefix=prefix)