In [19]:
import os
from io import BytesIO

import pandas as pd
from google.cloud import storage, bigquery
from google.oauth2 import service_account

In [20]:
# Location of the service-account key file. GOOGLE_APPLICATION_CREDENTIALS, when
# set to a non-empty value, takes precedence so the notebook is not tied to one
# machine's home directory; the original path is kept as the fallback.
credentialsPath = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/home/cloud_user_p_13b408d8/playground-s-11-29087116-2c37b82489ba.json"
)
credentials = service_account.Credentials.from_service_account_file(credentialsPath)

# Placeholders: replace with the real project id and bucket name before running.
project_id = 'project-id'
bucket_name = 'your-bucket-name'

In [21]:
# Client construction for Cloud Storage and BigQuery
def initialize_clients(creds, proj_id):
    """Create a Cloud Storage client and a BigQuery client for one project.

    Args:
        creds: Credentials object passed to both clients as ``credentials``.
        proj_id: Project id passed to both clients as ``project``.

    Returns:
        A ``(storage_client, bigquery_client)`` tuple, in that order.
    """
    # Both clients take the same two keyword arguments.
    shared_kwargs = {"credentials": creds, "project": proj_id}
    return storage.Client(**shared_kwargs), bigquery.Client(**shared_kwargs)

In [22]:
# Build both clients once: b_client is the Storage client, bq_client the BigQuery
# client (the order returned by initialize_clients).
b_client, bq_client = initialize_clients(credentials, project_id)

In [23]:
def files_in_bucket(bucket_name, storage_client):
    """Return the names of the objects listed in a bucket.

    Args:
        bucket_name: Name of the bucket to look up via ``get_bucket``.
        storage_client: Storage client used for the lookup and the listing.

    Returns:
        A list with the ``name`` of every blob yielded by ``list_blobs()``,
        in the order they are yielded.
    """
    names = []
    for blob in storage_client.get_bucket(bucket_name).list_blobs():
        names.append(blob.name)
    return names

In [24]:
# Collect the object names in the bucket; the bare expression on the last line
# makes the notebook display the list.
file_names = files_in_bucket(bucket_name, b_client)
file_names

['brands.csv',
 'categories.csv',
 'customers.csv',
 'order_items.csv',
 'orders.csv',
 'products.csv',
 'staffs.csv',
 'stocks.csv',
 'stores.csv']

In [11]:
def read_gcs_file_to_df(bucket_name, file_name, storage_client):
    """Download one object from a bucket and parse it as CSV.

    Args:
        bucket_name: Name of the bucket holding the object.
        file_name: Name of the object to download.
        storage_client: Storage client used to reach the bucket and blob.

    Returns:
        A pandas DataFrame produced by ``pd.read_csv`` with default options.
    """
    blob = storage_client.bucket(bucket_name).blob(file_name)
    # The whole object is downloaded into memory, then exposed to pandas
    # through a file-like buffer.
    with BytesIO(blob.download_as_bytes()) as buffer:
        return pd.read_csv(buffer)

In [12]:
# Preview the first listed file (file_names[0]) as a DataFrame; the bare `df`
# on the last line makes the notebook display it.
df = read_gcs_file_to_df(bucket_name, file_names[0], b_client)
df

Unnamed: 0,brand_id,brand_name
0,1,Electra
1,2,Haro
2,3,Heller
3,4,Pure Cycles
4,5,Ritchey
5,6,Strider
6,7,Sun Bicycles
7,8,Surly
8,9,Trek


In [25]:
def load_data_from_gcs_to_bigquery(bucket_name, file_name, bigquery_client,
                                   dataset_id='your-bigquery-dataset-id'):
    """Load one CSV object from Cloud Storage into a BigQuery table.

    The destination table is named after the object: the last ``/``-separated
    component of ``file_name``, cut at its first ``.`` (``"raw/orders.csv"``
    loads into ``orders``). The call blocks until the load job finishes.

    Args:
        bucket_name: Bucket that holds the object.
        file_name: Object name inside the bucket; may include a folder prefix.
        bigquery_client: BigQuery client used to submit the load job.
        dataset_id: Dataset that receives the table. Defaults to the
            placeholder previously hard-coded in this function.

    Raises:
        ValueError: If no table name can be derived from ``file_name``
            (for example a folder placeholder such as ``"raw/"``).
        Exception: Whatever ``load_job.result()`` raises when the job fails.
    """
    # Only the final path component is used, so a folder prefix does not leak
    # into the table id.
    table_name = file_name.rsplit("/", 1)[-1].split(".")[0]
    if not table_name:
        raise ValueError(f"Cannot derive a table name from object {file_name!r}")

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        autodetect=True,       # let BigQuery infer the schema
        skip_leading_rows=1,   # first row is the CSV header
        write_disposition="WRITE_TRUNCATE"
    )

    uri = f"gs://{bucket_name}/{file_name}"

    load_job = bigquery_client.load_table_from_uri(
        uri,
        f"{dataset_id}.{table_name}",
        job_config=job_config
    )

    # Wait for completion so failures surface here rather than being missed.
    load_job.result()
    print(f"Data from {uri} loaded to BigQuery table {table_name} in dataset {dataset_id}")

In [26]:
# Load every listed object into BigQuery, one call (and one load job) per file.
for file_name in file_names:
    load_data_from_gcs_to_bigquery(bucket_name, file_name, bq_client)

Data from gs://gcptask/brands.csv loaded to BigQuery table brands in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/categories.csv loaded to BigQuery table categories in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/customers.csv loaded to BigQuery table customers in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/order_items.csv loaded to BigQuery table order_items in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/orders.csv loaded to BigQuery table orders in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/products.csv loaded to BigQuery table products in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/staffs.csv loaded to BigQuery table staffs in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/stocks.csv loaded to BigQuery table stocks in dataset playground-s-11-3c5604b9.BikeStore
Data from gs://gcptask/stores.csv loaded to BigQuery table stores in dataset