# BigQuery ETL Tools

### Setup Variables

In [None]:
# Shell escapes: log in to create Application Default Credentials, then set
# the gcloud CLI's active project to the one named below.
! gcloud auth application-default login
! gcloud config set project jetty-platform-reporting-dev

In [None]:
import logging
import polars as pl
from datetime import datetime, timezone

# Root logging setup: INFO level, timestamp + level prefix, with records
# going to both a "debug.log" file handler and a stream handler.
_log_handlers = [logging.FileHandler("debug.log"), logging.StreamHandler()]
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=_log_handlers,
)

# Five-row sample frame covering integer, string, boolean and datetime columns.
_sample_columns = {
    "A": list(range(1, 6)),
    "fruits": ["banana", "banana", "apple", "apple", "banana"],
    "B": list(range(5, 0, -1)),
    "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
    "bool_test": [True, False, True, False, True],
    "test_dt": [datetime(2024, 1, 1) for _ in range(5)],
}
dataframe = pl.DataFrame(_sample_columns)

### Load Dataframe into BigQuery

In [None]:
import os
from bigquery_etl_tools_package_tup import dataframe_to_bigquery

# Bucket and dataset are read from the environment; a missing variable
# raises KeyError here.
bucket_name = os.environ["BUCKET"]
dataset_name = os.environ["DATASET"]

file_type = "csv"
# Current UTC time as whole epoch seconds; it prefixes the blob file name.
now_ts = round(datetime.now(tz=timezone.utc).timestamp())

table_name = f'dataframe_to_bigquery_test_{file_type}'
table_id = f'{dataset_name}.{table_name}'
blob_name = f'bigquery_etl_tools/tests/{now_ts}_{table_name}.{file_type}'

In [None]:
# Pass the sample frame and the names built above to the helper; its two
# return values are kept as `blob` and `table`.
_load_args = (dataframe, bucket_name, blob_name, table_id, file_type)
blob, table = dataframe_to_bigquery(*_load_args)

### Autodetect BigQuery Schema of Dataframe

In [None]:
import os

from bigquery_etl_tools_package_tup import autodetect_dataframe_schema

# This cell re-declares its own inputs so it can run right after the setup
# cell (which provides `dataframe`, `datetime` and `timezone`) without first
# running the "Load Dataframe into BigQuery" section.
file_type = 'csv'
# Current UTC time as whole epoch seconds; it prefixes the blob file name.
now_ts = int(round(datetime.now(timezone.utc).timestamp()))

# Bucket and dataset come from the environment; a missing variable raises KeyError.
bucket_name = os.environ['BUCKET']
dataset_name = os.environ['DATASET']

# Previously this name was only defined in the load section, so running this
# cell on its own raised NameError. The value is kept identical so the blob
# path and table id are unchanged when the cells are run in order.
table_name = f'dataframe_to_bigquery_test_{file_type}'
blob_name = f'bigquery_etl_tools/tests/{now_ts}_{table_name}.{file_type}'
table_id = f'{dataset_name}.{table_name}'

filepath = autodetect_dataframe_schema(
    dataframe,
    bucket_name,
    blob_name,
    table_id
)

# Show whatever the helper returned (stored here as `filepath`).
print(filepath)