In [1]:
# Configuration
# 00_setup.ipynb
# Purpose: Infrastructure & external table setup ONLY

import os

from google.cloud import bigquery
from google.cloud.exceptions import NotFound

# -----------------------
# Project configuration
# -----------------------
# Every setting can be overridden with an environment variable named
# TELCO_<SETTING> (e.g. TELCO_PROJECT_ID). An unset or empty variable falls
# back to the default literal, so running without any overrides behaves
# exactly as before.
PROJECT_ID  = os.environ.get("TELCO_PROJECT_ID") or "infinite-mantra-480821-v7"
LOCATION    = os.environ.get("TELCO_LOCATION") or "US"

BUCKET_NAME = os.environ.get("TELCO_BUCKET_NAME") or "mlops-telco-bigdata"
CSV_PATH    = os.environ.get("TELCO_CSV_PATH") or "Telco-Customer-Churn.csv"

DATASET_ID  = os.environ.get("TELCO_DATASET_ID") or "telco_churn_ds"
TABLE_ID    = os.environ.get("TELCO_TABLE_ID") or "customers"   # external table name

# GCS URI of the source CSV, derived from the bucket and object path above.
CSV_GCS_URI = f"gs://{BUCKET_NAME}/{CSV_PATH}"

print("PROJECT_ID :", PROJECT_ID)
print("CSV_GCS_URI:", CSV_GCS_URI)

PROJECT_ID : infinite-mantra-480821-v7
CSV_GCS_URI: gs://mlops-telco-bigdata/Telco-Customer-Churn.csv


In [2]:
# BigQuery client and dataset
# Creates the client used by the rest of the notebook and makes sure the
# target dataset exists (idempotent: safe to re-run).
from google.cloud import bigquery

bq_client = bigquery.Client(project=PROJECT_ID, location=LOCATION)

# Despite the name, this is a full Dataset object (not a DatasetReference);
# the name is kept because it is a notebook-level variable.
dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET_ID}")
dataset_ref.location = LOCATION

try:
    bq_client.get_dataset(dataset_ref)
    print(f" Dataset already exists: {DATASET_ID}")
except NotFound:
    # Only a missing dataset should trigger creation. Any other failure
    # (permissions, credentials, network) now propagates instead of being
    # mistaken for "dataset does not exist".
    bq_client.create_dataset(dataset_ref)
    print(f" Dataset created: {DATASET_ID}")

 Dataset already exists: telco_churn_ds


In [3]:
# Create external table (GCS -> BigQuery)
# Defines a BigQuery table whose data is read from the CSV at CSV_GCS_URI,
# and creates it only if it is not already present (idempotent: safe to re-run).
ext_table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [CSV_GCS_URI]
external_config.options.skip_leading_rows = 1  # skip the first row (presumably the CSV header)
external_config.autodetect = True              # let BigQuery infer the schema

table = bigquery.Table(ext_table_id)
table.external_data_configuration = external_config

try:
    # NOTE: when the table already exists its definition is left untouched;
    # changes to external_config above are NOT applied to an existing table.
    bq_client.get_table(ext_table_id)
    print(f" External table already exists: {ext_table_id}")
except NotFound:
    # Only a missing table should trigger creation. Any other failure
    # (permissions, credentials, network) now propagates instead of being
    # mistaken for "table does not exist".
    bq_client.create_table(table)
    print(f" External table created: {ext_table_id}")

 External table already exists: infinite-mantra-480821-v7.telco_churn_ds.customers


In [4]:
# Sanity check: count the rows readable through the external table.
row_count_sql = f"""
SELECT COUNT(*) AS n
FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
"""
# Last expression of the cell, so the resulting DataFrame is displayed.
bq_client.query(row_count_sql).to_dataframe()


Unnamed: 0,n
0,7043
