In [5]:
# Load the BigQuery IPython extension; a later cell in this notebook uses the %%bigquery cell magic.
%load_ext google.cloud.bigquery

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


In [7]:
%%bigquery --project infinite-mantra-480821-v7
-- Evaluate the logistic-regression churn model on the 'test' split and persist
-- the resulting metrics row as a table.
-- The project is pinned explicitly so this cell targets the same project as the
-- Python-client cells in this notebook rather than an environment default.
CREATE OR REPLACE TABLE `telco_churn_ds.eval_logreg_test` AS
SELECT *
FROM ML.EVALUATE(
  MODEL `telco_churn_ds.churn_logreg_model`,
  (
    -- customerID and split are removed from the rows handed to the model
    SELECT * EXCEPT(customerID, split)
    FROM `telco_churn_ds.v_features_with_split`
    WHERE split = 'test'
  )
);

Query is running:   0%|          |

In [8]:
from google.cloud import bigquery

# Client bound to an explicit project ID.
bq = bigquery.Client(project="infinite-mantra-480821-v7")

# DDL that evaluates the model on the 'test' split and stores the metrics table.
sql = """
CREATE OR REPLACE TABLE `telco_churn_ds.eval_logreg_test` AS
SELECT *
FROM ML.EVALUATE(
  MODEL `telco_churn_ds.churn_logreg_model`,
  (
    SELECT * EXCEPT(customerID, split)
    FROM `telco_churn_ds.v_features_with_split`
    WHERE split = 'test'
  )
);
"""

# Submit the job, then block until it has finished.
eval_ddl_job = bq.query(sql)
eval_ddl_job.result()

<google.cloud.bigquery.table._EmptyRowIterator at 0x7f106dfa2ad0>

In [10]:
from google.cloud import bigquery

bq = bigquery.Client(project="infinite-mantra-480821-v7")

# Read the persisted evaluation metrics back into a DataFrame for display.
check_sql = """
SELECT *
FROM `telco_churn_ds.eval_logreg_test`
"""
check_job = bq.query(check_sql)
eval_df = check_job.to_dataframe()
eval_df

Unnamed: 0,precision,recall,accuracy,f1_score,log_loss,roc_auc
0,0.496269,0.841772,0.713004,0.624413,0.586346,0.825208


In [11]:
# Persist global feature attributions for the churn model.
sql_global = """
CREATE OR REPLACE TABLE `telco_churn_ds.explain_logreg_global` AS
SELECT *
FROM ML.GLOBAL_EXPLAIN(MODEL `telco_churn_ds.churn_logreg_model`);
"""
bq.query(sql_global).result()

# Persist standardized model weights.
# No ORDER BY here: a table read without ORDER BY has no defined row order,
# so the ranking is applied in the preview queries below instead.
sql_weights_std = """
CREATE OR REPLACE TABLE `telco_churn_ds.explain_logreg_weights_std` AS
SELECT *
FROM ML.WEIGHTS(
  MODEL `telco_churn_ds.churn_logreg_model`,
  STRUCT(TRUE AS standardize)  -- make magnitudes comparable
);
"""
bq.query(sql_weights_std).result()

# Preview both tables, ordered at read time so .head() reliably shows the
# largest attributions / largest absolute weights first.
global_df = bq.query(
    "SELECT * FROM `telco_churn_ds.explain_logreg_global` ORDER BY attribution DESC"
).to_dataframe()
weights_std_df = bq.query(
    "SELECT * FROM `telco_churn_ds.explain_logreg_weights_std` ORDER BY ABS(weight) DESC"
).to_dataframe()
global_df.head(), weights_std_df.head()

(               feature  attribution
 0         Contract_mtm     0.155655
 1               tenure     0.123749
 2       Internet_Fiber     0.118320
 3  Pay_ElectronicCheck     0.106589
 4         Contract_2yr     0.105763,
        processed_input    weight category_weights
 0         Contract_mtm  0.155581               []
 1               tenure -0.140966               []
 2         Contract_2yr -0.121972               []
 3       Internet_Fiber  0.119749               []
 4  Pay_ElectronicCheck  0.114980               [])

In [13]:
from google.cloud import bigquery

bq = bigquery.Client(project="infinite-mantra-480821-v7")

# Rank candidate decision thresholds on the 'val' split by F1 and keep the top 10.
# Precision is derived once in its own CTE and reused for F1, and `threshold`
# is a secondary sort key so the ranking (and the first row, which a later cell
# picks) is deterministic when several thresholds share the same F1.
roc_sql = """
WITH roc AS (
  SELECT *
  FROM ML.ROC_CURVE(
    MODEL `telco_churn_ds.churn_logreg_model`,
    (
      SELECT * EXCEPT(customerID, split)
      FROM `telco_churn_ds.v_features_with_split`
      WHERE split = 'val'
    )
  )
),
with_precision AS (
  SELECT
    threshold,
    recall,
    -- SAFE_DIVIDE returns NULL instead of failing when the denominator is 0
    SAFE_DIVIDE(true_positives, true_positives + false_positives) AS `precision`
  FROM roc
),
metrics AS (
  SELECT
    threshold,
    recall,
    `precision`,
    2 * SAFE_DIVIDE(`precision` * recall, `precision` + recall) AS f1_score
  FROM with_precision
)
SELECT *
FROM metrics
ORDER BY f1_score DESC, threshold
LIMIT 10;
"""

roc_top_df = bq.query(roc_sql).to_dataframe()
roc_top_df

Unnamed: 0,threshold,recall,precision,f1_score
0,0.568826,0.768627,0.571429,0.655518
1,0.565041,0.780392,0.563739,0.654605
2,0.561337,0.792157,0.556474,0.653722
3,0.575185,0.752941,0.576577,0.653061
4,0.556383,0.803922,0.549598,0.652866
5,0.576894,0.733333,0.57716,0.645941
6,0.550571,0.803922,0.536649,0.643642
7,0.581574,0.713725,0.579618,0.639719
8,0.586766,0.698039,0.585526,0.636852
9,0.546263,0.807843,0.52551,0.636785


In [14]:
# Take the threshold from the first row of roc_top_df (the ROC query sorts by F1, best first).
chosen_threshold = float(roc_top_df["threshold"].iloc[0])
chosen_threshold

0.56882556637744

In [15]:
# Confusion matrix on the 'test' split, using chosen_threshold as the decision cut-off.
cm_sql = f"""
SELECT *
FROM ML.CONFUSION_MATRIX(
  MODEL `telco_churn_ds.churn_logreg_model`,
  (
    SELECT * EXCEPT(customerID, split)
    FROM `telco_churn_ds.v_features_with_split`
    WHERE split = 'test'
  ),
  STRUCT({chosen_threshold} AS threshold)
);
"""

# Run the query and materialize the matrix as a DataFrame for display.
cm_job = bq.query(cm_sql)
cm_df = cm_job.to_dataframe()
cm_df

Unnamed: 0,expected_label,_0,_1
0,0,631,168
1,1,93,223


In [19]:
# Persist the decision threshold in a table (for batch and Looker use).
from google.cloud import bigquery

bq = bigquery.Client(project="infinite-mantra-480821-v7")

# Requires `chosen_threshold`, which the ROC step sets, e.g.
# chosen_threshold = float(roc_top_df.iloc[0]["threshold"])

threshold_sql = """
CREATE OR REPLACE TABLE `telco_churn_ds.model_decision_threshold` AS
SELECT
  'churn_logreg_model' AS model_name,
  @th                    AS threshold,
  CURRENT_TIMESTAMP()    AS decided_at
"""

# Bind the threshold as a typed FLOAT64 parameter named "th".
th_param = bigquery.ScalarQueryParameter("th", "FLOAT64", float(chosen_threshold))
job_config = bigquery.QueryJobConfig(query_parameters=[th_param])

job = bq.query(threshold_sql, job_config=job_config)
job.result()

# Read the table back to confirm what was written.
bq.query("SELECT * FROM `telco_churn_ds.model_decision_threshold`").to_dataframe()

Unnamed: 0,model_name,threshold,decided_at
0,churn_logreg_model,0.568826,2026-02-05 09:19:03.589859+00:00


In [21]:
from google.cloud import bigquery

bq = bigquery.Client(project="infinite-mantra-480821-v7")

# Requires `chosen_threshold`, which the ROC step sets, e.g.
# chosen_threshold = float(roc_top_df.iloc[0]["threshold"])

# The threshold is bound as a typed FLOAT64 query parameter rather than being
# formatted into the SQL text, so the statement does not depend on how Python
# renders the float (e.g. `nan` / `inf` would not be valid SQL literals).
threshold_sql = """
CREATE OR REPLACE TABLE `telco_churn_ds.model_decision_threshold` AS
SELECT
  'churn_logreg_model' AS model_name,
  @th AS threshold,
  CURRENT_TIMESTAMP() AS decided_at
"""
threshold_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("th", "FLOAT64", float(chosen_threshold))
    ]
)
bq.query(threshold_sql, job_config=threshold_job_config).result()

# Read the table back to verify its content.
check_df = bq.query(
    "SELECT * FROM `telco_churn_ds.model_decision_threshold`"
).to_dataframe()

check_df

Unnamed: 0,model_name,threshold,decided_at
0,churn_logreg_model,0.568826,2026-02-05 09:21:38.813291+00:00


In [23]:
# Persist the chosen decision threshold to a table.
# NOTE: this cell only (re)writes the threshold table; it does not run any batch prediction.
from google.cloud import bigquery

bq = bigquery.Client(project="infinite-mantra-480821-v7")

# Requires `chosen_threshold`, which the ROC step sets, e.g.
# chosen_threshold = float(roc_top_df.iloc[0]["threshold"])

# The threshold is bound as a typed FLOAT64 query parameter rather than being
# formatted into the SQL text, so the statement does not depend on how Python
# renders the float (e.g. `nan` / `inf` would not be valid SQL literals).
threshold_sql = """
CREATE OR REPLACE TABLE `telco_churn_ds.model_decision_threshold` AS
SELECT
  'churn_logreg_model' AS model_name,
  @th AS threshold,
  CURRENT_TIMESTAMP() AS decided_at
"""
threshold_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("th", "FLOAT64", float(chosen_threshold))
    ]
)
bq.query(threshold_sql, job_config=threshold_job_config).result()

# Read the table back to verify its content.
check_df = bq.query(
    "SELECT * FROM `telco_churn_ds.model_decision_threshold`"
).to_dataframe()

check_df

Unnamed: 0,model_name,threshold,decided_at
0,churn_logreg_model,0.568826,2026-02-05 09:24:42.967256+00:00
