# BigQuery Cost Optimization

**Note:** For the full interactive notebook with all visualizations, please use `bigquery-exploration.ipynb` and `data-quality-workflow.ipynb` as templates.

This simplified notebook demonstrates cost optimization concepts.

## Quick Cost Estimation

In [None]:
from google.cloud import bigquery
import pandas as pd

# Client is created with no explicit project or credentials arguments.
client = bigquery.Client()
# Rate that estimate_cost applies per 1024**4 bytes processed; results are
# printed with a '$' prefix.
# NOTE(review): confirm this figure against current BigQuery pricing for
# your region/edition before relying on the estimates.
PRICE_PER_TB = 5.00

def estimate_cost(query_sql: str, price_per_tb=None) -> tuple:
    """Estimate the cost of a query from a BigQuery dry run.

    Submits ``query_sql`` through the module-level ``client`` with
    ``dry_run=True`` and the query cache disabled, then prices the byte
    count reported by the resulting job.

    Args:
        query_sql: SQL text to estimate.
        price_per_tb: Price applied per 1024**4 bytes processed. When
            omitted, the module-level ``PRICE_PER_TB`` is used.

    Returns:
        A ``(bytes_processed, cost)`` tuple: the job's
        ``total_bytes_processed`` and that count priced at ``price_per_tb``.
    """
    if price_per_tb is None:
        # Resolved at call time so that reassigning PRICE_PER_TB in a later
        # cell takes effect without re-running this definition.
        price_per_tb = PRICE_PER_TB
    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
    query_job = client.query(query_sql, job_config=job_config)
    bytes_processed = query_job.total_bytes_processed
    # 1024**4 bytes is one binary terabyte (TiB).
    cost = (bytes_processed / (1024**4)) * price_per_tb
    return bytes_processed, cost

# Printed once the definitions above have run, as a visual confirmation.
print('Cost estimation helper loaded!')

## Compare Query Costs

In [None]:
# Unoptimized query: SELECT * asks for every column of the table.
unopt_query = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE year >= 2000"
unopt_bytes, unopt_cost = estimate_cost(unopt_query)

# Optimized query: names only the three columns it needs.
# NOTE(review): BigQuery's documentation states that LIMIT does not reduce
# the bytes read on non-clustered tables, so any saving reported below
# presumably comes from the narrower column list, not from LIMIT - confirm.
opt_query = "SELECT name, year, number FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE year >= 2000 LIMIT 1000"
opt_bytes, opt_cost = estimate_cost(opt_query)

savings = unopt_cost - opt_cost
# A baseline estimate of zero would make the percentage a division by zero;
# report 0.0% in that case instead of raising ZeroDivisionError.
savings_pct = savings / unopt_cost * 100 if unopt_cost else 0.0

print(f'Unoptimized: ${unopt_cost:.4f}')
print(f'Optimized: ${opt_cost:.4f}')
print(f'Savings: ${savings:.4f} ({savings_pct:.1f}%)')