In [8]:
from google.cloud import bigquery
# Shared BigQuery client; every query, dataset call and load job below goes through it.
bq = bigquery.Client()

In [9]:
#q1
# Look up the geo_id of the county named 'Dane' in the public counties table.
dane_sql = """
SELECT geo_id 
FROM bigquery-public-data.geo_us_boundaries.counties
WHERE county_name = 'Dane'
"""
q = bq.query(dane_sql)
result = q.to_dataframe()

# Only the first returned row is reported.
first_geo_id = result.loc[0, 'geo_id']
print(first_geo_id)

55025


In [10]:
#q2
# Count counties per state FIPS code and keep the five largest groups.
top_states_sql = """
SELECT state_fips_code, COUNT(*) as county_count
FROM bigquery-public-data.geo_us_boundaries.counties
GROUP BY state_fips_code
ORDER BY county_count DESC
LIMIT 5
"""
q = bq.query(top_states_sql)
result = q.to_dataframe()

# Cell output: {state_fips_code: county_count}
counts_by_state = result.set_index('state_fips_code')['county_count']
counts_by_state.to_dict()

{'48': 254, '13': 159, '51': 133, '21': 120, '29': 115}

In [11]:
#q3
def calculate_cost(query, job_config, price_per_tib=5):
    """Run ``query`` and return what it cost, in dollars.

    Args:
        query: SQL text to execute through the module-level ``bq`` client.
        job_config: Job configuration forwarded to ``bq.query``.
        price_per_tib: Dollars charged per TiB (1024 ** 4 bytes) billed.
            Defaults to 5, the rate that was previously hard-coded here.

    Returns:
        Bytes billed for the job, converted to TiB and multiplied by
        ``price_per_tib``.
    """
    query_job = bq.query(query, job_config=job_config)
    # bq.query() returns as soon as the job is submitted; the billing
    # statistics are only guaranteed to be filled in once the job is done,
    # so wait for completion before reading them.
    query_job.result()
    # total_bytes_billed may still be None when nothing was billed; count that as 0.
    bytes_billed = query_job.total_bytes_billed or 0
    cost = (bytes_billed / (1024 ** 4)) * price_per_tib  # cost in dollars
    return cost

# Turn off the query cache for the cost measurements below.
job_config = bigquery.QueryJobConfig(use_query_cache=False)

# Same SQL as the q1 cell: geo_id of the county named 'Dane'.
q1 = """
SELECT geo_id 
FROM bigquery-public-data.geo_us_boundaries.counties
WHERE county_name = 'Dane'
"""

# Same SQL as the q2 cell: the five state FIPS codes with the most counties.
q2 = """
SELECT state_fips_code, COUNT(*) as county_count
FROM bigquery-public-data.geo_us_boundaries.counties
GROUP BY state_fips_code
ORDER BY county_count DESC
LIMIT 5
"""

# Run the two queries in order (q1, then q2) and record what each one cost.
cost_q1, cost_q2 = [calculate_cost(sql, job_config) for sql in (q1, q2)]

costs = dict(q1=cost_q1, q2=cost_q2)
print(costs)

{'q1': 4.76837158203125e-05, 'q2': 4.76837158203125e-05}


In [14]:
# Dataset handle built from a "project.dataset" path; the bare name displays its repr.
dataset_path = "decent-terra-398415.p8"
ds = bigquery.Dataset(dataset_path)
ds

Dataset(DatasetReference('decent-terra-398415', 'p8'))

In [16]:
# exists_ok=True lets this cell be re-run when the dataset is already there.
bq.create_dataset(dataset=ds, exists_ok=True)

Dataset(DatasetReference('decent-terra-398415', 'p8'))

In [18]:
# Parquet file in Cloud Storage to load, and the destination table as "dataset.table".
uri = "gs://dec1213/hdma-wi-2021.parquet"

table_id = "p8.hdma"

# Load jobs append to an existing table by default, so re-running this cell
# would duplicate every row and inflate any counts computed from p8.hdma.
# WRITE_TRUNCATE replaces the table contents instead, making the cell idempotent.
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.PARQUET,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
load_job = bq.load_table_from_uri(uri, table_id, job_config=job_config)

# Wait for the load to finish; the returned job is shown as the cell output.
load_job.result()

LoadJob<project=decent-terra-398415, location=US, id=68b6e999-a075-475e-8259-1f6cb55e757b>

In [22]:
#q4
# IDs of all datasets listed for the project. The loop variable is deliberately
# not called `ds`, so it cannot be confused with the dataset handle above.
datasets = [listed.dataset_id for listed in bq.list_datasets(project="decent-terra-398415")]
print(datasets)

['p8']


In [24]:
#q5
# Ten county names with the most rows in p8.hdma, matching each row's
# county_code against geo_id in the public counties table.
query = """
SELECT c.county_name, COUNT(*) as loan_count
FROM `p8.hdma` as h
JOIN `bigquery-public-data.geo_us_boundaries.counties` as c
ON h.county_code = c.geo_id
GROUP BY c.county_name
ORDER BY loan_count DESC
LIMIT 10
"""

query_job = bq.query(query)

# Reshape the result frame into {county_name: loan_count}.
result = query_job.to_dataframe()
counts_by_county = result.set_index('county_name')['loan_count']
loan_counts = counts_by_county.to_dict()

print(loan_counts)


{'Milwaukee': 46570, 'Dane': 38557, 'Waukesha': 34159, 'Brown': 15615, 'Racine': 13007, 'Outagamie': 11523, 'Kenosha': 10744, 'Washington': 10726, 'Rock': 9834, 'Winnebago': 9310}
