In [None]:
import os
from google.cloud import bigquery

# Set the GOOGLE_CLOUD_PROJECT environment variable before any client is created.
os.environ.update({"GOOGLE_CLOUD_PROJECT": "bigquery-444006"})

In [None]:
# One BigQuery client shared by every cell below; built with no explicit arguments.
client = bigquery.Client()

# Query configuration with the result cache switched off. It is passed to the
# q1 and q2 queries, whose billed bytes are reported in q3.
job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False

In [None]:
#q1
# Look up the geo_id of the county named 'Dane' in the public counties table
# and record how many MB the query was billed for (reported in q3).
query = """
SELECT
  geo_id
FROM
  `bigquery-public-data.geo_us_boundaries.counties`
WHERE
  county_name = 'Dane';
"""
q1_job = client.query(query, job_config=job_config)
# Wait for the job to finish BEFORE reading its statistics: total_bytes_billed
# is None until the job has completed, so dividing it right after submission
# races against the backend and can raise a TypeError.
q1_rows = list(q1_job.result())
# Billed bytes expressed in whole MB (1 MB = 1024**2 bytes).
q1_billed = q1_job.total_bytes_billed // (1024**2)
# The answer is the geo_id of the first returned row.
result = q1_rows[0].geo_id
result

In [None]:
#q2
# Count counties per state FIPS code and keep the five states with the most
# counties; also record how many MB the query was billed for (reported in q3).
query = """
SELECT
  state_fips_code,
  COUNT(*) AS county_count
FROM
  `bigquery-public-data.geo_us_boundaries.counties`
GROUP BY
  state_fips_code
ORDER BY
  county_count DESC
LIMIT
  5;
"""
q2_job = client.query(query, job_config=job_config)
# Wait for the job to finish BEFORE reading its statistics: total_bytes_billed
# is None until the job has completed, so dividing it right after submission
# races against the backend and can raise a TypeError.
q2_rows = list(q2_job.result())
# Billed bytes expressed in whole MB (1 MB = 1024**2 bytes).
q2_billed = q2_job.total_bytes_billed // (1024**2)
# Map each state FIPS code to its county count, in the query's result order.
result = {row.state_fips_code: row.county_count for row in q2_rows}
result

In [None]:
#q3
# Billed size of the q1 and q2 queries, formatted as "<n> MB" and keyed by question.
costs = {
    "q1": f"{q1_billed} MB",
    "q2": f"{q2_billed} MB",
}
costs

In [None]:
# Create the "p8" dataset inside the client's project, located in the US.
dataset_id = f"{client.project}.p8"
dataset = bigquery.Dataset(dataset_id)
dataset.location = "US"
# exists_ok=True asks the client not to raise when the dataset is already there.
client.create_dataset(dataset, exists_ok=True)

In [None]:
# Load the HDMA parquet file from Cloud Storage into the "hdma" table of the
# dataset created above.
table_id = "{}.hdma".format(dataset_id)
uri = "gs://uw-madison_cs544_p8_bucket/hdma-wi-2021.parquet"
# Use a dedicated name for the load configuration: rebinding `job_config` here
# would replace the QueryJobConfig that the q1/q2 cells pass to client.query(),
# breaking them whenever they are re-run after this cell.
# WRITE_TRUNCATE replaces the table contents on every run; load jobs append by
# default, so re-running this cell would otherwise duplicate every row.
load_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.PARQUET,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
load_job = client.load_table_from_uri(uri, table_id, job_config=load_config)
# Block until the load job has finished.
load_job.result()

In [None]:
#q4
# IDs of every dataset the client can list, in the order the API returns them.
datasets = [
    listed.dataset_id
    for listed in client.list_datasets()
]
datasets

In [None]:
#q5
# Top 10 counties by number of distinct HDMA application indexes. The HDMA rows
# are matched to the public counties table on the county FIPS code.
query = """
SELECT
  counties.county_name AS county_name,
  COUNT(DISTINCT hdma_data.index) AS application_count
FROM
  `bigquery-444006.p8.hdma` AS hdma_data
JOIN
  `bigquery-public-data.geo_us_boundaries.counties` AS counties
ON
  hdma_data.county_code = counties.county_fips_code
GROUP BY
  county_name
ORDER BY
  application_count DESC
LIMIT
  10;
"""
q5_rows = client.query(query).result()
# county name -> application count, preserving the descending order of the query.
result = dict(
    (row["county_name"], row["application_count"]) for row in q5_rows
)
result

In [None]:
# Register a Google Sheets document as the external table "p8.applications".
sheet_url = "https://docs.google.com/spreadsheets/d/13e14LzDDm9U4y2KddlKFAy7exNdbo1OwJa-OTe4ywiw/edit"
table_id = "bigquery-444006.p8.applications"

# External data configuration: the sheet is the only source, schema auto-detected.
table_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
table_config.autodetect = True
table_config.source_uris = [sheet_url]

# Attach the configuration to a new table object and create it.
# exists_ok=True asks the client not to raise when the table is already there.
table = bigquery.Table(table_id)
table.external_data_configuration = table_config
client.create_table(table, exists_ok=True)

In [None]:
#q6
# Number of rows in the applications table whose income equals 120000.
query = """
SELECT
  COUNT(*) AS application_count
FROM
  `bigquery-444006.p8.applications`
WHERE
  income = 120000
"""
q6_rows = list(client.query(query).result())
# The aggregate is read from the first returned row.
result = q6_rows[0].application_count
result

In [None]:
# Train (or retrain) a BigQuery ML linear regression that predicts loan_amount
# from income (exposed to the model as feature "x").
query = """
CREATE OR REPLACE MODEL
  `bigquery-444006.p8.linear_reg_model` OPTIONS( model_type='LINEAR_REG',
    input_label_cols=['loan_amount'] ) AS
SELECT
  income AS x,
  loan_amount
FROM
  `bigquery-444006.p8.applications`
"""
# Submit the statement and wait for it to finish.
result = client.query(query).result()

In [None]:
#q7
# r2_score reported by ML.EVALUATE for the linear regression model.
query = """
SELECT
  r2_score
FROM
  ML.EVALUATE(MODEL `bigquery-444006.p8.linear_reg_model`);
"""
q7_rows = list(client.query(query).result())
# The metric is read from the first returned row.
result = q7_rows[0]["r2_score"]
result

In [None]:
#q8
# Smallest ST_DISTANCE between any application's (longitude, latitude) point and
# the fixed point (-89.384444, 43.074722); rows missing a coordinate are skipped.
query = """
SELECT
  MIN( ST_DISTANCE( ST_GEOGPOINT(longitude, latitude), ST_GEOGPOINT(-89.384444, 43.074722) ) ) AS distance
FROM
  `bigquery-444006.p8.applications`
WHERE
  longitude IS NOT NULL
  AND latitude IS NOT NULL;
"""
q8_rows = client.query(query).result()
# Take the first row from the iterator and pull out the aggregate column.
first_row = next(q8_rows)
result = first_row["distance"]
result

In [None]:
#q9
# Applications per county for state FIPS code '55'. An application is assigned
# to a county when the county geometry contains the application's point.
query = """
SELECT
  counties.county_name AS county_name,
  COUNT(applications.latitude) AS application_count
FROM
  `bigquery-public-data.geo_us_boundaries.counties` AS counties
JOIN
  `bigquery-444006.p8.applications` AS applications
ON
  ST_CONTAINS(counties.county_geom, ST_GEOGPOINT(applications.longitude, applications.latitude))
WHERE
  counties.state_fips_code = '55'
GROUP BY
  county_name;
"""
q9_rows = client.query(query).result()
# county name -> application count, in the order the rows are returned.
result = dict(
    (row["county_name"], row["application_count"]) for row in q9_rows
)
result

In [None]:
#q10
# Neighbors listed for 'Dane County', 'Wisconsin' in the adjacent_counties
# table, flattened from the `neighbors` array and sorted by name.
query = """
SELECT
  neighbor_data AS bordering_county
FROM
  `bigquery-public-data.geo_us_boundaries.adjacent_counties`,
  UNNEST(neighbors) AS neighbor_data
WHERE
  county = 'Dane County'
  AND state = 'Wisconsin'
ORDER BY
  bordering_county;
"""
q10_rows = client.query(query).result()
# One list entry per neighbor, in the query's sort order.
result = [row["bordering_county"] for row in q10_rows]
result