Challenge lab 2: predicting emergency response times with BigQuery ML

In [1]:
%%bash
# Create the BigQuery dataset that holds this lab's raw table and model.
# The target project is whatever gcloud currently has configured.
PROJECT_ID=$(gcloud config get-value project)
DATASET="challenge2"

# --force makes `bq mk` exit successfully when the dataset already exists, so
# the cell stays re-runnable. Unlike a blanket `|| true`, any other failure
# (permissions, wrong project, quota) still fails the cell instead of
# surfacing later as a confusing error in the load or training cells.
bq --location=US mk -d \
    --force \
    --description "emergency calls response times" \
    "${PROJECT_ID}:${DATASET}"

Dataset 'qwiklabs-gcp-00-46c4d2064c57:challenge2' successfully created.


In [5]:
import os

# Project and dataset identifiers shared by the cells that follow.
PROJECT_ID = "qwiklabs-gcp-00-46c4d2064c57"
DATASET = "challenge2"

# Export both values into the process environment so shell commands launched
# from the notebook can read $PROJECT_ID and $DATASET.
os.environ.update({"PROJECT_ID": PROJECT_ID, "DATASET": DATASET})

In [6]:
!bq load \
  --source_format=CSV \
  --skip_leading_rows=1 \
  --autodetect \
  $PROJECT_ID:$DATASET.emergency_calls_raw \
  gs://labs.roitraining.com/data-to-ai-workshop/emergency_calls_response_times.csv

Waiting on bqjob_r62f438f866aba56b_0000019bc283c46f_1 ... (2s) Current status: DONE   


In [7]:
from google.cloud import bigquery

# Fully-qualified BigQuery names for the raw table and the model built from it.
RAW_TABLE = ".".join([PROJECT_ID, DATASET, "emergency_calls_raw"])
MODEL = ".".join([PROJECT_ID, DATASET, "emergency_response_model"])

client = bigquery.Client(project=PROJECT_ID)

# Smoke test: fetch a few rows to confirm the load succeeded and the data is readable.
preview_job = client.query(f"SELECT * FROM `{RAW_TABLE}` LIMIT 10")
preview_job.to_dataframe()

Unnamed: 0,call_id,call_timestamp,call_type,location,weather_condition,day_of_week,time_of_day,traffic_level,distance_to_station,units_available,response_time
0,35957,2023-01-01 00:05:53+00:00,Fire,Highland,Rainy,Sunday,0,High,21.45,3,23.41
1,20832,2023-01-01 00:20:47+00:00,Fire,Oakmont,Rainy,Sunday,0,High,22.29,6,20.11
2,27949,2023-01-01 00:33:27+00:00,Fire,Riverside,Windy,Sunday,0,High,17.19,14,19.75
3,20199,2023-01-01 00:48:29+00:00,Fire,Riverside,Windy,Sunday,0,High,17.39,14,20.76
4,46938,2023-01-01 00:50:44+00:00,Rescue,Brookfield,Sunny,Sunday,0,High,22.5,14,22.37
5,17582,2023-01-01 02:28:50+00:00,Rescue,Downtown,Snowy,Sunday,2,High,25.15,6,28.48
6,21624,2023-01-01 02:44:06+00:00,Rescue,Oakmont,Snowy,Sunday,2,High,3.95,9,19.3
7,36793,2023-01-01 02:53:54+00:00,Fire,Riverside,Sunny,Sunday,2,High,5.87,10,10.72
8,41350,2023-01-01 03:52:33+00:00,Police,Greenfield,Windy,Sunday,3,High,6.66,5,20.55
9,32092,2023-01-01 04:09:23+00:00,Police,Maplewood,Snowy,Sunday,4,High,15.5,13,22.98


In [9]:
# Basic summary statistics of the label column: row count plus the mean,
# minimum and maximum response time, ignoring rows where it is missing.
stats_sql = f"""
SELECT
  COUNT(*) AS n,
  AVG(response_time) AS avg_response_time,
  MIN(response_time) AS min_response_time,
  MAX(response_time) AS max_response_time
FROM `{RAW_TABLE}`
WHERE response_time IS NOT NULL
"""
client.query(stats_sql).to_dataframe()

Unnamed: 0,n,avg_response_time,min_response_time,max_response_time
0,50000,17.446134,2.01,36.55


Create a BigQuery ML model to predict response time

In [11]:
# Train (or retrain) a boosted-tree regressor that predicts `response_time`
# from the call attributes. `call_id` and `call_timestamp` are left out of the
# feature list, rows without a label are dropped, and the train/eval split is
# delegated to BigQuery ML via AUTO_SPLIT.
create_model_sql = f"""
CREATE OR REPLACE MODEL `{MODEL}`
OPTIONS(
  model_type = 'BOOSTED_TREE_REGRESSOR',
  input_label_cols = ['response_time'],
  data_split_method = 'AUTO_SPLIT'
) AS
SELECT
  call_type,
  location,
  weather_condition,
  day_of_week,
  time_of_day,
  traffic_level,
  distance_to_station,
  units_available,
  response_time
FROM `{RAW_TABLE}`
WHERE response_time IS NOT NULL
"""
training_job = client.query(create_model_sql)
training_job.result()  # wait for the training job to finish before reporting
print(f"Model created: {MODEL}")


Model created: qwiklabs-gcp-00-46c4d2064c57.challenge2.emergency_response_model


In [12]:
# Evaluate the trained model: ML.EVALUATE returns its regression metrics
# (errors, r2_score, explained_variance) as a single-row result.
eval_sql = f"SELECT * FROM ML.EVALUATE(MODEL `{MODEL}`)"
eval_job = client.query(eval_sql)
eval_job.to_dataframe()


Unnamed: 0,mean_absolute_error,mean_squared_error,mean_squared_log_error,median_absolute_error,r2_score,explained_variance
0,1.80521,5.098738,0.015998,1.519919,0.821957,0.82333


In [13]:
# Score one hypothetical call with the trained model. The inner SELECT builds
# a single input row whose column names match the training features;
# ML.PREDICT returns that row plus a `predicted_response_time` column.
predict_sql = f"""
SELECT *
FROM ML.PREDICT(
  MODEL `{MODEL}`,
  (SELECT
     'Fire' AS call_type,
     'Greenfield' AS location,
     'Sunny' AS weather_condition,
     'Sunday' AS day_of_week,
     10 AS time_of_day,
     'High' AS traffic_level,
     15 AS distance_to_station,
     10 AS units_available
  )
)
"""
prediction_job = client.query(predict_sql)
prediction_job.to_dataframe()


Unnamed: 0,predicted_response_time,call_type,location,weather_condition,day_of_week,time_of_day,traffic_level,distance_to_station,units_available
0,16.944138,Fire,Greenfield,Sunny,Sunday,10,High,15,10
