Challenge lab - 4

In [8]:
!pip -q install --upgrade google-cloud-bigquery google-cloud-pubsub

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/261.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━[0m [32m204.8/261.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.3/261.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/320.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.1/320.1 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
# Declare the identifiers shared by the cells below.
PROJECT_BQ = "qwiklabs-gcp-00-46c4d2064c57"  # my project id; the BigQuery dataset and the Pub/Sub subscription are created under it
DATASET_ID = "challenge4"
TABLE_ID   = "flight_transponder"

# Pub/Sub source: the topic is addressed under PROJECT_PUBSUB, a different project from PROJECT_BQ.
PROJECT_PUBSUB = "paul-leroy"
TOPIC_ID = "flight-transponder"
SUBSCRIPTION_ID = "flight-transponder-sub"

In [6]:
# Provision the BigQuery dataset and table that receive the transponder records.
from google.cloud import bigquery

bq = bigquery.Client(project=PROJECT_BQ)

# Dataset: exists_ok=True makes re-running this cell harmless.
dataset_ref = f"{PROJECT_BQ}.{DATASET_ID}"
dataset = bigquery.Dataset(dataset_ref)
dataset.location = "US"
bq.create_dataset(dataset, exists_ok=True)

# Table: (column name, BigQuery type) pairs in record order.
# Every column is NULLABLE because any field may be empty in a record.
column_types = [
    ("MT", "STRING"),
    ("TT", "INT64"),
    ("SID", "STRING"),
    ("AID", "STRING"),
    ("Hex", "STRING"),
    ("FID", "STRING"),
    ("DMG", "DATE"),
    ("TMG", "TIME"),
    ("DML", "DATE"),
    ("TML", "TIME"),
    ("CS", "STRING"),
    ("Alt", "INT64"),
    ("GS", "INT64"),
    ("Trk", "INT64"),
    ("Lat", "FLOAT64"),
    ("Lng", "FLOAT64"),
    ("VR", "INT64"),
    ("Sq", "STRING"),
    ("Alrt", "INT64"),
    ("Emer", "INT64"),
    ("SPI", "INT64"),
    ("Gnd", "INT64"),
]
schema = [
    bigquery.SchemaField(column, bq_type, mode="NULLABLE")
    for column, bq_type in column_types
]

table_ref = f"{dataset_ref}.{TABLE_ID}"
table = bigquery.Table(table_ref, schema=schema)
bq.create_table(table, exists_ok=True)

print("BQ table ready:", table_ref)


BQ table ready: qwiklabs-gcp-00-46c4d2064c57.challenge4.flight_transponder


In [9]:
# Create a pull subscription in our own project (PROJECT_BQ) attached to the
# topic that lives in PROJECT_PUBSUB.
from google.api_core.exceptions import AlreadyExists
from google.cloud import pubsub_v1

subscriber = pubsub_v1.SubscriberClient()

topic_path = f"projects/{PROJECT_PUBSUB}/topics/{TOPIC_ID}"
subscription_path = subscriber.subscription_path(PROJECT_BQ, SUBSCRIPTION_ID)

try:
    subscriber.create_subscription(name=subscription_path, topic=topic_path)
    print("Created subscription:", subscription_path)
except AlreadyExists:
    # Only the "already there" case is benign (e.g. the cell was re-run).
    # Anything else -- missing permissions, wrong topic path, quota -- must
    # surface instead of being reported as "may already exist".
    print("Subscription already exists:", subscription_path)


Created subscription: projects/qwiklabs-gcp-00-46c4d2064c57/subscriptions/flight-transponder-sub


In [10]:
#Data collection
import time
from datetime import datetime

# Column order of one comma-separated transponder record (22 columns).
FIELD_NAMES = [
    "MT", "TT", "SID", "AID", "Hex", "FID",
    "DMG", "TMG", "DML", "TML",
    "CS", "Alt", "GS", "Trk",
    "Lat", "Lng",
    "VR", "Sq", "Alrt", "Emer", "SPI", "Gnd",
]

# Columns grouped by the conversion parse_value applies to them;
# any column not listed here is kept as a string.
INT_FIELDS = {"TT", "Alt", "GS", "Trk", "VR", "Alrt", "Emer", "SPI", "Gnd"}
FLOAT_FIELDS = {"Lat", "Lng"}
DATE_FIELDS = {"DMG", "DML"}
TIME_FIELDS = {"TMG", "TML"}

def parse_value(name, val):
    """Convert one raw CSV cell into the value sent to BigQuery for column `name`.

    Args:
        name: Column name; selects the conversion via INT_FIELDS,
            FLOAT_FIELDS and DATE_FIELDS.
        val: Raw cell text; surrounding whitespace is ignored.

    Returns:
        None for an empty cell, an int or float for numeric columns, an ISO
        "YYYY-MM-DD" string for date columns (input is "YYYY/MM/DD"), and the
        stripped text for everything else. Time columns take that last path,
        so "00:43:07.127" stays a string; BQ TIME accepts fractional seconds.

    Raises:
        ValueError: if a numeric or date cell cannot be parsed.
    """
    text = val.strip()
    if not text:
        return None

    # Checked in this order; the first group containing `name` decides.
    converters = (
        (INT_FIELDS, int),
        (FLOAT_FIELDS, float),
        (DATE_FIELDS, lambda raw: datetime.strptime(raw, "%Y/%m/%d").date().isoformat()),
    )
    for group, convert in converters:
        if name in group:
            return convert(text)

    return text

def parse_csv_line(line: str) -> dict:
    """Turn one comma-separated record into a {column: value} dict.

    The line is split on commas, each cell is stripped, and the cell list is
    padded with empty strings or truncated so it has exactly one cell per
    entry in FIELD_NAMES. Each cell is then converted with parse_value.
    """
    width = len(FIELD_NAMES)
    cells = [cell.strip() for cell in line.split(",")]
    # Pad short records (a negative multiplier yields an empty list) ...
    cells.extend([""] * (width - len(cells)))
    # ... and drop any surplus trailing cells.
    del cells[width:]
    return {field: parse_value(field, cell) for field, cell in zip(FIELD_NAMES, cells)}

from google.api_core.exceptions import DeadlineExceeded

TABLE_FQN = f"{PROJECT_BQ}.{DATASET_ID}.{TABLE_ID}"

def insert_rows(rows):
    """Stream `rows` (a list of dicts) into TABLE_FQN.

    Returns:
        The error list reported by `insert_rows_json`; it is empty when every
        row was accepted. Errors are also printed.
    """
    errors = bq.insert_rows_json(TABLE_FQN, rows)
    if errors:
        print("Insert errors:", errors)
    return errors

# Run for a few minutes
duration_seconds = 180
end_time = time.time() + duration_seconds

while time.time() < end_time:
    try:
        response = subscriber.pull(
            request={"subscription": subscription_path, "max_messages": 100},
            timeout=10,
        )
    except DeadlineExceeded:
        # Nothing arrived within the pull timeout; that is not a failure,
        # so keep polling until the collection window closes.
        continue
    if not response.received_messages:
        continue

    rows, ack_ids = [], []
    for rm in response.received_messages:
        ack_ids.append(rm.ack_id)
        try:
            data = rm.message.data.decode("utf-8").strip()
        except UnicodeDecodeError as exc:
            # An undecodable payload will never become parseable; skip it
            # (it is still acknowledged below) rather than abort the run.
            print("Skipping undecodable message:", exc)
            continue
        # A message may carry several records, one per line.
        for line in data.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(parse_csv_line(line))
            except ValueError as exc:
                # One malformed record must not stop the whole collection loop.
                print("Skipping malformed record:", repr(line), exc)

    if rows and insert_rows(rows):
        # The insert reported errors: do NOT acknowledge, so Pub/Sub
        # redelivers these messages instead of the data being silently lost.
        continue
    subscriber.acknowledge(request={"subscription": subscription_path, "ack_ids": ack_ids})

print("Done collecting messages.")


Done collecting messages.


In [12]:
# Sanity check: count the rows now present in the destination table to
# confirm that data is being written into BigQuery.
count_sql = f"SELECT COUNT(*) AS n FROM `{PROJECT_BQ}.{DATASET_ID}.{TABLE_ID}`"
count_rows = list(bq.query(count_sql).result())
record_count = count_rows[0]["n"]
minutes = duration_seconds/60
print(record_count, 'records pulled within', minutes, 'mins' )

45536 records pulled within 3.0 mins


In [15]:
# Turn every record that has a position into a GEOGRAPHY point plus a few
# flight attributes (presumably the query used for the map visualization).
# Note that ST_GEOGPOINT takes longitude first, then latitude.
geoviz_sql = f"""
SELECT
  ST_GEOGPOINT(Lng, Lat) AS Location,
  CS, Alt, GS, Trk, DMG, TMG
FROM `{PROJECT_BQ}.{DATASET_ID}.{TABLE_ID}`
WHERE Lat IS NOT NULL AND Lng IS NOT NULL
"""
# Preview only: fetch at most 10 rows instead of downloading the whole result
# set into pandas and slicing it afterwards. The query text itself is unchanged.
bq.query(geoviz_sql).result(max_results=10).to_dataframe()

Unnamed: 0,Location,CS,Alt,GS,Trk,DMG,TMG
0,POINT(-0.68405 51.3129),,10575,,,2026-01-15,20:14:46.530000
1,POINT(-0.69689 51.79861),,10975,,,2026-01-15,20:14:46.655000
2,POINT(-0.26779 51.63244),,7950,,,2026-01-15,20:14:46.708000
3,POINT(1.3753 51.16502),,34675,,,2026-01-15,20:14:46.710000
4,POINT(3.74552 51.28432),,32000,,,2026-01-15,20:14:46.275000
5,POINT(-0.25565 51.46536),,2800,,,2026-01-15,20:14:46.279000
6,POINT(-2.8112 51.8307),,32000,,,2026-01-15,20:14:46.304000
7,POINT(0.51498 51.67275),,9800,,,2026-01-15,20:14:46.309000
8,POINT(-1.51596 51.9185),,17125,,,2026-01-15,20:14:46.398000
9,POINT(-1.12961 51.70771),,36025,,,2026-01-15,20:14:46.425000


The visualization screenshot is attached.