From 844a7fb5b0045c8d2fdc24a3221140f8546d9cd1 Mon Sep 17 00:00:00 2001 From: Michelle Lohr <98989192+chellebodnar-google@users.noreply.github.com> Date: Thu, 17 Mar 2022 15:31:26 -0700 Subject: [PATCH] Feat: Add 311 example (#310) * Create category_by_complaint_source.sql Add a query snippet * Create artifact.yaml * Improve formatting * Add better comments Add comments and change variable "num_instances" to "num_compaints" for clarity --- .../docs/queries/artifact.yaml | 5 ++++ .../category_by_complaint_source.sql | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 datasets/san_francisco_311/docs/queries/artifact.yaml create mode 100644 datasets/san_francisco_311/docs/queries/category_by_complaint_source/category_by_complaint_source.sql diff --git a/datasets/san_francisco_311/docs/queries/artifact.yaml b/datasets/san_francisco_311/docs/queries/artifact.yaml new file mode 100644 index 000000000..db9c4cfee --- /dev/null +++ b/datasets/san_francisco_311/docs/queries/artifact.yaml @@ -0,0 +1,5 @@ +artifact: + title: Analyze most prevelant category by complaint source of issue + description: In this tutorial we analyze the most likely category corresponding to each complaint source from all 311 reports in San Francisco. + vertical: government + tier: free diff --git a/datasets/san_francisco_311/docs/queries/category_by_complaint_source/category_by_complaint_source.sql b/datasets/san_francisco_311/docs/queries/category_by_complaint_source/category_by_complaint_source.sql new file mode 100644 index 000000000..71f245707 --- /dev/null +++ b/datasets/san_francisco_311/docs/queries/category_by_complaint_source/category_by_complaint_source.sql @@ -0,0 +1,30 @@ +# What is the most common category for each complaint source? + +WITH source_category_counts AS ( + SELECT + source, + category, + COUNT(1) AS num_complaints + FROM + `bigquery-public-data`.san_francisco_311.311_service_requests + GROUP BY + source, category +) +SELECT + source, + category, + num_complaints, + num_complaints/total AS fraction_of_source +FROM + (SELECT + source, + category, + num_complaints, + # Within each source, rank the categories by number of complaints in descending order. + ROW_NUMBER() OVER (PARTITION BY source ORDER BY num_complaints DESC) AS rank, + # Compute the total number of complaints reported per source + SUM(num_complaints) OVER (PARTITION BY source) total + FROM source_category_counts) + WHERE + # Extract the most common category of complaint + rank = 1;