Initialize "business_licenses" table before data load

In [0]:
-- Landing table for the business-licence extract. IF NOT EXISTS turns the
-- statement into a no-op when the table is already present, so re-running
-- this cell does not disturb rows that were loaded earlier.
-- NOTE(review): ISSUED_DATE is kept as free text; move it to DATE once the
-- source format is confirmed to parse cleanly.
-- NOTE(review): the key has no FOLDER_YEAR component even though the views
-- later in this file aggregate per FOLDER_YEAR -- confirm that
-- (POSTAL_CODE, BUSINESS_NAME) is unique across years. Presumably this targets
-- Amazon Redshift (the file ends with UNLOAD ... IAM_ROLE), where a declared
-- primary key is not enforced on load -- verify before relying on it.
CREATE TABLE IF NOT EXISTS "business_licenses" (
    FOLDER_YEAR         INTEGER      NOT NULL,  -- licence year; a later join adds 2000 to it
    STATUS              VARCHAR(255),           -- licence status, free text
    ISSUED_DATE         VARCHAR(255),           -- issue date, stored as text
    BUSINESS_NAME       VARCHAR(255) NOT NULL,  -- business name (part of the key)
    BUSINESS_TRADE_NAME VARCHAR(255),           -- trade name, if any
    BUSINESS_TYPE       VARCHAR(255),           -- business type, free text
    BUSINESS_SUBTYPE    VARCHAR(255),           -- business subtype, free text
    POSTAL_CODE         VARCHAR(255) NOT NULL,  -- postal code (part of the key)
    STREET_NAME         VARCHAR(255),           -- street name, free text
    LOCAL_AREA          VARCHAR(255),           -- local area, free text
    NUMBER_OF_EMPLOYEES INTEGER,                -- head count; may be NULL
    PRIMARY KEY (POSTAL_CODE, BUSINESS_NAME)
);

Sample table to check for correctness

In [0]:
-- Peek at ten loaded rows; there is no ORDER BY, so which ten come back is arbitrary.
SELECT * FROM business_licenses LIMIT 10;

-- Total number of rows currently in the table.
SELECT COUNT(*) AS total_entries FROM business_licenses;

Initialize "property_tax" table before data load

In [0]:
-- Landing table for the property-tax extract. IF NOT EXISTS turns the
-- statement into a no-op when the table is already present, so re-running
-- this cell does not disturb rows that were loaded earlier.
-- NOTE(review): the key has no REPORT_YEAR component even though the views
-- later in this file aggregate per REPORT_YEAR -- confirm that
-- (PROPERTY_POSTAL_CODE, PID) is unique across report years. Presumably this
-- targets Amazon Redshift (the file ends with UNLOAD ... IAM_ROLE), where a
-- declared primary key is not enforced on load -- verify before relying on it.
CREATE TABLE IF NOT EXISTS "property_tax" (
    PID                        VARCHAR(255)   NOT NULL,  -- property identifier (part of the key)
    LEGAL_TYPE                 VARCHAR(255),             -- legal type, categorical text
    ZONING_DISTRICT            VARCHAR(255),             -- zoning district, free text
    ZONING_CLASSIFICATION      VARCHAR(255),             -- zoning classification, free text
    TO_CIVIC_NUMBER            VARCHAR(255),             -- civic number; kept as text since it may not be numeric
    STREET_NAME                VARCHAR(255),             -- street name, free text
    PROPERTY_POSTAL_CODE       VARCHAR(20)    NOT NULL,  -- postal code (part of the key)
    CURRENT_LAND_VALUE         NUMERIC(18, 2),           -- monetary amount
    CURRENT_IMPROVEMENT_VALUE  NUMERIC(18, 2),           -- monetary amount
    TAX_ASSESSMENT_YEAR        INTEGER,                  -- year
    PREVIOUS_LAND_VALUE        NUMERIC(18, 2),           -- monetary amount
    PREVIOUS_IMPROVEMENT_VALUE NUMERIC(18, 2),           -- monetary amount
    YEAR_BUILT                 INTEGER,                  -- year
    BIG_IMPROVEMENT_YEAR       INTEGER,                  -- year
    TAX_LEVY                   NUMERIC(18, 2),           -- monetary amount
    NEIGHBOURHOOD_CODE         VARCHAR(255),             -- neighbourhood code, text
    REPORT_YEAR                INTEGER        NOT NULL,  -- year of the report the row came from
    PRIMARY KEY (PROPERTY_POSTAL_CODE, PID)
);

Sample table to check for correctness

In [0]:
-- Peek at ten loaded rows; there is no ORDER BY, so which ten come back is arbitrary.
SELECT * FROM property_tax LIMIT 10;

-- Total number of rows currently in the table.
SELECT COUNT(*) AS total_entries FROM property_tax;

Create view grouped by postal code for "business_licenses"

In [0]:
-- Rebuild the business roll-up from scratch. CASCADE also removes every view
-- that selects from this one, so the cells defining those views have to be
-- re-run afterwards.
DROP VIEW IF EXISTS business_size_by_postal_code CASCADE;

-- One row per (POSTAL_CODE, FOLDER_YEAR, STREET_NAME, LOCAL_AREA): the grain
-- is finer than postal code alone, despite the view name.
CREATE VIEW business_size_by_postal_code AS
SELECT
    bl.POSTAL_CODE,
    bl.FOLDER_YEAR,
    bl.STREET_NAME,
    bl.LOCAL_AREA,
    SUM(bl.NUMBER_OF_EMPLOYEES) AS TOTAL_EMPLOYEES,   -- NULL when no row in the group has a head count
    COUNT(bl.BUSINESS_NAME)     AS TOTAL_BUSINESSES   -- licence rows in the group (BUSINESS_NAME is NOT NULL)
FROM business_licenses AS bl
GROUP BY
    bl.POSTAL_CODE,
    bl.FOLDER_YEAR,
    bl.STREET_NAME,
    bl.LOCAL_AREA;

-- Peek at ten rows of the roll-up (arbitrary ten, no ORDER BY).
SELECT * FROM business_size_by_postal_code LIMIT 10;

-- Number of groups the roll-up produced.
SELECT COUNT(*) AS total_entries FROM business_size_by_postal_code;

Create view grouped by postal code for "property_tax"

In [0]:
-- Rebuild the property roll-up from scratch. CASCADE also removes every view
-- that selects from this one, so the cells defining those views have to be
-- re-run afterwards.
DROP VIEW IF EXISTS property_aggregated_by_postal_code CASCADE;

-- One row per (postal code, REPORT_YEAR, ZONING_CLASSIFICATION, STREET_NAME),
-- carrying the summed land and improvement values of the properties in that
-- group. PROPERTY_POSTAL_CODE is exposed as POSTAL_CODE so it lines up with
-- the business-side view.
CREATE OR REPLACE VIEW property_aggregated_by_postal_code AS
SELECT
    pt.PROPERTY_POSTAL_CODE            AS POSTAL_CODE,
    pt.REPORT_YEAR,
    pt.ZONING_CLASSIFICATION,
    pt.STREET_NAME,
    SUM(pt.PREVIOUS_LAND_VALUE)        AS TOTAL_PREVIOUS_LAND_VALUE,
    SUM(pt.CURRENT_LAND_VALUE)         AS TOTAL_CURRENT_LAND_VALUE,
    SUM(pt.PREVIOUS_IMPROVEMENT_VALUE) AS TOTAL_PREVIOUS_IMPROVEMENT_VALUE,
    SUM(pt.CURRENT_IMPROVEMENT_VALUE)  AS TOTAL_CURRENT_IMPROVEMENT_VALUE
FROM property_tax AS pt
GROUP BY
    pt.PROPERTY_POSTAL_CODE,
    pt.REPORT_YEAR,
    pt.ZONING_CLASSIFICATION,
    pt.STREET_NAME;

-- Peek at ten rows of the roll-up (arbitrary ten, no ORDER BY).
SELECT * FROM property_aggregated_by_postal_code LIMIT 10;

-- Number of groups the roll-up produced.
SELECT COUNT(*) AS total_entries FROM property_aggregated_by_postal_code;

Join the two views by postal code

In [0]:
-- Start clean; CASCADE takes any view that selects from this one down with it.
DROP VIEW IF EXISTS combined_by_postal_code CASCADE;

-- Pairs the business roll-up with the property roll-up for the same postal
-- code and year. The match uses postal code and year only, so every business
-- row of a postal code/year is paired with every property row of that postal
-- code/year: business totals repeat once per property group and vice versa.
-- NOTE(review): anything that sums or averages over this view has to allow
-- for that repetition.
CREATE OR REPLACE VIEW combined_by_postal_code AS
SELECT
    biz.POSTAL_CODE,
    prop.REPORT_YEAR AS YEAR,
    biz.LOCAL_AREA,
    biz.TOTAL_EMPLOYEES,
    biz.TOTAL_BUSINESSES,
    prop.ZONING_CLASSIFICATION,
    prop.STREET_NAME,            -- street from the property side; the business-side street is not exposed
    prop.TOTAL_PREVIOUS_LAND_VALUE,
    prop.TOTAL_CURRENT_LAND_VALUE,
    prop.TOTAL_PREVIOUS_IMPROVEMENT_VALUE,
    prop.TOTAL_CURRENT_IMPROVEMENT_VALUE
FROM business_size_by_postal_code AS biz
INNER JOIN property_aggregated_by_postal_code AS prop
    ON prop.POSTAL_CODE = biz.POSTAL_CODE
    -- FOLDER_YEAR holds a short year such as 16; shift it to 2016 to line up with REPORT_YEAR.
    AND prop.REPORT_YEAR = CAST(biz.FOLDER_YEAR AS INT) + 2000;

-- Peek at ten combined rows for 2023 (arbitrary ten, no ORDER BY).
SELECT * FROM combined_by_postal_code WHERE year = 2023 LIMIT 10;

-- Number of business/property pairings across all years.
SELECT COUNT(*) AS total_entries FROM combined_by_postal_code;

Group data by local area

In [0]:
-- Start clean; CASCADE takes any view that selects from this one down with it.
DROP VIEW IF EXISTS combined_by_local_area CASCADE;

-- Per (LOCAL_AREA, YEAR): business totals, average property-group values and
-- the number of distinct postal codes.
--
-- combined_by_postal_code joins business rows to property rows on postal code
-- and year only, so inside one postal code/year every business row is repeated
-- once per property group (zoning classification + street) and every property
-- group is repeated once per business row. Aggregating those rows directly
-- counts employees and businesses once per property group and weights the
-- averages by the number of business rows. The CTEs below undo the repetition
-- before anything is summed or averaged.
CREATE OR REPLACE VIEW combined_by_local_area AS
WITH property_groups AS (
    -- One row per local area, year, postal code and property group.
    -- The SUMs add up the business rows paired with this property group, i.e.
    -- the business totals of the postal code within this local area; they come
    -- out identical for every property group of the same postal code.
    -- The MAXes just pick the property values, which are the same on every
    -- repeat of the group.
    SELECT
        LOCAL_AREA,
        YEAR,
        POSTAL_CODE,
        ZONING_CLASSIFICATION,
        STREET_NAME,
        SUM(TOTAL_EMPLOYEES)                  AS POSTAL_EMPLOYEES,
        SUM(TOTAL_BUSINESSES)                 AS POSTAL_BUSINESSES,
        MAX(TOTAL_PREVIOUS_LAND_VALUE)        AS TOTAL_PREVIOUS_LAND_VALUE,
        MAX(TOTAL_CURRENT_LAND_VALUE)         AS TOTAL_CURRENT_LAND_VALUE,
        MAX(TOTAL_PREVIOUS_IMPROVEMENT_VALUE) AS TOTAL_PREVIOUS_IMPROVEMENT_VALUE,
        MAX(TOTAL_CURRENT_IMPROVEMENT_VALUE)  AS TOTAL_CURRENT_IMPROVEMENT_VALUE
    FROM combined_by_postal_code
    GROUP BY
        LOCAL_AREA,
        YEAR,
        POSTAL_CODE,
        ZONING_CLASSIFICATION,
        STREET_NAME
),
numbered_groups AS (
    -- Number the property groups inside each postal code so the business
    -- totals can be taken from exactly one of them. Which group gets number 1
    -- does not matter, because the totals are the same on all of them.
    SELECT
        pg.*,
        ROW_NUMBER() OVER (
            PARTITION BY pg.LOCAL_AREA, pg.YEAR, pg.POSTAL_CODE
            ORDER BY pg.ZONING_CLASSIFICATION, pg.STREET_NAME
        ) AS PROPERTY_GROUP_SEQ
    FROM property_groups AS pg
)
SELECT
    LOCAL_AREA,
    YEAR,
    -- Business totals: counted once per postal code.
    SUM(CASE WHEN PROPERTY_GROUP_SEQ = 1 THEN POSTAL_EMPLOYEES END)  AS TOTAL_EMPLOYEES,
    SUM(CASE WHEN PROPERTY_GROUP_SEQ = 1 THEN POSTAL_BUSINESSES END) AS TOTAL_BUSINESSES,
    -- Property averages: every property group contributes exactly once.
    AVG(TOTAL_PREVIOUS_LAND_VALUE)        AS AVG_TOTAL_PREVIOUS_LAND_VALUE,
    AVG(TOTAL_CURRENT_LAND_VALUE)         AS AVG_TOTAL_CURRENT_LAND_VALUE,
    AVG(TOTAL_PREVIOUS_IMPROVEMENT_VALUE) AS AVG_TOTAL_PREVIOUS_IMPROVEMENT_VALUE,
    AVG(TOTAL_CURRENT_IMPROVEMENT_VALUE)  AS AVG_TOTAL_CURRENT_IMPROVEMENT_VALUE,
    -- Despite the column name this is the number of distinct postal codes,
    -- not of individual properties; the name is kept for existing consumers.
    COUNT(DISTINCT POSTAL_CODE)           AS NUM_OF_PROPERTIES
FROM numbered_groups
GROUP BY
    LOCAL_AREA, YEAR
ORDER BY
    LOCAL_AREA, YEAR;

-- Full result: one row per local area and year.
SELECT *
FROM combined_by_local_area;

-- Number of (local area, year) combinations.
SELECT COUNT(*) AS total_entries
FROM combined_by_local_area;

Display series of yearly commercial investment data and land value changes in subsequent years

In [0]:
-- Start clean; CASCADE takes any view that selects from this one down with it.
DROP VIEW IF EXISTS investment_land_value_followup CASCADE;

-- For every postal code and "investment" year, lists the land value in each
-- of the following years (up to five) next to the business activity of the
-- investment year, plus the percentage change in land value.
--
-- combined_by_postal_code can hold several rows per postal code and year: one
-- per pairing of a business row with a property group (zoning classification +
-- street). Self-joining those rows and keeping "row number 1" ordered by a
-- column that is constant inside the partition picks an arbitrary pairing, so
-- the land values compared could belong to different streets or zonings and
-- could change between runs. The CTEs below first reduce the input to exactly
-- one row per postal code and year.
CREATE OR REPLACE VIEW investment_land_value_followup AS
WITH property_groups AS (
    -- One row per postal code, year and property group.
    -- The SUMs add up every business row of the postal code/year (each one is
    -- paired with this property group exactly once); the MAX picks the group's
    -- land value, which is the same on every repeat.
    SELECT
        POSTAL_CODE,
        YEAR,
        ZONING_CLASSIFICATION,
        STREET_NAME,
        MIN(LOCAL_AREA)               AS LOCAL_AREA,
        SUM(TOTAL_BUSINESSES)         AS TOTAL_BUSINESSES,
        SUM(TOTAL_EMPLOYEES)          AS TOTAL_EMPLOYEES,
        MAX(TOTAL_CURRENT_LAND_VALUE) AS TOTAL_CURRENT_LAND_VALUE
    FROM combined_by_postal_code
    GROUP BY
        POSTAL_CODE,
        YEAR,
        ZONING_CLASSIFICATION,
        STREET_NAME
),
postal_code_years AS (
    -- One row per postal code and year.
    -- Business totals are identical on every property group, so MAX returns
    -- them unchanged; the casts keep the integer type the counts had before.
    -- Land value is the sum over all property groups of the postal code.
    -- If a postal code maps to more than one local area, the alphabetically
    -- first one is reported so the result is repeatable.
    SELECT
        POSTAL_CODE,
        YEAR,
        MIN(LOCAL_AREA)                       AS LOCAL_AREA,
        CAST(MAX(TOTAL_BUSINESSES) AS BIGINT) AS TOTAL_BUSINESSES,
        CAST(MAX(TOTAL_EMPLOYEES) AS BIGINT)  AS TOTAL_EMPLOYEES,
        SUM(TOTAL_CURRENT_LAND_VALUE)         AS TOTAL_CURRENT_LAND_VALUE
    FROM property_groups
    GROUP BY
        POSTAL_CODE,
        YEAR
)
SELECT
    i.LOCAL_AREA,
    i.POSTAL_CODE,
    i.YEAR                     AS INVESTMENT_YEAR,
    i.TOTAL_BUSINESSES         AS BUSINESSES_COUNT,
    i.TOTAL_EMPLOYEES          AS EMPLOYEES_COUNT,
    i.TOTAL_CURRENT_LAND_VALUE AS LAND_VALUE_IN_INVESTMENT_YEAR,
    f.YEAR                     AS FOLLOWUP_YEAR,
    f.TOTAL_CURRENT_LAND_VALUE AS LAND_VALUE_IN_FOLLOWUP_YEAR,
    -- NULLIF turns a zero base value into NULL, so the result is NULL instead
    -- of a division-by-zero error.
    ROUND(
        (f.TOTAL_CURRENT_LAND_VALUE - i.TOTAL_CURRENT_LAND_VALUE) * 100.0
        / NULLIF(i.TOTAL_CURRENT_LAND_VALUE, 0),
        2
    )                          AS PERCENT_CHANGE_IN_LAND_VALUE
FROM postal_code_years AS i
INNER JOIN postal_code_years AS f
    ON f.POSTAL_CODE = i.POSTAL_CODE
    AND f.YEAR > i.YEAR       -- only later years count as follow-up
    AND f.YEAR <= i.YEAR + 5  -- follow-up window in years; widen or narrow here
ORDER BY
    i.LOCAL_AREA, i.POSTAL_CODE, i.YEAR, f.YEAR;


-- Peek at ten rows of the follow-up series.
SELECT *
FROM investment_land_value_followup
LIMIT 10;

-- Number of (postal code, investment year, follow-up year) combinations.
SELECT COUNT(*) AS total_entries
FROM investment_land_value_followup;

-- Unload results to output storage in S3 as comma-separated files with a
-- header line, replacing files already present under the prefix
-- (ALLOWOVERWRITE).
-- FORMAT AS CSV is used instead of a bare DELIMITER ',' because the plain
-- delimited format never quotes fields: a comma or double quote inside a text
-- column such as LOCAL_AREA would shift every following column in that line.
-- In CSV mode such fields are wrapped in double quotes; the delimiter stays a
-- comma.
UNLOAD ('SELECT * FROM investment_land_value_followup')
TO 's3://cmpt732-project-raw-data/analysis_output/'
IAM_ROLE 'arn:aws:iam::354918385749:role/AWSRedshiftOutputRole'
FORMAT AS CSV
HEADER
ALLOWOVERWRITE;