In [1]:
from sqlalchemy import create_engine, inspect
import pandas as pd
import psycopg2
import plotly.express as px
import os

# move current directory to /Users/tanyatsui/Documents/01_Projects/housingEmissions
os.chdir('/Users/tanyatsui/Documents/01_Projects/housingEmissions')

In [2]:
# create a connection to the database
db_name = 'urbanmining'
db_user = 'postgres'
db_password = 'Tunacompany5694!'
db_host = 'localhost'
db_port = '5432'
engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')


# Testing harmonised data on in Amsterdam 2015 
Here, tested wijk-level harmonised data to see if it creates more accurate scenario results. I'm testing the "energy efficiency" scenario, in which all renovations and constructions maximise the energy efficiency of the building. The results have improved - previously, the "energy efficiency" scenario led to increased operational emissions, which doesn't make sense. Now, the scenario leads to lower operational emissions. 

### Emissions

In [47]:
base_query = ''' 
-- aggregate (buildings level stats on sqm) and (neighborhood level stats on energy) to (wijk geometries) 
-- get buurt level data for n_homes, sqm - use buurt geoms from 2015 
WITH nhomes_buurt AS (
	SELECT "BU_CODE" AS neighborhood_code, "GM_NAAM" AS municipality, geometry, 
		COALESCE(NULLIF(CASE WHEN "AANTAL_HH" < 0 THEN NULL ELSE "AANTAL_HH" END, NULL), 0) AS n_homes, 
		COALESCE(NULLIF(CASE WHEN "G_GAS_TOT" < 0 THEN NULL ELSE "G_GAS_TOT" END, NULL), 0) AS av_gas_m3, 
		COALESCE(NULLIF(CASE WHEN "G_ELEK_TOT" < 0 THEN NULL ELSE "G_ELEK_TOT" END, NULL), 0) AS av_elec_kwh
	FROM cbs_map_2015 
	WHERE "WATER" = 'NEE'
		AND "GM_NAAM" = 'Amsterdam'
),

-- aggregate nhomes_buurt to wijk 
bbox AS (
	SELECT ST_Buffer(ST_MakeEnvelope(ST_XMin(extent), ST_YMin(extent), ST_XMax(extent), ST_YMax(extent), 28992), 500) AS geometry
	FROM (SELECT ST_Extent(geometry) AS extent FROM nhomes_buurt) AS subquery
), 
wijk_2012 AS (
	SELECT wk_code, gm_naam AS municipality, geom 
	FROM cbs_wijk_2012
	WHERE water = 'NEE'
), 
wijk_almere AS (
	SELECT w.*
	FROM wijk_2012 w 
	JOIN bbox b 
	ON w.geom && b.geometry
		AND ST_Within(w.geom, b.geometry)
), 
neighborhoods_with_wijk AS (
	SELECT a.*, a.n_homes * a.av_gas_m3 AS gas_m3, a.n_homes * a.av_elec_kwh AS elec_kwh, 
		b.wk_code, b.geom AS wijk_geom
	FROM nhomes_buurt a
	JOIN LATERAL (
		SELECT b.wk_code, b.geom, ST_Area(ST_Intersection(a.geometry, b.geom)) AS intersection_area
		FROM wijk_almere b
		WHERE ST_Intersects(a.geometry, b.geom)
		ORDER BY intersection_area DESC
		LIMIT 1
	) AS b ON true
), 
nhomes_wijk AS (
	SELECT wk_code, wijk_geom, 
		SUM(n_homes) AS n_homes, SUM(gas_m3) AS gas_m3, SUM(elec_kwh) AS elec_kwh
	FROM neighborhoods_with_wijk
	GROUP BY wk_code, wijk_geom
), 

-- add 2022 buurt geom to housing_inuse_almere_2015 
sqm_pand_2022geom AS (
	SELECT 
		a.id_pand, a.sqm, 'Pand in gebruik' AS status, 
		b.neighborhood_code, b.neighborhood_geom AS geometry
	FROM housing_inuse_amsterdam_2015_droppedguesses a 
	LEFT JOIN (SELECT neighborhood_geom, neighborhood_code FROM cbs_map_all WHERE municipality = 'Amsterdam' AND year = 2015) b 
	ON a.neighborhood_code = b.neighborhood_code 
), 
sqm_buurt AS (
	SELECT neighborhood_code, ST_Transform(geometry, 28992) AS geometry, SUM(sqm) AS sqm 
	FROM sqm_pand_2022geom
	GROUP BY neighborhood_code, geometry 
), 

-- convert sqm_buurt_2022geom to sqm_wijk 
sqm_buurt_withwijk AS (
	SELECT a.sqm, b.wk_code, b.geom AS wijk_geom
	FROM sqm_buurt a
	JOIN LATERAL (
		SELECT b.wk_code, b.geom, ST_Area(ST_Intersection(a.geometry, b.geom)) AS intersection_area
		FROM wijk_almere b
		WHERE ST_Intersects(a.geometry, b.geom)
		ORDER BY intersection_area DESC
		LIMIT 1
	) AS b ON true
), 
sqm_wijk AS (
	SELECT wk_code, wijk_geom, SUM(sqm) AS sqm
	FROM sqm_buurt_withwijk
	GROUP BY wk_code, wijk_geom
), 
wijk_stats AS (
	SELECT a.*, b.n_homes, 
		CASE 
			WHEN n_homes <= 0 THEN 0 
			ELSE ROUND(a.sqm / b.n_homes) 
		END AS sqm_per_home, 
		b.gas_m3, b.elec_kwh
	FROM sqm_wijk a 
	JOIN nhomes_wijk b 
	ON a.wk_code = b.wk_code 
), 


-- attribute wijk level energy use data to buildings 
construction_municipality AS ( -- all construction activity (except for demolition) in 2015
	SELECT id_pand, 
		CASE 
			WHEN status = 'Pand gesloopt' THEN LEFT(registration_start, 4)::INTEGER
			WHEN status != 'Pand gesloopt' AND registration_end IS NOT NULL THEN LEFT(registration_end, 4)::INTEGER
			ELSE LEFT(registration_start, 4)::INTEGER
		END AS year, 
		status, sqm, geom, geom_28992, neighborhood_code, municipality
	FROM housing_nl
	WHERE municipality = 'Amsterdam'
	    AND ahn_version IS NULL
), 
construction_sample AS (
	SELECT * FROM construction_municipality 
    WHERE year <= 2015
), 
inuse_sample AS (
	SELECT 2015 AS year, * 
	FROM sqm_pand_2022geom
), 
inuse_lowenergy AS (
	SELECT 
		b.id_pand, b.year, 'Pand in gebruik - low energy' AS status, b.sqm, b.neighborhood_code
	FROM construction_sample a 
	LEFT JOIN inuse_sample b 
	ON a.id_pand = b.id_pand
	WHERE a.status != 'Pand gesloopt'
		AND b.id_pand IS NOT NULL
), 
inuse_normalenergy AS (
	SELECT b.id_pand, b.year, b.status, b.sqm, b.neighborhood_code
	FROM construction_sample a 
	RIGHT JOIN inuse_sample b 
	ON a.id_pand = b.id_pand
	WHERE a.id_pand IS NULL 
), 
buildings_all AS (
	-- all construction / renovation / transformation / demolition activity in 2015
	SELECT id_pand, year, status, sqm, neighborhood_code 
	FROM construction_sample 
	WHERE year = 2015
	
    UNION ALL 
	
    -- low energy in use buildings in 2015
    SELECT * FROM inuse_lowenergy
	
    UNION ALL 
	
    -- non-low energy in use buildings in 2015
    SELECT * FROM inuse_normalenergy
), 

-- create neighborhood_code to wk_code key
neighborhoods AS (
	SELECT DISTINCT ON (neighborhood_code) neighborhood_code, ST_Transform(geometry, 28992) AS geometry 
	FROM inuse_sample
), 
neighborhood_wijk_key AS (
	SELECT 
		a.neighborhood_code, a.geometry AS neighborhood_geom, 
		b.wk_code AS wijk_code, b.geom AS wijk_geom, 
		b.municipality
	FROM neighborhoods a 
	LEFT JOIN wijk_almere b 
	ON a.geometry && b.geom
		AND ST_Within(a.geometry, ST_Buffer(b.geom, 200))
), 

buildings_with_wijk_code AS (
	SELECT a.*, b.wijk_code, b.municipality
	FROM buildings_all a 
	LEFT JOIN neighborhood_wijk_key b 
	ON a.neighborhood_code = b.neighborhood_code 
), 
energy_use_per_building AS (
	SELECT a.id_pand, a.year, a.status, a.sqm,
		CASE 
			WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.gas_m3)
            ELSE 0
        END AS gas_m3_s0,
		CASE
			WHEN status = 'Pand in gebruik' THEN ROUND(a.sqm / b.sqm * b.gas_m3)
			WHEN status = 'Pand in gebruik - low energy' THEN a.sqm * 5
			ELSE 0 
		END AS gas_m3_s1,
		CASE 
			WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.elec_kwh) 
			ELSE 0 
		END AS electricity_kwh_s0, 
		CASE 
			WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.elec_kwh) 
			ELSE 0 
		END AS electricity_kwh_s1, 
		b.wk_code, b.wijk_geom
	FROM buildings_with_wijk_code a 
	JOIN wijk_stats b 
	ON a.wijk_code = b.wk_code
), 
emissions_per_building AS (
	SELECT id_pand, year, status, sqm, 
		ROUND(gas_m3_s0 * 1.9 + electricity_kwh_s0 * 0.45) AS operational_kg_s0, 
		CASE 
			WHEN status IN ('transformation - adding units', 'transformation - function change', 
							'renovation - pre2020', 'renovation - post2020') THEN sqm * 126
			WHEN status = 'Bouw gestart' THEN sqm * 325
			ELSE 0 
		END AS embodied_kg_s0, 
		
		ROUND(gas_m3_s1 * 1.9 + electricity_kwh_s1 * 0.45) AS operational_kg_s1, 
		CASE 
			WHEN status IN ('transformation - adding units', 'transformation - function change', 
							'renovation - pre2020', 'renovation - post2020') THEN sqm * 200
			WHEN status = 'Bouw gestart' THEN sqm * 550
			ELSE 0 
		END AS embodied_kg_s1, 
		wk_code, wijk_geom
	FROM energy_use_per_building 
)
'''

query = ''' 
SELECT wk_code, wijk_geom, 
	SUM(sqm) AS sqm, 
	SUM(operational_kg_s0) AS operational_kg_s0, SUM(operational_kg_s1) AS operational_kg_s1, 
	SUM(embodied_kg_s0) AS embodied_kg_s0, SUM(embodied_kg_s1) AS embodied_kg_s1
FROM emissions_per_building
GROUP BY wk_code, wijk_geom
'''

amsterdam_emissions_2015 = pd.read_sql_query(base_query + query, engine)

In [48]:
df = amsterdam_emissions_2015.copy()
operational_s0 = df['operational_kg_s0'].sum()
operational_s1 = df['operational_kg_s1'].sum()
embodied_s0 = df['embodied_kg_s0'].sum()
embodied_s1 = df['embodied_kg_s1'].sum()

# operational emissions plot
data = {
    "Category": ["Operational S0", "Operational S1"],
    "Emissions (kg)": [operational_s0, operational_s1]
}
df_plot = pd.DataFrame(data)
fig = px.bar(df_plot, x="Category", y="Emissions (kg)", 
             title=f"Operational Emissions for S0 vs S1 (difference={round((operational_s1 - operational_s0)/1000000)} million kgCO2)")
fig.update_layout(width=800)
fig.show()

# embodied emissions plot 
data = {
    "Category": ["Embodied S0", "Embodied S1"],
    "Emissions (kg)": [embodied_s0, embodied_s1]
}
df_plot = pd.DataFrame(data)
fig = px.bar(df_plot, x="Category", y="Emissions (kg)", 
             title=f"Embodied Emissions for S0 vs S1 (difference={round((embodied_s1 - embodied_s0)/1000000)} million kgCO2)")
fig.update_layout(width=800)
fig.show()

In [35]:
query = ''' 
SELECT * 
FROM construction_sample 

'''

df = pd.read_sql_query(base_query + query, engine)
df

Unnamed: 0,id_pand,year,status,sqm,geom,geom_28992,neighborhood_code,municipality
0,363100012165245,2013,renovation - pre2020,87,0103000020E610000001000000070000002C40424FCD0B...,01030000204071000001000000070000008716D9CE439A...,BU03637306,Amsterdam
1,363100012110916,2015,renovation - pre2020,175,0103000020E6100000010000000B00000028F5221BE16D...,010300002040710000010000000B000000A4703D0AAF05...,BU03639003,Amsterdam
2,363100012070991,2013,renovation - pre2020,295,0103000020E61000000100000009000000C3798CA16E82...,0103000020407100000100000009000000560E2DB24D5C...,BU03634901,Amsterdam
3,363100012155350,2015,renovation - pre2020,62,0103000020E61000000100000007000000FF82166DDE6D...,0103000020407100000100000007000000B4C876BEB305...,BU03639003,Amsterdam
4,363100012111763,2014,renovation - pre2020,382,0103000020E61000000100000008000000604EE9AC20BF...,0103000020407100000100000008000000DD2406816959...,BU03633101,Amsterdam


# Issue: mismatch between changed and in-use buildings
Even though there's a drop in emissions in the energy efficiency scenario, it's still pretty minor. If this result is accurate, then it's an interesting finding. 

However, there is an issue: of all the buildings that were constructed or renovated on or before 2015 (n=6496), ~18% of them (n=1194) do not show up in the snapshot of in-use buildings (~n=100,000). **So what happened to those 1194 buildings?**  

Turns out, those 1194 buildings did't show up in the in-use building table because of the following reasons, some reasonable and some not: 
- (n=98) some constructed/renovated buildings didn't come into use. They were either immediately demolished, or just not yet registered ('Pand in gebruik - niet ingemeten'). This is reasonable.
- (n=635) some constructed/renovated buildings did come into use after, but not during 2015. They were either out-of-use (e.g. demolished) before 2015, or only came into use after 2015. This is also reasonable.
- (n=461) some constructed/renovated buildings did come into use during 2015, but they were not recorded in the bag_vbo dataset. This issue here is that `housing_nl` was used to create the `housing_changed` table, whereas `bag_vbo` was used to create the `housing_inuse` table. This is not reasonable and need to be fixed.

To fix the last bullet point, I need to do the following: 
- only pick rows from housing_nl that were not guessed (WHERE ahn_version IS NULL) 
- only take into account data from bag_vbo when creating housing_inuse snapshot

In [30]:
query = ''' 
WITH housing_changed AS (
	SELECT id_pand, 
		LEFT(registration_start, 4)::INTEGER AS year, 
		status, sqm, geom, geom_28992, neighborhood_code, municipality
	FROM housing_nl
	WHERE municipality = 'Amsterdam'
		AND ahn_version IS NULL
		AND status != 'Pand gesloopt'
		AND LEFT(registration_start, 4)::INTEGER <= 2015
), 

housing_inuse AS (
	SELECT * 
	FROM housing_inuse_amsterdam_2015_droppedguesses
), 

-- buildings that were changed (constructed/renovated) before 2015, but not in the 2015 in use table 
problematic_buildings AS (
	SELECT 
		DISTINCT a.id_pand
	FROM housing_changed a 
	FULL JOIN housing_inuse b 
	ON a.id_pand = b.id_pand
	WHERE b.id_pand IS NULL 
)
SELECT DISTINCT id_pand 
FROM bag_vbo
WHERE id_pand IN (SELECT id_pand FROM problematic_buildings) 
	AND status = 'Pand in gebruik'
	AND LEFT(registration_start, 4)::INTEGER <= 2015 
	AND (LEFT(registration_end, 4)::INTEGER >= 2015 OR registration_end IS NULL)
'''

problematic_buildings = pd.read_sql_query(query, engine)
print(f'there are {problematic_buildings.shape[0]} problematic buildings')

there are 0 problematic buildings


### Discrepancy between `housing_changed` and `housing_inuse` 
As mentioned in the previous section, some constructed/renovated buildings (n=461) did come into use during 2015, but did not show up in the in-use table, as they were not recorded in the bag_vbo dataset. The issue here is that `housing_nl` was used to create the `housing_changed` table, whereas `bag_vbo` was used to create the `housing_inuse` table. 

Based on what I find, I may make the following changes: 
- record all tables (housing_inuse, housing_nl) as residential units (id_vbo) instead of buildings (id_pand)
- change housing_nl so that it doesn't have guesses 

Below is the query for creating the `housing_inuse` table. It aggregates housing unit data from `bag_vbo` into building data using `id_pand`, and attaches building information from `bag_pand`. This convoluted method was used because I thought that some residential units were not captured in `bag_vbo`. Now I realize I was wrong, which is why I "dropped guesses". So perhaps the unit of measurement should be residential units, not residential buildings. 

In [None]:
municipality = 'Amsterdam'
year = 2015 

query_building_inuse = f''' DROP TABLE IF EXISTS housing_inuse_{municipality}_{year}_droppedguesses;
CREATE TABLE housing_inuse_{municipality}_{year}_droppedguesses AS     

-- get subset of housing units and buildings in municipality
WITH bag_vbo_municipality AS (
	SELECT * 
	FROM bag_vbo 
	WHERE municipality = '{municipality}'
), 
bag_pand_municipality AS (
	SELECT DISTINCT ON (id_pand) * 
	FROM bag_pand
	WHERE 
		municipality = '{municipality}'
		AND status = 'Pand in gebruik' 
		AND LEFT(registration_start, 4)::INTEGER <= {year} 
		AND (registration_end IS NULL OR LEFT(registration_end, 4)::INTEGER >= {year})
), 
housing_units AS (
	SELECT DISTINCT ON (id_vbo) *
	FROM bag_vbo_municipality
	WHERE 
		status = 'Verblijfsobject in gebruik'
		AND sqm::INTEGER < 9999
		AND function = 'woonfunctie'
		AND LEFT(registration_start, 4)::INTEGER <= {year}
		AND (registration_end IS NULL OR LEFT(registration_end, 4)::INTEGER >= {year})
), 
housing_buildings AS (
	SELECT 
		id_pand, 
		SUM(sqm::INTEGER) AS sqm, 
		'woonfunctie' AS function
	FROM (SELECT DISTINCT ON (id_vbo) * FROM housing_units)
	GROUP BY id_pand
)

SELECT a.*, b.geom, b.geom_28992, b.neighborhood_code, b.municipality
FROM housing_buildings a 
LEFT JOIN bag_pand_municipality b 
ON a.id_pand = b.id_pand 
'''

conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_password, host=db_host)
cur = conn.cursor()
cur.execute(query_building_inuse)
conn.commit()

Below is the query for creating the `housing_changed` table. It comes from `housing_nl`, which records all construction, renovation, and demolition activities for the whole NL. The problem is `housing_nl` includes guesses, while `housing_inuse` doesn't. So I think I need to change the way `housing_nl` is made. 

In [None]:
query_housing_changed = f''' 
WITH buildings_changed AS (
	SELECT id_pand, 
		LEFT(registration_start, 4)::INTEGER AS year, 
		status, sqm, geom, geom_28992, neighborhood_code, municipality
	FROM housing_nl
	WHERE municipality = 'Amsterdam'
		AND ahn_version IS NULL
		AND status != 'Pand gesloopt'
		AND LEFT(registration_start, 4)::INTEGER <= 2015
), 
''' 

# Calculating emissions for Amsterdam 2012-2021 
Here, I test the fixed housing_nl and housing_inuse tables that no longer include guesses. 

In [4]:
# making housing_inuse_amsterdam_year for all years (currently only 2015)
municipality = 'Amsterdam'
for year in range(2012, 2022): 
    print(f'processing year {year}')
    query = f''' 
    DROP TABLE IF EXISTS housing_inuse_{municipality}_{year};
    CREATE TABLE housing_inuse_{municipality}_{year} AS     

    -- get subset of housing units and buildings in municipality
    WITH bag_vbo_municipality AS (
        SELECT * 
        FROM bag_vbo 
        WHERE municipality = '{municipality}'
    ), 
    bag_pand_municipality AS (
        SELECT DISTINCT ON (id_pand) * 
        FROM bag_pand
        WHERE 
            municipality = '{municipality}'
            AND status = 'Pand in gebruik' 
            AND LEFT(registration_start, 4)::INTEGER <= {year} 
            AND (registration_end IS NULL OR LEFT(registration_end, 4)::INTEGER >= {year})
    ), 
    housing_units AS (
        SELECT DISTINCT ON (id_vbo) *
        FROM bag_vbo_municipality
        WHERE 
            status = 'Verblijfsobject in gebruik'
            AND sqm::INTEGER < 9999
            AND function = 'woonfunctie'
            AND LEFT(registration_start, 4)::INTEGER <= {year}
            AND (registration_end IS NULL OR LEFT(registration_end, 4)::INTEGER >= {year})
    ), 
    housing_buildings AS (
        SELECT 
            id_pand, 
            SUM(sqm::INTEGER) AS sqm, 
            'woonfunctie' AS function
        FROM (SELECT DISTINCT ON (id_vbo) * FROM housing_units)
        GROUP BY id_pand
    )

    SELECT a.*, b.geom, b.geom_28992, b.neighborhood_code, b.municipality
    FROM housing_buildings a 
    LEFT JOIN bag_pand_municipality b 
    ON a.id_pand = b.id_pand 
    '''
    conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_password, host=db_host)
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()

processing year 2012
processing year 2013
processing year 2014
processing year 2015
processing year 2016
processing year 2017
processing year 2018
processing year 2019
processing year 2020
processing year 2021


In [10]:
emissions_all_years = pd.DataFrame()

for year in range(2012, 2022):
	print(f'processing year {year}')

	base_query = f''' 
	-- aggregate (buildings level stats on sqm) and (neighborhood level stats on energy) to (wijk geometries) 
	-- get buurt level data for n_homes, sqm - use buurt geoms from 2015 
	WITH nhomes_buurt AS (
		SELECT "BU_CODE" AS neighborhood_code, "GM_NAAM" AS municipality, geometry, 
			COALESCE(NULLIF(CASE WHEN "AANTAL_HH" < 0 THEN NULL ELSE "AANTAL_HH" END, NULL), 0) AS n_homes, 
			COALESCE(NULLIF(CASE WHEN "G_GAS_TOT" < 0 THEN NULL ELSE "G_GAS_TOT" END, NULL), 0) AS av_gas_m3, 
			COALESCE(NULLIF(CASE WHEN "G_ELEK_TOT" < 0 THEN NULL ELSE "G_ELEK_TOT" END, NULL), 0) AS av_elec_kwh
		FROM cbs_map_{year} 
		WHERE "WATER" = 'NEE'
			AND "GM_NAAM" = 'Amsterdam'
	),

	-- aggregate nhomes_buurt to wijk 
	bbox AS (
		SELECT ST_Buffer(ST_MakeEnvelope(ST_XMin(extent), ST_YMin(extent), ST_XMax(extent), ST_YMax(extent), 28992), 500) AS geometry
		FROM (SELECT ST_Extent(geometry) AS extent FROM nhomes_buurt) AS subquery
	), 
	wijk_2012 AS (
		SELECT wk_code, gm_naam AS municipality, geom 
		FROM cbs_wijk_2012
		WHERE water = 'NEE'
	), 
	wijk_almere AS (
		SELECT w.*
		FROM wijk_2012 w 
		JOIN bbox b 
		ON w.geom && b.geometry
			AND ST_Within(w.geom, b.geometry)
	), 
	neighborhoods_with_wijk AS (
		SELECT a.*, a.n_homes * a.av_gas_m3 AS gas_m3, a.n_homes * a.av_elec_kwh AS elec_kwh, 
			b.wk_code, b.geom AS wijk_geom
		FROM nhomes_buurt a
		JOIN LATERAL (
			SELECT b.wk_code, b.geom, ST_Area(ST_Intersection(a.geometry, b.geom)) AS intersection_area
			FROM wijk_almere b
			WHERE ST_Intersects(a.geometry, b.geom)
			ORDER BY intersection_area DESC
			LIMIT 1
		) AS b ON true
	), 
	nhomes_wijk AS (
		SELECT wk_code, wijk_geom, 
			SUM(n_homes) AS n_homes, SUM(gas_m3) AS gas_m3, SUM(elec_kwh) AS elec_kwh
		FROM neighborhoods_with_wijk
		GROUP BY wk_code, wijk_geom
	), 

	-- add 2022 buurt geom to housing_inuse table
	sqm_pand_2022geom AS (
		SELECT 
			a.id_pand, a.sqm, 'Pand in gebruik' AS status, 
			b.neighborhood_code, b.neighborhood_geom AS geometry
		FROM housing_inuse_2012_2021 a 
		LEFT JOIN (SELECT neighborhood_geom, neighborhood_code FROM cbs_map_all WHERE municipality = 'Amsterdam' AND year = {year}) b 
		ON a.neighborhood_code = b.neighborhood_code 
	), 
	sqm_buurt AS (
		SELECT neighborhood_code, ST_Transform(geometry, 28992) AS geometry, SUM(sqm) AS sqm 
		FROM sqm_pand_2022geom
		GROUP BY neighborhood_code, geometry 
	), 

	-- convert sqm_buurt_2022geom to sqm_wijk 
	sqm_buurt_withwijk AS (
		SELECT a.sqm, b.wk_code, b.geom AS wijk_geom
		FROM sqm_buurt a
		JOIN LATERAL (
			SELECT b.wk_code, b.geom, ST_Area(ST_Intersection(a.geometry, b.geom)) AS intersection_area
			FROM wijk_almere b
			WHERE ST_Intersects(a.geometry, b.geom)
			ORDER BY intersection_area DESC
			LIMIT 1
		) AS b ON true
	), 
	sqm_wijk AS (
		SELECT wk_code, wijk_geom, SUM(sqm) AS sqm
		FROM sqm_buurt_withwijk
		GROUP BY wk_code, wijk_geom
	), 
	wijk_stats AS (
		SELECT a.*, b.n_homes, 
			CASE 
				WHEN n_homes <= 0 THEN 0 
				ELSE ROUND(a.sqm / b.n_homes) 
			END AS sqm_per_home, 
			b.gas_m3, b.elec_kwh
		FROM sqm_wijk a 
		JOIN nhomes_wijk b 
		ON a.wk_code = b.wk_code 
	), 


	-- attribute wijk level energy use data to buildings 
	construction_municipality AS ( -- all construction activity (except for demolition) in year
		SELECT id_pand, 
			CASE 
				WHEN status = 'Pand gesloopt' THEN LEFT(registration_start, 4)::INTEGER
				WHEN status != 'Pand gesloopt' AND registration_end IS NOT NULL THEN LEFT(registration_end, 4)::INTEGER
				ELSE LEFT(registration_start, 4)::INTEGER
			END AS year, 
			status, sqm, geom, geom_28992, neighborhood_code, municipality
		FROM housing_nl
		WHERE municipality = 'Amsterdam'
			AND ahn_version IS NULL
	), 
	construction_sample AS (
		SELECT * FROM construction_municipality 
		WHERE year <= {year}
	), 
	inuse_sample AS (
		SELECT {year} AS year, * 
		FROM sqm_pand_2022geom
	), 
	inuse_lowenergy AS (
		SELECT 
			b.id_pand, b.year, 'Pand in gebruik - low energy' AS status, b.sqm, b.neighborhood_code
		FROM construction_sample a 
		LEFT JOIN inuse_sample b 
		ON a.id_pand = b.id_pand
		WHERE a.status != 'Pand gesloopt'
			AND b.id_pand IS NOT NULL
	), 
	inuse_normalenergy AS (
		SELECT b.id_pand, b.year, b.status, b.sqm, b.neighborhood_code
		FROM construction_sample a 
		RIGHT JOIN inuse_sample b 
		ON a.id_pand = b.id_pand
		WHERE a.id_pand IS NULL 
	), 
	buildings_all AS (
		-- all construction / renovation / transformation / demolition activity in year
		SELECT id_pand, year, status, sqm, neighborhood_code 
		FROM construction_sample 
		WHERE year = {year}
		
		UNION ALL 
		
		-- low energy in use buildings in year
		SELECT * FROM inuse_lowenergy
		
		UNION ALL 
		
		-- non-low energy in use buildings in year
		SELECT * FROM inuse_normalenergy
	), 

	-- create neighborhood_code to wk_code key
	neighborhoods AS (
		SELECT DISTINCT ON (neighborhood_code) neighborhood_code, ST_Transform(geometry, 28992) AS geometry 
		FROM inuse_sample
	), 
	neighborhood_wijk_key AS (
		SELECT 
			a.neighborhood_code, a.geometry AS neighborhood_geom, 
			b.wk_code AS wijk_code, b.geom AS wijk_geom, 
			b.municipality
		FROM neighborhoods a 
		LEFT JOIN wijk_almere b 
		ON a.geometry && b.geom
			AND ST_Within(a.geometry, ST_Buffer(b.geom, 200))
	), 

	buildings_with_wijk_code AS (
		SELECT a.*, b.wijk_code, b.municipality
		FROM buildings_all a 
		LEFT JOIN neighborhood_wijk_key b 
		ON a.neighborhood_code = b.neighborhood_code 
	), 
	energy_use_per_building AS (
		SELECT a.id_pand, a.year, a.status, a.sqm,
			CASE 
				WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.gas_m3)
				ELSE 0
			END AS gas_m3_s0,
			CASE
				WHEN status = 'Pand in gebruik' THEN ROUND(a.sqm / b.sqm * b.gas_m3)
				WHEN status = 'Pand in gebruik - low energy' THEN a.sqm * 5
				ELSE 0 
			END AS gas_m3_s1,
			CASE 
				WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.elec_kwh) 
				ELSE 0 
			END AS electricity_kwh_s0, 
			CASE 
				WHEN status IN ('Pand in gebruik', 'Pand in gebruik - low energy') THEN ROUND(a.sqm / b.sqm * b.elec_kwh) 
				ELSE 0 
			END AS electricity_kwh_s1, 
			b.wk_code, b.wijk_geom
		FROM buildings_with_wijk_code a 
		JOIN wijk_stats b 
		ON a.wijk_code = b.wk_code
	), 
	emissions_per_building AS (
		SELECT id_pand, year, status, sqm, 
			ROUND(gas_m3_s0 * 1.9 + electricity_kwh_s0 * 0.45) AS operational_kg_s0, 
			CASE 
				WHEN status IN ('transformation - adding units', 'transformation - function change', 
								'renovation - pre2020', 'renovation - post2020') THEN sqm * 126
				WHEN status = 'Bouw gestart' THEN sqm * 325
				-- WHEN status = 'Pand gesloopt' THEN sqm * 77
				ELSE 0 
			END AS embodied_kg_s0, 
			
			ROUND(gas_m3_s1 * 1.9 + electricity_kwh_s1 * 0.45) AS operational_kg_s1, 
			CASE 
				WHEN status IN ('transformation - adding units', 'transformation - function change', 
								'renovation - pre2020', 'renovation - post2020') THEN sqm * 200
				WHEN status = 'Bouw gestart' THEN sqm * 550
				-- WHEN status = 'Pand gesloopt' THEN sqm * 77
				ELSE 0 
			END AS embodied_kg_s1, 
			wk_code, wijk_geom
		FROM energy_use_per_building 
	)
	'''
	query = ''' 
	SELECT wk_code, wijk_geom, 
		SUM(sqm) AS sqm, 
		SUM(operational_kg_s0) AS operational_kg_s0, SUM(operational_kg_s1) AS operational_kg_s1, 
		SUM(embodied_kg_s0) AS embodied_kg_s0, SUM(embodied_kg_s1) AS embodied_kg_s1
	FROM emissions_per_building
	GROUP BY wk_code, wijk_geom
	'''

	emissions_wijk = pd.read_sql_query(base_query + query, engine)
	emissions_wijk['year'] = year

	emissions_all_years = pd.concat([emissions_all_years, emissions_wijk])

processing year 2012
processing year 2013
processing year 2014
processing year 2015
processing year 2016
processing year 2017
processing year 2018
processing year 2019
processing year 2020
processing year 2021


In [21]:
total_kg_s0_sum = df['total_kg_s0'].sum()
total_kg_s1_sum = df['total_kg_s1'].sum()

# Creating a summary DataFrame for the bar chart
summary_df = pd.DataFrame({
    'Scenario': ['total_kg_s0', 'total_kg_s1'],
    'Total KG': [total_kg_s0_sum, total_kg_s1_sum]
})

# Creating the bar chart with Plotly Express
fig = px.bar(summary_df, x='Scenario', y='Total KG', 
             title=f"Total emissions Comparison for S0 and S1 (difference={round((total_kg_s1_sum - total_kg_s0_sum)/1000000)} million kgCO2)",
             labels={'Total KG': 'Total KG', 'Scenario': 'Scenario'})
fig.update_layout(width=800)
fig.show()

In [12]:
df = emissions_all_years.copy()
df = df.groupby('year').sum().reset_index()
df = df[['year', 'sqm', 'operational_kg_s0', 'operational_kg_s1', 'embodied_kg_s0', 'embodied_kg_s1']]
df['total_kg_s0'] = df['operational_kg_s0'] + df['embodied_kg_s0']
df['total_kg_s1'] = df['operational_kg_s1'] + df['embodied_kg_s1']

# df long 
df_long = df.melt(id_vars=['year'], value_vars=['operational_kg_s0', 'operational_kg_s1', 'embodied_kg_s0', 'embodied_kg_s1', 'total_kg_s0', 'total_kg_s1'], 
                   var_name='emission_type', value_name='emissions_kg')

# total emissions 
df_long_total = df_long[df_long.emission_type.str.startswith('total')]
fig = px.bar(df_long_total, x='year', y='emissions_kg', color='emission_type', barmode='group',
             title='Total Housing Emissions in Amsterdam S0 vs S1')
fig.update_layout(width=800)
fig.show()

# operational emissions 
df_long_operational = df_long[df_long.emission_type.str.startswith('operational')]
fig = px.bar(df_long_operational, x='year', y='emissions_kg', color='emission_type', barmode='group', 
             title='Operational Housing Emissions in Amsterdam S0 vs S1')
fig.update_layout(width=800)
fig.show()

# embodied emissions
df_long_embodied = df_long[df_long.emission_type.str.startswith('embodied')]
fig = px.bar(df_long_embodied, x='year', y='emissions_kg', color='emission_type', barmode='group',
             title='Embodied Housing Emissions in Amsterdam S0 vs S1')
fig.update_layout(width=800)
fig.show()




In [23]:
query = f''' 
WITH construction_municipality AS ( -- all construction activity (except for demolition) in year
    SELECT id_pand, 
        CASE 
            WHEN status = 'Pand gesloopt' THEN LEFT(registration_start, 4)::INTEGER
            WHEN status != 'Pand gesloopt' AND registration_end IS NOT NULL THEN LEFT(registration_end, 4)::INTEGER
            ELSE LEFT(registration_start, 4)::INTEGER
        END AS year, 
        status, sqm, geom, geom_28992, neighborhood_code, municipality
    FROM housing_nl
    WHERE municipality = 'Amsterdam'
        AND ahn_version IS NULL
)

SELECT year, status, SUM(sqm)::INTEGER AS sqm
FROM construction_municipality
WHERE year >= 2012 AND year <= 2021 -- AND status != 'Pand gesloopt'
GROUP BY year, status
ORDER BY year, status
'''

df = pd.read_sql_query(query, engine)
def changeName(x): 
    if x.startswith('renovation'): 
        return 'renovation'
    elif x.startswith('transformation'): 
        return 'transformation'
    elif x.startswith('Bouw'):
        return 'construction'
    elif x.startswith('Pand'):
        return 'demolition'
    else: 
        return x
df.status = df.status.map(lambda x: changeName(x))

fig = px.bar(df, x='year', y='sqm', color='status', title='Construction Activity in Amsterdam 2012-2021')
fig.update_layout(width=800)
fig.show()
