In [2]:
import os
import ijson
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin

In [None]:
# Join the VUR valuation data to the land parcels of each municipality via
# 'samletFast' (Samlet Fast Ejendom, SFE).
# For every BFE number, the 'ejendomværdiBeløb' and 'grundværdiBeløb' of all
# valuations listing that number are summed, and 'count' records how many
# valuations were added together.

# Directories
json_dir = r'D:\FOLDER FROM THESIS\THESIS\Data\VUR\VUR_MUNI'
shapefile_dir = r'D:\FOLDER FROM THESIS\THESIS\Processed data\Jordstykke_municipalities'
output_dir = r'D:\FOLDER FROM THESIS\THESIS\Processed data\VUR_MAT_MUNI2'

# Valuation fields to sum, each paired with the garbled spelling of the same
# field name that is accepted as a fallback when the proper key has no value.
value_keys = {
    'ejendomværdiBeløb': 'ejendomvÃ¦rdiBelÃ¸b',
    'grundværdiBeløb': 'grundvÃ¦rdiBelÃ¸b',
}

# Make sure the target folder exists before any shapefile is written to it.
os.makedirs(output_dir, exist_ok=True)

# Loop through each municipality (sorted so the processing order is repeatable)
for json_file in sorted(os.listdir(json_dir)):
    if not json_file.endswith('.json'):
        continue

    # The first four characters of the file name are used as the municipality code.
    kommune_code = json_file[:4]
    shapefile_path = os.path.join(shapefile_dir, f"{kommune_code}.shp")

    if not os.path.exists(shapefile_path):
        print(f"Skipping {kommune_code}: Shapefile not found.")
        continue

    json_file_path = os.path.join(json_dir, json_file)
    print(f"Processing {kommune_code}")

    # Single streaming pass over the JSON: BFE number -> summed amounts + count.
    property_dict = {}
    with open(json_file_path, 'r', encoding='utf-8') as f:
        for entry in ijson.items(f, 'item'):
            # Amounts of this valuation; a missing or null amount counts as 0
            # so that it cannot break the running sums below.
            amounts = {}
            for key, garbled_key in value_keys.items():
                amount = entry.get(key)
                if amount is None:
                    amount = entry.get(garbled_key)
                amounts[key] = amount or 0

            # 'BFEnummerList' may be absent or null; both mean "no BFE numbers".
            for bfenummer_entry in entry.get("BFEnummerList") or []:
                # List items are either plain integers or objects holding 'BFEnummer'.
                bfenummer = bfenummer_entry if isinstance(bfenummer_entry, int) else bfenummer_entry["BFEnummer"]
                totals = property_dict.setdefault(
                    bfenummer, {'ejendomværdiBeløb': 0, 'grundværdiBeløb': 0, 'count': 0}
                )
                for key, amount in amounts.items():
                    totals[key] += amount
                totals['count'] += 1  # number of valuations aggregated for this BFE number

    print("Parsed JSON file")

    # Reading shapefile
    gdf = gpd.read_file(shapefile_path)
    print("Read shapefile")

    # Joining based on samletFast. The columns get short names (under 10
    # characters) straight away for shapefile compatibility. Parcels without a
    # matching BFE number get no value (None) and a count of 0.
    gdf['samletFast'] = gdf['samletFast'].astype(int)
    gdf['e_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('ejendomværdiBeløb', None))
    gdf['g_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('grundværdiBeløb', None))
    gdf['count'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('count', 0))

    print("Joined data")

    # Writing to new shapefile
    output_path = os.path.join(output_dir, f"{kommune_code}.shp")
    gdf.to_file(output_path)
    print(f"Saved to {output_path}")

In [None]:
# Join the VUR valuation data to the BPFG shapefile via 'samletFast'.
# The amounts of all valuations (from every municipality JSON) that list the
# same BFE number are summed before the join.

# Directories
json_dir = r'D:\FOLDER FROM THESIS\THESIS\Data\VUR\VUR_MUNI'
shapefile_path = r'D:\FOLDER FROM THESIS\THESIS\Data\Ejendom API\BPFG\BPFG.shp'
output_dir = r'D:\FOLDER FROM THESIS\THESIS\Processed data\BPFG VUR'

# Valuation fields to sum, each paired with the garbled spelling of the same
# field name that is accepted as a fallback when the proper key has no value.
value_keys = {
    'ejendomværdiBeløb': 'ejendomvÃ¦rdiBelÃ¸b',
    'grundværdiBeløb': 'grundvÃ¦rdiBelÃ¸b',
}

# Create output dir if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# BFE number -> summed amounts, accumulated across all municipalities
property_dict = {}

# Loop through each municipality's JSON (sorted so the order is repeatable)
for json_file in sorted(os.listdir(json_dir)):
    if not json_file.endswith('.json'):
        continue

    # The first four characters of the file name are used as the municipality code.
    kommune_code = json_file[:4]
    json_file_path = os.path.join(json_dir, json_file)

    print(f"Processing JSON for {kommune_code}")

    # Stream the JSON array item by item and update property_dict
    with open(json_file_path, 'r', encoding='utf-8') as f:
        for entry in ijson.items(f, 'item'):
            # A missing or null amount counts as 0 so it cannot break the sums.
            amounts = {}
            for key, garbled_key in value_keys.items():
                amount = entry.get(key)
                if amount is None:
                    amount = entry.get(garbled_key)
                amounts[key] = amount or 0

            # 'BFEnummerList' may be absent or null; both mean "no BFE numbers".
            for bfenummer_entry in entry.get("BFEnummerList") or []:
                # List items are either plain integers or objects holding 'BFEnummer'.
                bfenummer = bfenummer_entry if isinstance(bfenummer_entry, int) else bfenummer_entry["BFEnummer"]
                totals = property_dict.setdefault(bfenummer, {'ejendomværdiBeløb': 0, 'grundværdiBeløb': 0})
                for key, amount in amounts.items():
                    totals[key] += amount

print("Finished processing all JSON files.")

# Reading the single BPFG shapefile
gdf = gpd.read_file(shapefile_path)
print("Read shapefile")

# Joining based on samletFast. Features without a 'samletFast' value, or whose
# value has no entry in property_dict, get no valuation (None). The columns get
# short names straight away for shapefile compatibility.
gdf['samletFast'] = gdf['samletFast'].apply(lambda x: int(x) if pd.notnull(x) else None)
gdf['e_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('ejendomværdiBeløb', None))
gdf['g_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('grundværdiBeløb', None))

print("Joined data")

# Writing to new shapefile
output_path = os.path.join(output_dir, 'BPFG_with_VUR.shp')
gdf.to_file(output_path)
print(f"Saved to {output_path}")

In [None]:
# Join the VUR valuation data to the Ejerlejlighed (condominium) shapefile via
# 'samletFast'.
# The condominiums were polygons and the data was afterwards made into point
# centroids in QGIS.

# Directories
json_dir = r'D:\FOLDER FROM THESIS\THESIS\Data\VUR\VUR_MUNI'
shapefile_path = r'D:\FOLDER FROM THESIS\THESIS\Data\Ejendom API\Ejerlejlighed\Ejerlejlighed.shp'
output_dir = r'D:\FOLDER FROM THESIS\THESIS\Processed data\Ejerlejlighed VUR'

# Valuation fields to sum, each paired with the garbled spelling of the same
# field name that is accepted as a fallback when the proper key has no value.
value_keys = {
    'ejendomværdiBeløb': 'ejendomvÃ¦rdiBelÃ¸b',
    'grundværdiBeløb': 'grundvÃ¦rdiBelÃ¸b',
}

# Make sure the target folder exists before the shapefile is written to it.
os.makedirs(output_dir, exist_ok=True)

# BFE number -> summed amounts, accumulated across all municipalities
property_dict = {}

# Loop through each municipality's JSON (sorted so the order is repeatable)
for json_file in sorted(os.listdir(json_dir)):
    if not json_file.endswith('.json'):
        continue

    # The first four characters of the file name are used as the municipality code.
    kommune_code = json_file[:4]
    json_file_path = os.path.join(json_dir, json_file)

    print(f"Processing JSON for {kommune_code}")

    # Stream the JSON array item by item and update property_dict
    with open(json_file_path, 'r', encoding='utf-8') as f:
        for entry in ijson.items(f, 'item'):
            # A missing or null amount counts as 0 so it cannot break the sums.
            amounts = {}
            for key, garbled_key in value_keys.items():
                amount = entry.get(key)
                if amount is None:
                    amount = entry.get(garbled_key)
                amounts[key] = amount or 0

            # 'BFEnummerList' may be absent or null; both mean "no BFE numbers".
            for bfenummer_entry in entry.get("BFEnummerList") or []:
                # List items are either plain integers or objects holding 'BFEnummer'.
                bfenummer = bfenummer_entry if isinstance(bfenummer_entry, int) else bfenummer_entry["BFEnummer"]
                totals = property_dict.setdefault(bfenummer, {'ejendomværdiBeløb': 0, 'grundværdiBeløb': 0})
                for key, amount in amounts.items():
                    totals[key] += amount

print("Finished processing all JSON files.")

# Reading the single Ejerlejlighed shapefile
gdf = gpd.read_file(shapefile_path)
print("Read shapefile")

# Joining based on samletFast. Features without a 'samletFast' value, or whose
# value has no entry in property_dict, get no valuation (None). The columns get
# short names straight away for shapefile compatibility.
gdf['samletFast'] = gdf['samletFast'].apply(lambda x: int(x) if pd.notnull(x) else None)
gdf['e_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('ejendomværdiBeløb', None))
gdf['g_value'] = gdf['samletFast'].map(lambda sfe: property_dict.get(sfe, {}).get('grundværdiBeløb', None))

print("Joined data")

# Writing to new shapefile
output_path = os.path.join(output_dir, 'Ejerlejlighed_with_VUR.shp')
gdf.to_file(output_path)
print(f"Saved to {output_path}")

In [None]:
# Ejerlejlighed and BPFG
# Aggregates 'e_value' (ejendomværdiBeløb) and 'g_value' (grundværdiBeløb) of
# the Ejerlejlighed and BPFG points within each land parcel.

# Load the datasets
polygon_path = r'D:\FOLDER FROM THESIS\THESIS\Data\Matrikel\Jordstykke\MAT_jordstykke_DK.shp'
point1_path = r'D:\FOLDER FROM THESIS\THESIS\Processed data\Ejerlejlighed VUR\Ejerlejlighed_centroids_VUR.shp'
point2_path = r'D:\FOLDER FROM THESIS\THESIS\Processed data\BPFG VUR\BPFG_with_VUR.shp'

polygon_gdf = gpd.read_file(polygon_path)
point1_gdf = gpd.read_file(point1_path)
point2_gdf = gpd.read_file(point2_path)

# Perform spatial joins: every point gets the index of the parcel it lies
# within ('index_right'). Only the parcel geometry takes part in the join, so
# the point attributes keep their names. `predicate` is the current name of
# the keyword formerly called `op` (requires geopandas >= 0.10).
parcel_geometry = polygon_gdf[['geometry']]
joined1 = sjoin(point1_gdf, parcel_geometry, how='inner', predicate='within')
joined2 = sjoin(point2_gdf, parcel_geometry, how='inner', predicate='within')

# Aggregate the data: sum of both values per parcel index
value_columns = ['e_value', 'g_value']
aggregated1 = joined1.groupby('index_right')[value_columns].sum()
aggregated2 = joined2.groupby('index_right')[value_columns].sum()

# Combine aggregated results; a parcel present in only one of the two tables
# contributes 0 from the other.
combined_aggregated = aggregated1.add(aggregated2, fill_value=0)

# Update the original polygon GeoDataFrame. Reindexing onto the parcel index
# gives 0 to parcels that contain no points and assigns each column in one
# step, instead of creating integer columns and overwriting parts of them.
for column in value_columns:
    polygon_gdf[column] = combined_aggregated[column].reindex(polygon_gdf.index, fill_value=0)

# Save the updated GeoDataFrame (creating the target folder if needed)
output_path = r'D:\FOLDER FROM THESIS\THESIS\Processed data\VUR_MAT_MUNI_ejerBPFG\updated_MAT_jordstykke_DK2.shp'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
polygon_gdf.to_file(output_path)

# A 'count' field of points within each polygon was created afterwards in QGIS
# (the file was named e_g_c_MAT_jordstykke_DK)

In [None]:
# Separating e_g_c_MAT_jordstykke_DK (BPFG and Ejerlejlighed) into municipalities:
# one shapefile per distinct 'kommunekod' value.

# Load the jordstykke shapefile
input_shapefile_path = r'D:\FOLDER FROM THESIS\THESIS\Processed data\VUR_MAT_MUNI_ejerBPFG\e_g_c_MAT_jordstykke_DK.shp'
gdf = gpd.read_file(input_shapefile_path)

# Output directory (created if it does not exist yet)
output_dir = r'D:\FOLDER FROM THESIS\THESIS\Processed data\ejerBPFG_MUNI'
os.makedirs(output_dir, exist_ok=True)

# Get unique 'kommunekod' values
unique_kommunekod = gdf['kommunekod'].unique()

# Loop through each unique 'kommunekod' and save the subset as a new shapefile
for k in unique_kommunekod:
    subset_gdf = gdf[gdf['kommunekod'] == k]

    # A missing (NaN) code never compares equal to itself, so its subset is
    # empty; skip it instead of writing an empty shapefile.
    if subset_gdf.empty:
        print(f"Skipping empty subset for kommunekod {k}")
        continue

    output_path = os.path.join(output_dir, f"{k}.shp")
    subset_gdf.to_file(output_path)
    print(f"Saved subset shapefile for kommunekod {k} to {output_path}")

In [None]:
# Separating the land parcel data (MAT) into municipalities based on 'kommunekod':
# one shapefile per distinct 'kommunekod' value.

# Load the jordstykke shapefile
jordstykke_path = 'D:\\FOLDER FROM THESIS\\THESIS\\Data\\Matrikel\\Jordstykke\\MAT_jordstykke_DK.shp'
jordstykke_gdf = gpd.read_file(jordstykke_path)

# Label the data as EPSG:25832 on the GeoDataFrame itself rather than passing
# `crs=` to `to_file`, since that keyword depends on the writer engine in use.
# `allow_override=True` only replaces the CRS label; coordinates are not
# reprojected, which matches what passing `crs=` at write time did.
jordstykke_gdf = jordstykke_gdf.set_crs("EPSG:25832", allow_override=True)

# Output directory (created if it does not exist yet)
output_dir = 'D:\\FOLDER FROM THESIS\\THESIS\\Processed data\\Jordstykke_municipalities'
os.makedirs(output_dir, exist_ok=True)

# Loop through each unique 'kommunekod'
for code in jordstykke_gdf['kommunekod'].unique():
    # Filter the GeoDataFrame based on 'kommunekod'; the subset keeps the CRS
    # set above.
    subset_gdf = jordstykke_gdf[jordstykke_gdf['kommunekod'] == code]

    # Skip if no records are found for the current 'kommunekod' (this is the
    # case for a missing/NaN code, which never compares equal to itself).
    if subset_gdf.empty:
        print(f"Skipping empty subset for kommunekod {code}")
        continue

    # Define the output path
    output_path = os.path.join(output_dir, f"{code}.shp")

    # Save the subset GeoDataFrame to a new shapefile
    subset_gdf.to_file(output_path)

    print(f"Saved shapefile for kommunekod {code}")

In [None]:
# Merges the VUR data for the 3 kinds of BFE-IDs per municipality by summing
# ejendomværdiBeløb (e_value), grundværdiBeløb (g_value) and count of the
# shapefiles with the same file name in folder1 and folder2.

# Define directories
folder1 = r'D:\FOLDER FROM THESIS\THESIS\Processed data\ejerBPFG_MUNI'
folder2 = r'D:\FOLDER FROM THESIS\THESIS\Processed data\VUR_MAT_MUNI2'
output_folder = r'D:\FOLDER FROM THESIS\THESIS\Processed data\SFE_EJER_BPFG_MUNI'

# Columns that are summed across the two sources
sum_columns = ['e_value', 'g_value', 'count']

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Loop through each shapefile in folder1 (sorted so the order is repeatable)
for filename in sorted(os.listdir(folder1)):
    if not filename.endswith(".shp"):
        continue

    filepath1 = os.path.join(folder1, filename)
    filepath2 = os.path.join(folder2, filename)

    # Without the counterpart there is nothing to sum; report it and move on
    # instead of aborting the remaining municipalities.
    if not os.path.exists(filepath2):
        print(f"Skipping {filename}: no matching shapefile in {folder2}")
        continue

    # Read shapefiles
    gdf1 = gpd.read_file(filepath1)
    gdf2 = gpd.read_file(filepath2)

    # Merge GeoDataFrames based on 'id.lokalId'; the columns coming from gdf2
    # get the suffix '_new'.
    merged_gdf = gdf1.merge(gdf2[['id.lokalId'] + sum_columns], on='id.lokalId', suffixes=('', '_new'))

    # Sum the original and new columns. A missing value on one side is treated
    # as 0, so a parcel that has no value in one source keeps the value from
    # the other instead of turning into NaN.
    for column in sum_columns:
        merged_gdf[column] = merged_gdf[column].add(merged_gdf[f'{column}_new'], fill_value=0)

    # Keep only the desired columns and maintain as GeoDataFrame
    final_gdf = gpd.GeoDataFrame(merged_gdf[['id.lokalId'] + sum_columns + ['geometry']], geometry='geometry')

    # Save to new shapefile
    output_path = os.path.join(output_folder, filename)
    final_gdf.to_file(output_path)
    print(f"Saved combined data to {output_path}")

In [78]:
# Builds one national shapefile from the per-municipality shapefiles and adds
# two derived fields:
#   building = e_value - g_value
#   b_div_c  = building / count

# Define directory and output path
input_folder = r'D:\FOLDER FROM THESIS\THESIS\Processed data\SFE_EJER_BPFG_MUNI'
output_file = r'D:\FOLDER FROM THESIS\THESIS\Processed data\VUR_DK\VUR_Total_DK.shp'

# Create a list to store each processed GeoDataFrame
all_gdfs = []

# Loop through each file in the directory (sorted so the row order of the
# result is repeatable)
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith('.shp'):
        continue

    filepath = os.path.join(input_folder, filename)

    # Read the shapefile into a GeoDataFrame
    gdf = gpd.read_file(filepath)

    # Create a new 'building' field
    gdf['building'] = gdf['e_value'] - gdf['g_value']

    # Create a new 'b_div_c' field. A count of 0 is replaced by NaN first so
    # the division yields a missing value instead of +/-inf.
    gdf['b_div_c'] = gdf['building'] / gdf['count'].replace(0, np.nan)

    # Append to list of all GeoDataFrames
    all_gdfs.append(gdf)

# pd.concat cannot combine an empty list; fail with a message that names the folder.
if not all_gdfs:
    raise FileNotFoundError(f"No .shp files found in {input_folder}")

# Concatenate all GeoDataFrames into one
final_gdf = pd.concat(all_gdfs, ignore_index=True)

# Convert it back to a GeoDataFrame if needed
final_gdf = gpd.GeoDataFrame(final_gdf, geometry='geometry')

# Save to new shapefile (creating the target folder if needed)
os.makedirs(os.path.dirname(output_file), exist_ok=True)
final_gdf.to_file(output_file)
