In [1]:
import pandas as pd
import math
import numpy as np
import os
import rasterio

In [None]:
# For every patch folder under `base_folder`, average the Tmrt raster over the
# pixels flagged as buildings and store the result in the patch summary CSV as
# the column 'Tmrt_Buildings_Mean'. The CSV is rewritten in place.
base_folder = r"../dataset"
csv_path = r"../dataset/all_patch_summaries.csv"

df = pd.read_csv(csv_path)
df['C_Tram'] = df['C_Tram'].astype(str)

# Store masked mean values, keyed by the id taken from the folder name
tmrt_building_means = {}

# Process folders
for folder_name in os.listdir(base_folder):
    folder_path = os.path.join(base_folder, folder_name)

    # Only sub-directories whose name contains "_" are treated as patches;
    # the text after the last "_" is used as the segment id.
    if not (os.path.isdir(folder_path) and "_" in folder_name):
        continue
    c_tram = folder_name.split("_")[-1]

    tmrt_path = os.path.join(folder_path, "Tmrt_average.tif")
    bld_path = os.path.join(folder_path, "buildings.tif")

    # Both rasters are required; folders missing either one are ignored.
    if not (os.path.exists(tmrt_path) and os.path.exists(bld_path)):
        continue

    try:
        with rasterio.open(tmrt_path) as tmrt_src, rasterio.open(bld_path) as bld_src:
            tmrt_raw = tmrt_src.read(1)
            bld = bld_src.read(1)
            nodata = tmrt_src.nodata

        if tmrt_raw.shape != bld.shape:
            print(f" Shape mismatch in {folder_name}, skipping.")
            continue

        tmrt = tmrt_raw.astype(np.float32)

        # Keep pixels where buildings == 1 and the Tmrt value is usable.
        valid = (bld == 1) & ~np.isnan(tmrt)

        # Remove NoData pixels. The comparison runs on the raw band (before
        # the float32 cast) and only when the raster declares a nodata value.
        if nodata is not None:
            valid &= (tmrt_raw != nodata)

        # An empty selection would only yield NaN plus a RuntimeWarning;
        # skipping leaves the row as NaN after the map below all the same.
        if not valid.any():
            print(f" No valid building pixels in {folder_name}, skipping.")
            continue

        # Accumulate in float64 so large patches do not lose precision.
        mean_val = tmrt[valid].mean(dtype=np.float64)
        tmrt_building_means[c_tram] = round(float(mean_val), 2)

    except Exception as e:
        # Best effort: report the failing folder and carry on with the rest.
        print(f" Error in {folder_name}: {e}")

# Merge into main CSV.
# The dictionary keys never contain "_" (they are the last "_"-separated
# token), so a 'tile_' prefix on C_Tram would prevent every match. The lookup
# key is normalised here; the stored C_Tram column itself is left untouched.
lookup_key = df['C_Tram'].str.replace('tile_', '', regex=False).str.strip()
df['Tmrt_Buildings_Mean'] = lookup_key.map(tmrt_building_means)

df.to_csv(csv_path, index=False, encoding='utf-8-sig')

print(" Done. Column 'Tmrt_Buildings_Mean' added.")
print(df[['C_Tram', 'Tmrt_Buildings_Mean']].head())

Create a single merged dataset of street trees (one row per C_Tram)

In [None]:
# Inputs: one tree inventory per street side. Output: one summary row per C_Tram.
left_file = r"../dataset/Street_Trees_Left.csv"
right_file = r"../dataset/Street_Trees_Right.csv"
output_file = r"../dataset/Merged_Street_Trees.csv"

# Read both sides, trimming stray whitespace from the header names on the way in.
df_left = pd.read_csv(left_file).rename(columns=str.strip)
df_right = pd.read_csv(right_file).rename(columns=str.strip)

# Stack the two sides into a single table with a fresh 0..n-1 index.
df_all = pd.concat(
    [df_left, df_right],
    ignore_index=True,
)

# Per-segment statistics: tree count plus mean height and mean circumference,
# with both means rounded to two decimals.
grouped = (
    df_all
    .groupby('C_Tram')
    .agg(
        Number_of_Trees=('source_pke', 'count'),
        Mean_Tree_Height=('height', 'mean'),
        Mean_Tree_Circumfere=('circumfere', 'mean'),
    )
    .reset_index()
    .round({'Mean_Tree_Height': 2, 'Mean_Tree_Circumfere': 2})
)

# Every tree is assigned the same canopy area, pi * 2**2 (a circle of radius 2;
# presumably metres, given the `_m2` column suffix), so the total is count * area.
canopy_area_per_tree = math.pi * 2 ** 2
grouped['Total_Canopy_Area_m2'] = grouped['Number_of_Trees'].mul(canopy_area_per_tree).round(2)

grouped.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f" Merged tree data saved to:\n{output_file}")


Add building heights, Street_Buffer values, and tree statistics to the street segments

In [None]:
# Enrich the street-segment table with per-segment building heights, the
# Street_Buffer value and the tree summary, then save the combined table.
street_file = r"../dataset/BCN_GrafVial_Trams_ETRS89_CSV.csv"
height_file = r"../dataset/Road_BuildingHeight.csv"
buffer_file = r"../dataset/BufferStreet_Distance.csv"
output_file = r"../dataset/BCN_GrafVial_MOD_with_features.csv"

# Street segments: semicolon-delimited, Latin-1 encoded.
df = pd.read_csv(street_file, encoding='latin1', delimiter=';')
# Trim header whitespace up front: every merge below keys on 'C_Tram', so a
# padded header name would otherwise fail inside the merge itself.
df.columns = df.columns.str.strip()
n_segments = len(df)

#Load building height data
df_heights = pd.read_csv(height_file)
df_heights.columns = df_heights.columns.str.strip()
# Collapse to one row per C_Tram so the left merge cannot multiply segments.
df_heights_avg = df_heights.groupby('C_Tram', as_index=False).agg({
    'BuildingHeight_Right': 'mean',
    'BuildingHeight_Left': 'mean'
})

# Merge height into street data
df = df.merge(df_heights_avg, on='C_Tram', how='left')

# Compute mean height.
# Segments without height data get 0.0 on that side, and that 0.0 takes part
# in the two-sided mean.
df['BuildingHeight_Right'] = df['BuildingHeight_Right'].fillna(0.0).round(1)
df['BuildingHeight_Left'] = df['BuildingHeight_Left'].fillna(0.0).round(1)
df['BuildingHeight_Mean'] = df[['BuildingHeight_Right', 'BuildingHeight_Left']].mean(axis=1).round(1)

#  Load Street_Buffer data and merge
df_buffer = pd.read_csv(buffer_file)
df_buffer.columns = df_buffer.columns.str.strip()
df = df.merge(df_buffer[['C_Tram', 'Street_Buffer']], on='C_Tram', how='left')
df['Street_Buffer'] = df['Street_Buffer'].fillna(0.0).round(2)

# Load tree data and merge
tree_file = r"../dataset/Merged_Street_Trees.csv"
df_trees = pd.read_csv(tree_file)
df_trees.columns = df_trees.columns.str.strip()

df = df.merge(df_trees, on='C_Tram', how='left')

# Fill NaNs for numeric tree fields (segments with no tree record get zeros)
df['Number_of_Trees'] = df['Number_of_Trees'].fillna(0).astype(int)
df['Mean_Tree_Height'] = df['Mean_Tree_Height'].fillna(0.0).round(2)
df['Mean_Tree_Circumfere'] = df['Mean_Tree_Circumfere'].fillna(0.0).round(2)
df['Total_Canopy_Area_m2'] = df['Total_Canopy_Area_m2'].fillna(0.0).round(2)

# A left merge only adds rows when the right-hand table repeats a key. The
# buffer and tree tables are merged as loaded, so report it if that happened.
if len(df) != n_segments:
    print(f" Warning: row count changed from {n_segments} to {len(df)} during the merges (duplicate C_Tram keys?).")


# Save final merged dataset
df.to_csv(output_file, index=False, encoding='utf-8-sig')

print(" Final merged dataset with heights, buffer, and trees.\n")
print(" Shape of dataset:", df.shape)

print("\n Column names:")
print(df.columns.tolist())

print("\n First 5 rows:")
print(df[['C_Tram', 
          'BuildingHeight_Right', 
          'BuildingHeight_Left', 
          'BuildingHeight_Mean', 
          'Street_Buffer', 
          'Number_of_Trees', 
          'Mean_Tree_Height', 
          'Total_Canopy_Area_m2']].head())

In [None]:
# Add a sin/cos encoding of the segment orientation to `df` (the frame built
# in the previous cell) and save the result for the next step.

# strip column names to avoid whitespace issues
df.columns = df.columns.str.strip()

# Check if ANGLE exists
print("Available columns:", df.columns.tolist())

# Now apply transformation (if 'ANGLE' exists)
if 'ANGLE' in df.columns:
    angle_deg = df['ANGLE']
    if not pd.api.types.is_numeric_dtype(angle_deg):
        # A text column (e.g. decimal commas) would make np.deg2rad raise.
        # Convert it; values that still cannot be parsed become NaN.
        angle_deg = pd.to_numeric(
            angle_deg.astype(str).str.replace(',', '.', regex=False),
            errors='coerce',
        )

    # ANGLE is treated as degrees. The sin/cos pair represents the direction
    # without a jump at the 0/360 wrap-around.
    df['ANGLE_rad'] = np.deg2rad(angle_deg)
    df['ANGLE_sin'] = np.sin(df['ANGLE_rad'])
    df['ANGLE_cos'] = np.cos(df['ANGLE_rad'])
    print("Orientation converted using sin/cos.")
else:
    print("Column 'ANGLE' not found in df.")

output_path = r"../dataset/BCN_GrafVial_With_Orientation.csv"

# The file is written whether or not ANGLE was found.
df.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f"File saved to:\n{output_path}")

In [None]:
# Attach the per-patch Tmrt statistics to the street table, matching on C_Tram.
tmrt_stats_path = r"../dataset/all_patch_summaries.csv"
bcn_csv_path = r"../dataset/BCN_GrafVial_With_Orientation.csv"
output_path = r"../dataset/BCN_GrafVial_With_Tmrt_graph.csv"

# Load both tables with whitespace trimmed from the header names.
df_tmrt = pd.read_csv(tmrt_stats_path).rename(columns=str.strip)
df_bcn = pd.read_csv(bcn_csv_path).rename(columns=str.strip)

# Bring the join key to the same textual form on both sides: plain strings,
# no 'tile_' prefix on the Tmrt side, no surrounding whitespace.
tmrt_keys = df_tmrt['C_Tram'].astype(str)
tmrt_keys = tmrt_keys.str.replace('tile_', '', regex=False)
df_tmrt['C_Tram'] = tmrt_keys.str.strip()
df_bcn['C_Tram'] = df_bcn['C_Tram'].astype(str).str.strip()

# Left join: every street row is kept and C_Tram stays in the output.
df_merged = pd.merge(df_bcn, df_tmrt, how='left', on='C_Tram')

df_merged.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f" Done. Merged Tmrt buffer stats into:\n{output_path}")


In [None]:
# Quick look at the merged table: dimensions, column names, first rows.
print("\n Dataset shape:", df_merged.shape)

# Heading and payload go through one print call, separated by a newline.
print("\n Column names:", df_merged.columns.tolist(), sep="\n")

print("\n First 5 rows:", df_merged.head(), sep="\n")