In [28]:
import pandas as pd
import math
import numpy as np
import os
import rasterio

In [29]:
# Purpose: for every patch folder under `base_folder`, compute the mean of
# Tmrt_average.tif over the pixels where buildings.tif == 1, and store that
# mean in a new 'Tmrt_Buildings_Mean' column of the sampled-ID CSV.
# NOTE: the CSV at `csv_path` is overwritten in place with the extra column.

# --- Paths ---
base_folder = r"D:\patches_combined"
csv_path = r"D:\sampled_c_tram_ids.csv"

# --- Load CSV ---
df = pd.read_csv(csv_path)
df['C_Tram'] = df['C_Tram'].astype(str)  # string keys so they match folder-name suffixes

# --- Store masked mean values (C_Tram id -> rounded mean) ---
tmrt_building_means = {}

# --- Process folders ---
# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# outcome deterministic if two folders end in the same C_Tram suffix
# (the later one in sorted order overwrites the earlier one).
for folder_name in sorted(os.listdir(base_folder)):
    folder_path = os.path.join(base_folder, folder_name)

    # Only directories whose name contains "_" are treated as patch folders.
    if not (os.path.isdir(folder_path) and "_" in folder_name):
        continue

    # The C_Tram id is taken as the text after the last underscore.
    c_tram = folder_name.split("_")[-1]

    tmrt_path = os.path.join(folder_path, "Tmrt_average.tif")
    bld_path = os.path.join(folder_path, "buildings.tif")

    # Both rasters must be present; otherwise the folder is silently skipped.
    if not (os.path.exists(tmrt_path) and os.path.exists(bld_path)):
        continue

    try:
        with rasterio.open(tmrt_path) as tmrt_src, rasterio.open(bld_path) as bld_src:
            tmrt = tmrt_src.read(1).astype(np.float32)
            bld = bld_src.read(1)
            nodata = tmrt_src.nodata  # may be None when the raster declares no NoData value

        if tmrt.shape != bld.shape:
            print(f" Shape mismatch in {folder_name}, skipping.")
            continue

        # Valid pixels: buildings == 1, Tmrt is not NaN, and Tmrt is not NoData.
        valid = (bld == 1) & ~np.isnan(tmrt)
        if nodata is not None:
            # A NaN nodata value never compares equal; those pixels are
            # already excluded by the isnan() test above.
            valid &= (tmrt != nodata)

        # Guard the empty case explicitly: np.nanmean on an all-NaN array
        # emits a RuntimeWarning and returns NaN, which would be stored silently.
        if not valid.any():
            print(f" No valid building pixels in {folder_name}, skipping.")
            continue

        mean_val = np.nanmean(np.where(valid, tmrt, np.nan))
        tmrt_building_means[c_tram] = round(float(mean_val), 2)

    except Exception as e:
        # Best effort: report the failing folder and carry on with the rest.
        print(f" Error in {folder_name}: {e}")

# --- Merge into main CSV ---
# IDs without a computed mean (no folder, skipped, or failed) become NaN.
df['Tmrt_Buildings_Mean'] = df['C_Tram'].map(tmrt_building_means)

# --- Save updated CSV ---
df.to_csv(csv_path, index=False, encoding='utf-8-sig')

print(" Done. Column 'Tmrt_Buildings_Mean' added.")
print(df[['C_Tram', 'Tmrt_Buildings_Mean']].head())

 Done. Column 'Tmrt_Buildings_Mean' added.
    C_Tram  Tmrt_Buildings_Mean
0  T03487V                29.85
1  T05716X                30.98
2  T08615F                30.56
3  T18975F                33.16
4  T18536M                30.15


In [30]:
# Purpose: combine the left-side and right-side street-tree tables and write
# one summary row per C_Tram (tree count, mean height, mean circumference,
# and an estimated total canopy area).

# --- Input / output locations ---
left_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Street_Trees_Left.csv"
right_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Street_Trees_Right.csv"
output_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Merged_Street_Trees.csv"

# --- Step 1: read the two per-side tables ---
df_left = pd.read_csv(left_file)
df_right = pd.read_csv(right_file)

# --- Trim stray whitespace from the headers of both tables ---
for side in (df_left, df_right):
    side.columns = side.columns.str.strip()

# --- Step 2: stack both sides into a single table ---
df_all = pd.concat([df_left, df_right], ignore_index=True)

# --- Step 3: one summary row per C_Tram (individual tree IDs are not kept) ---
tree_summary_spec = {
    'Number_of_Trees': ('source_pke', 'count'),       # counts non-null source_pke values
    'Mean_Tree_Height': ('height', 'mean'),
    'Mean_Tree_Circumfere': ('circumfere', 'mean'),
}
grouped = df_all.groupby('C_Tram').agg(**tree_summary_spec).reset_index()

# --- Step 4: round the two mean columns to 2 decimals ---
grouped = grouped.round({'Mean_Tree_Height': 2, 'Mean_Tree_Circumfere': 2})

# --- Step 5: total canopy area, assuming every tree has a 4 m crown diameter (radius 2 m) ---
tree_radius_m = 2
canopy_area_per_tree = math.pi * (tree_radius_m ** 2)  # ≈ 12.57 m² per tree
grouped['Total_Canopy_Area_m2'] = grouped['Number_of_Trees'].mul(canopy_area_per_tree).round(2)

# --- Step 6: write the summary table ---
grouped.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f"✅ Merged tree data saved to:\n{output_file}")


✅ Merged tree data saved to:
C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Merged_Street_Trees.csv


In [31]:
# Purpose: enrich the street-segment table with per-C_Tram building heights,
# street buffer distance and tree statistics, then save the combined table.

# --- Input / output locations ---
street_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_CSV\BCN_GrafVial_Trams_ETRS89_CSV.csv"
height_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Road_BuildingHeight.csv"
buffer_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\BufferStreet_Distance.csv"
output_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_MOD_with_features.csv"
tree_file = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\25-0902 BCN Data\Merged_Street_Trees.csv"

# Column groups that receive the same treatment below.
height_cols = ['BuildingHeight_Right', 'BuildingHeight_Left']
tree_float_cols = ['Mean_Tree_Height', 'Mean_Tree_Circumfere', 'Total_Canopy_Area_m2']

# --- Step 1: street segments (semicolon-delimited, latin1-encoded) ---
df = pd.read_csv(street_file, encoding='latin1', delimiter=';')

# --- Step 2: average the building heights per C_Tram ---
df_heights = pd.read_csv(height_file)
df_heights_avg = df_heights.groupby('C_Tram', as_index=False)[height_cols].mean()

# --- Step 3: attach the averaged heights (left join keeps every street row) ---
df = pd.merge(df, df_heights_avg, on='C_Tram', how='left')

# --- Step 4: missing heights become 0.0; both sides rounded to 1 decimal,
#             then the two sides are averaged into BuildingHeight_Mean ---
df[height_cols] = df[height_cols].fillna(0.0).round(1)
df['BuildingHeight_Mean'] = df[height_cols].mean(axis=1).round(1)

# --- Step 5: attach Street_Buffer; missing values become 0.0 ---
df_buffer = pd.read_csv(buffer_file)
df = pd.merge(df, df_buffer.loc[:, ['C_Tram', 'Street_Buffer']], on='C_Tram', how='left')
df['Street_Buffer'] = df['Street_Buffer'].fillna(0.0).round(2)

# --- Step 6: attach the per-C_Tram tree summary ---
df_trees = pd.read_csv(tree_file)
df = pd.merge(df, df_trees, on='C_Tram', how='left')

# Segments without a tree record get zero trees / zero-valued tree statistics.
df['Number_of_Trees'] = df['Number_of_Trees'].fillna(0).astype(int)
df[tree_float_cols] = df[tree_float_cols].fillna(0.0).round(2)

# --- Step 7: write the combined table ---
df.to_csv(output_file, index=False, encoding='utf-8-sig')

# --- Step 8: quick overview of the result ---
print(" Final merged dataset with heights, buffer, and trees.\n")
print(" Shape of dataset:", df.shape)

print("\n Column names:")
print(df.columns.tolist())

preview_cols = [
    'C_Tram',
    'BuildingHeight_Right',
    'BuildingHeight_Left',
    'BuildingHeight_Mean',
    'Street_Buffer',
    'Number_of_Trees',
    'Mean_Tree_Height',
    'Total_Canopy_Area_m2',
]
print("\n First 5 rows:")
print(df[preview_cols].head())

 Final merged dataset with heights, buffer, and trees.

 Shape of dataset: (14831, 30)

 Column names:
['FID', 'COORD_X', 'COORD_Y', 'LONGITUD', 'ANGLE', 'C_Tram', 'C_Nus_I', 'C_Nus_F', 'Distric_D', 'NDistric_D', 'Illa_D', 'CVia_D', 'TVia_D', 'NVia_D', 'Tram_Dret', 'Distric_E', 'NDistric_E', 'Illa_E', 'CVia_E', 'TVia_E', 'NVia_E', 'Tram_Esq', 'BuildingHeight_Right', 'BuildingHeight_Left', 'BuildingHeight_Mean', 'Street_Buffer', 'Number_of_Trees', 'Mean_Tree_Height', 'Mean_Tree_Circumfere', 'Total_Canopy_Area_m2']

 First 5 rows:
    C_Tram  BuildingHeight_Right  BuildingHeight_Left  BuildingHeight_Mean  \
0  T00001B                  29.0                 18.2                 23.6   
1  T00002C                  20.0                 37.0                 28.5   
2  T00003D                  20.0                 15.8                 17.9   
3  T00004E                  16.0                 20.1                 18.0   
4  T00005F                  20.5                 29.6                 25.0 

In [32]:
# Purpose: add a sin/cos encoding of the ANGLE column to `df` (the frame
# built by the preceding feature-merge cell) and save the result to a new CSV.

# Strip column names to avoid whitespace issues when looking up 'ANGLE'.
df.columns = df.columns.str.strip()

# Show the available columns so a missing/misnamed ANGLE column is easy to spot.
print("Available columns:", df.columns.tolist())

# Apply the transformation only if 'ANGLE' exists.
if 'ANGLE' in df.columns:
    # ANGLE is treated as degrees here (deg2rad) — TODO confirm against the data source.
    df['ANGLE_rad'] = np.deg2rad(df['ANGLE'])
    # sin/cos pair: angles that differ by a full turn map to the same values.
    df['ANGLE_sin'] = np.sin(df['ANGLE_rad'])
    df['ANGLE_cos'] = np.cos(df['ANGLE_rad'])
    print("Orientation converted using sin/cos.")
else:
    # The frame checked in this cell is `df`; the message now names it correctly.
    print("Column 'ANGLE' not found in df.")

output_path = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_With_Orientation.csv"

# Save with UTF-8 (BOM) encoding and no index column.
# NOTE: the file is written even when 'ANGLE' was missing; in that case it
# contains no orientation columns.
df.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f"File saved to:\n{output_path}")

Available columns: ['FID', 'COORD_X', 'COORD_Y', 'LONGITUD', 'ANGLE', 'C_Tram', 'C_Nus_I', 'C_Nus_F', 'Distric_D', 'NDistric_D', 'Illa_D', 'CVia_D', 'TVia_D', 'NVia_D', 'Tram_Dret', 'Distric_E', 'NDistric_E', 'Illa_E', 'CVia_E', 'TVia_E', 'NVia_E', 'Tram_Esq', 'BuildingHeight_Right', 'BuildingHeight_Left', 'BuildingHeight_Mean', 'Street_Buffer', 'Number_of_Trees', 'Mean_Tree_Height', 'Mean_Tree_Circumfere', 'Total_Canopy_Area_m2']
Orientation converted using sin/cos.
File saved to:
C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_With_Orientation.csv


In [16]:
# NOTE: this cell is disabled. Its whole body is wrapped in a triple-quoted
# string, so none of the statements below run; the cell only evaluates to that
# string. The quoted code would split the full feature table into rows whose
# C_Tram appears in the sampled-ID CSV ("matched") and the rest ("unmatched"),
# writing each subset to its own CSV.
# NOTE(review): the recorded output under this cell presumably comes from an
# earlier run made while the code was still live — confirm, then delete the
# cell or restore it as real code.
"""import pandas as pd

# --- File Paths ---
full_data_path = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_MOD_with_features.csv"
sampled_ids_path = r"D:\sampled_c_tram_ids.csv"
matched_output = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\matched_trams.csv"
unmatched_output = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\unmatched_trams.csv"

# --- Load data ---
df_full = pd.read_csv(full_data_path)
df_sampled = pd.read_csv(sampled_ids_path)

# Ensure C_Tram is string for matching
df_full['C_Tram'] = df_full['C_Tram'].astype(str)
df_sampled['C_Tram'] = df_sampled['C_Tram'].astype(str)

# --- Split ---
df_matched = df_full[df_full['C_Tram'].isin(df_sampled['C_Tram'])]
df_unmatched = df_full[~df_full['C_Tram'].isin(df_sampled['C_Tram'])]

# --- Save ---
df_matched.to_csv(matched_output, index=False, encoding='utf-8-sig')
df_unmatched.to_csv(unmatched_output, index=False, encoding='utf-8-sig')

print(" Split complete.")
print(f" Matched rows: {df_matched.shape[0]}")
print(f" Unmatched rows: {df_unmatched.shape[0]}")"""


✅ Split complete.
🟢 Matched rows: 2000
🔴 Unmatched rows: 12831


In [33]:
# Purpose: left-join the 'Tmrt_Buildings_Mean' column from the sampled-ID CSV
# onto the street table (with orientation columns) and save the result.

# --- Input / output locations ---
main_csv = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_GrafVial_With_Orientation.csv"
tmrt_values_csv = r"D:\sampled_c_tram_ids.csv"
output_csv = r"C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_dataset_complete.csv"

# --- Read both tables ---
df_matched = pd.read_csv(main_csv)
df_tmrt = pd.read_csv(tmrt_values_csv)

# --- Normalise headers: drop surrounding whitespace in both tables ---
for frame in (df_matched, df_tmrt):
    frame.columns = frame.columns.str.strip()

# --- Join key must be the same dtype (string) on both sides ---
for frame in (df_matched, df_tmrt):
    frame['C_Tram'] = frame['C_Tram'].astype(str)

# --- Show which columns the Tmrt table offers ---
print("✅ Columns in df_tmrt:")
print(df_tmrt.columns.tolist())

# --- Left join: keep every street row, add the Tmrt value where one exists ---
tmrt_subset = df_tmrt[['C_Tram', 'Tmrt_Buildings_Mean']]  # column names must match exactly
df_merged = pd.merge(df_matched, tmrt_subset, on='C_Tram', how='left')

# --- Show the columns of the joined table ---
print("\nColumns in df_merged:")
print(df_merged.columns.tolist())

# --- Report how many rows have no Tmrt value (only if the column is present) ---
if 'Tmrt_Buildings_Mean' not in df_merged.columns:
    print("'Tmrt_Buildings_Mean' was not found in the merged DataFrame.")
else:
    missing = df_merged['Tmrt_Buildings_Mean'].isna().sum()
    print(f"Missing Tmrt_Buildings_Mean values: {missing}")
    print(df_merged[['C_Tram', 'Tmrt_Buildings_Mean']].head())

# --- Write the joined table ---
df_merged.to_csv(output_csv, index=False, encoding='utf-8-sig')
print(f"\n✅ File saved to:\n{output_csv}")



✅ Columns in df_tmrt:
['C_Tram', 'Tmrt_Mean', 'Tmrt_Buildings_Mean']

📦 Columns in df_merged:
['FID', 'COORD_X', 'COORD_Y', 'LONGITUD', 'ANGLE', 'C_Tram', 'C_Nus_I', 'C_Nus_F', 'Distric_D', 'NDistric_D', 'Illa_D', 'CVia_D', 'TVia_D', 'NVia_D', 'Tram_Dret', 'Distric_E', 'NDistric_E', 'Illa_E', 'CVia_E', 'TVia_E', 'NVia_E', 'Tram_Esq', 'BuildingHeight_Right', 'BuildingHeight_Left', 'BuildingHeight_Mean', 'Street_Buffer', 'Number_of_Trees', 'Mean_Tree_Height', 'Mean_Tree_Circumfere', 'Total_Canopy_Area_m2', 'ANGLE_rad', 'ANGLE_sin', 'ANGLE_cos', 'Tmrt_Buildings_Mean']
🟡 Missing Tmrt_Buildings_Mean values: 12831
    C_Tram  Tmrt_Buildings_Mean
0  T00001B                  NaN
1  T00002C                  NaN
2  T00003D                  NaN
3  T00004E                30.47
4  T00005F                  NaN

✅ File saved to:
C:\Users\Andrea\Desktop\thesis\Modified_CSV_Street\BCN_dataset_complete.csv


In [34]:
# --- Overview of the merged dataset: shape, column names, first rows ---
n_preview_rows = 5

print("\n Dataset shape:", df_merged.shape)

# Each heading is followed by its value on the next line (sep="\n").
print("\n Column names:", df_merged.columns.tolist(), sep="\n")
print("\n First 5 rows:", df_merged.head(n_preview_rows), sep="\n")



 Dataset shape: (14831, 34)

 Column names:
['FID', 'COORD_X', 'COORD_Y', 'LONGITUD', 'ANGLE', 'C_Tram', 'C_Nus_I', 'C_Nus_F', 'Distric_D', 'NDistric_D', 'Illa_D', 'CVia_D', 'TVia_D', 'NVia_D', 'Tram_Dret', 'Distric_E', 'NDistric_E', 'Illa_E', 'CVia_E', 'TVia_E', 'NVia_E', 'Tram_Esq', 'BuildingHeight_Right', 'BuildingHeight_Left', 'BuildingHeight_Mean', 'Street_Buffer', 'Number_of_Trees', 'Mean_Tree_Height', 'Mean_Tree_Circumfere', 'Total_Canopy_Area_m2', 'ANGLE_rad', 'ANGLE_sin', 'ANGLE_cos', 'Tmrt_Buildings_Mean']

 First 5 rows:
   FID     COORD_X      COORD_Y  LONGITUD  ANGLE   C_Tram  C_Nus_I  C_Nus_F  \
0    0  432128.893  4581047.606    42.615  15.04  T00001B  N00001D  N00002E   
1    1  432156.649  4581054.946    14.806  14.20  T00002C  N00002E  N00003F   
2    2  432171.171  4581059.200    15.480  18.40  T00003D  N00003F  N00004G   
3    3  432185.863  4581063.068    14.969  10.98  T00004E  N00004G  N00005H   
4    4  432200.396  4581066.659    15.010  16.80  T00005F  N00005H