# Retrieve RICS images matching sampling points

Match each training sample point to its nearest RICS image frame, then download the matched images from AWS S3.

In [157]:
import geopandas as gpd
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
import glob
import subprocess
import json
import boto3

In [158]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [159]:
# GeoJSON holding the training sample points.
sample_file = r"C:\Users\lliu\FrontierSI\Projects - 127 Residential Dwelling Floor Height\4 Executing\GA_data_documentation\Wagga Exposure Data\Final_Wagga_training_samples.geojson"

# Every "*_interpolated.geojson" RICS database found in the RICS output folder.
RICS_db_pattern = r"C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\*_interpolated.geojson"
RICS_db_files = glob.glob(RICS_db_pattern)

# Folder that receives the downloaded images and the output GeoJSON.
out_folder = r'C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\RICS_wagga_training_locations'
# Alternative output location (disabled):
# out_folder=r'C:\Users\lliu\FrontierSI\Projects - 127 Residential Dwelling Floor Height\4 Executing\Data Exploration\RICS\RICS_wagga_training_locations'

In [160]:
# Load the sample points and reproject them to EPSG:28355 in one step,
# so they share a CRS with the RICS tracks reprojected later.
df_sample = gpd.read_file(sample_file).to_crs(epsg='28355')

In [161]:
# One-time MFA code passed to `aws sts get-session-token`.
# NOTE(review): presumably this has to be replaced with a fresh code before
# every run - confirm, and consider prompting for it instead of hard-coding.
mfa_token = '751850'
bucket_name = 'frontiersi-p127-floor-height'

# Build the argument list explicitly. The executable path contains a space
# ("Program Files"), so splitting a single command string on whitespace
# would cut the path in two.
aws_cli = r'C:\Program Files\Amazon\AWSCLIV2\aws'
cmd = [
    aws_cli, 'sts', 'get-session-token',
    '--serial-number', 'arn:aws:iam::693903849513:mfa/Lavender_AWS_MFA',
    '--token-code', mfa_token,
]
# text=True decodes stdout/stderr so error output is readable.
result = subprocess.run(cmd, capture_output=True, text=True)

# Stop here on failure: the following cell needs `data`, and continuing
# would either raise NameError or silently reuse credentials from an
# earlier run.
if result.returncode != 0:
    raise RuntimeError(f"aws sts get-session-token failed: {result.stderr}")

# Parse JSON output into a dictionary (credentials are under data['Credentials'])
data = json.loads(result.stdout)

In [162]:
# Temporary credentials returned by the session-token call.
credentials = data['Credentials']
session = boto3.session.Session(
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken'],
)
# S3 client bound to the temporary session
s3 = session.client('s3')

In [163]:
def calculate_bearing(point1, point2):
    """Return the bearing in degrees, in [0, 360), from point1 to point2.

    The angle is measured from the +y axis towards the +x axis, using the
    ``x`` and ``y`` attributes of both points.
    """
    dx = point2.x - point1.x
    dy = point2.y - point1.y
    # arctan2(dx, dy) (x first) gives the angle relative to the +y axis;
    # the +360 / %360 step folds negative angles into [0, 360).
    angle_deg = np.degrees(np.arctan2(dx, dy))
    return (angle_deg + 360) % 360
def is_within_view(bearing, camera_bearing, side, fov_buffer = 30):
    """Return True if ``bearing`` lies inside a side camera's field of view.

    Parameters
    ----------
    bearing : float
        Bearing to test, in degrees. Any value is accepted; it is wrapped
        modulo 360.
    camera_bearing : float
        Reference heading in degrees. The 'R' view is centred 90 degrees
        from it and the 'L' view 270 degrees from it.
    side : str
        'L' or 'R'. Any other value returns False.
    fov_buffer : float, default 30
        Half-width of the field of view in degrees (inclusive bounds).
        Values of 180 or more accept every bearing.
    """
    if side == 'L':
        view_centre = camera_bearing + 270
    elif side == 'R':
        view_centre = camera_bearing + 90
    else:
        return False

    # Smallest absolute angular offset between the bearing and the view
    # centre, in [0, 180]. Working with the offset avoids comparing
    # wrapped lower/upper bounds, which accepted every bearing when
    # fov_buffer was 0 and rejected valid bearings outside [0, 360).
    offset = abs((bearing - view_centre + 180) % 360 - 180)
    return offset <= fov_buffer
    
def calculate_relative_angle(car_bearing, house_bearing, side):
    """Calculate the relative angle between the camera's direction and the house direction.

    Parameters
    ----------
    car_bearing : float
        Heading of the vehicle in degrees.
    house_bearing : float
        Bearing from the camera position to the house in degrees.
    side : str
        'L' for the camera pointing 90 degrees left of the vehicle heading,
        'R' for the one pointing 90 degrees right.

    Returns
    -------
    float
        Smallest absolute angle between the two directions, in [0, 180].

    Raises
    ------
    ValueError
        If ``side`` is neither 'L' nor 'R'.
    """
    if side == 'L':
        camera_bearing = (car_bearing - 90) % 360
    elif side == 'R':
        camera_bearing = (car_bearing + 90) % 360
    else:
        # Fail with a clear message rather than an UnboundLocalError below.
        raise ValueError(f"side must be 'L' or 'R', got {side!r}")
    # Wrap the difference so bearings outside [0, 360) cannot produce a
    # negative result.
    angle_difference = abs(camera_bearing - house_bearing) % 360
    return min(angle_difference, 360 - angle_difference)


In [164]:
# For every RICS database file, find the nearest RICS image point for each
# sample point, keep only points whose camera faces the sample, and collect
# one merged table per file in df_RICS.
df_RICS = []
for RICS_db_file in RICS_db_files:
    print('searching for', RICS_db_file)
    df_rics = gpd.read_file(RICS_db_file).to_crs(epsg='28355')

    # Identify key information of the db file from its name, which has the
    # form "<foldername>_<side>_interpolated.geojson".
    basename = os.path.basename(RICS_db_file).split('.')[0]
    name_parts = basename.split('_')
    foldername = '_'.join(name_parts[:-2])
    side = name_parts[-2]

    # Nearest RICS point per sample, within a distance tolerance of 40
    # (CRS units; metres for EPSG:28355).
    nearby_points = gpd.sjoin_nearest(df_sample, df_rics, max_distance=40, how='inner', distance_col="distance")
    if len(nearby_points) == 0:
        print('Cannot find nearest RICS image points')
        continue
    print('Number of nearest RICS images ', len(nearby_points))

    # Angle between the camera's viewing direction and the direction from
    # the camera to the sample point, one value per matched row.
    # NOTE(review): the join column name `index_right0` depends on the index
    # of df_rics and on the geopandas version - confirm when upgrading.
    relative_angles = []
    for _, row in nearby_points.iterrows():
        camera_row = df_rics.loc[row.index_right0]  # single lookup per match
        house_bearing = calculate_bearing(camera_row.geometry, row.geometry)
        relative_angles.append(
            calculate_relative_angle(camera_row.bearing, house_bearing, side)
        )
    nearby_points['relative_angle'] = relative_angles

    # Keep only camera points looking within 60 degrees of the sample,
    # best-aligned first, and drop exact duplicates.
    filtered = (
        nearby_points[nearby_points['relative_angle'] < 60]
        .sort_values(by=['relative_angle'])
        .drop_duplicates(subset=['geometry', 'relative_angle'], keep='first')
    )

    # Merge back to the original samples: take only the columns that the
    # join added (plus the 'UFI' key) so sample columns are not duplicated.
    new_columns = [col for col in filtered.columns if col not in df_sample.columns]
    filtered_cleaned = filtered[['UFI'] + new_columns]
    merged = pd.merge(df_sample, filtered_cleaned, how='left', on='UFI')

    # Image basename "<foldername>_<side>_<7-digit frame>.jpg"; None for
    # samples without a match in this file.
    merged['RICS_img'] = merged['frame'].apply(lambda frame: '_'.join([foldername, side, f"{int(frame):07d}.jpg"]) if pd.notna(frame) else None)
    df_RICS.append(merged)

searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\cbd_141112_11404_L_interpolated.geojson
Number of nearest RICS images  62
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\cbd_141112_11404_R_interpolated.geojson
Number of nearest RICS images  62
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\Industrial_131112_04706_L_interpolated.geojson
Number of nearest RICS images  40
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\Industrial_131112_04706_R_interpolated.geojson
Number of nearest RICS images  40
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\middle_res_131112_24238_L_interpolated.geojson
Cannot find nearest RICS image points
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-

In [165]:
# Stack the per-file match tables into a single table.
df_RICS = pd.concat(df_RICS)

# Rank rows by 'UFI', then 'distance', then 'relative_angle'. NaN values are
# replaced by +inf for the comparison so unmatched rows sort after matched
# ones. Then keep the first (best-ranked) row per 'UFI' and renumber the
# index for clean output.
ranking_columns = ['UFI', 'distance', 'relative_angle']
df_RICS_final = (
    df_RICS
    .sort_values(by=ranking_columns, key=lambda column: column.fillna(float('inf')))
    .drop_duplicates(subset=['UFI'], keep='first')
    .reset_index(drop=True)
)
df_RICS_final

Unnamed: 0,WALL_M,STEPS,USAGE,STOREYS,ASSESSOR,address,Area_1,PMF,Dep_500,Dep_200,Dep_100,Dep_50,Dep_20,Dep_10,Ground_Lev,Floor_Leve,AGE,UFI,ZONE_DESCR,Floor_height,Ground_surveyed,cluster,Ground_Level_bin,geometry,index_right0,frame,time,speed,bearing,satellites,fix_quality,camera_ID,index,lon,lat,distance,relative_angle,RICS_img
0,Brick,1,Residential,1,NEIL,80 THORNE STREET,339.307232,4.1541,0.9867,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,179.315,179.595,< 1960,1,General Residential,0.28,0,1,Medium,POINT (533121.036 6114319.815),7555.0,7341.0,3709.75,38.1,99.2,10.0,1.0,108246.0,7341.0,147.363508,-35.113159,27.921232,0.930013,old_res_131112_33816_R_0007341.jpg
1,Timber,1,Residential,1,NEIL,156 FORSYTH STREET,261.009733,5.5493,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,178.715,178.995,< 1960,2,Medium Density Residential,0.28,0,1,Low,POINT (533021.042 6114331.849),7518.0,7304.0,3660.25,37.5,98.6,10.0,1.0,108246.0,7304.0,147.362419,-35.113016,32.240570,0.058193,old_res_131112_33816_R_0007304.jpg
2,Brick,1,Residential,1,NEIL,86 BEST STREET,250.047753,4.2889,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,180.583,180.863,< 1960,8,General Residential,0.28,0,1,High,POINT (533397.16 6114333.635),7715.0,7461.0,3735.25,40.4,99.0,9.0,1.0,53046.0,7461.0,147.366448,-35.113540,29.681244,0.658582,old_res_131112_33816_L_0007461.jpg
3,Timber,1,Residential,1,NEIL,152 FORSYTH STREET,114.304419,5.7465,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,178.722,179.002,< 1960,10,Medium Density Residential,0.28,0,1,Low,POINT (533049.27 6114329.719),7529.0,7315.0,3703.00,37.9,98.7,10.0,1.0,108246.0,7315.0,147.362737,-35.113055,30.064134,1.967665,old_res_131112_33816_R_0007315.jpg
4,Brick,1,Residential,1,NEIL,56 BEST STREET,112.037145,3.2723,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,180.509,180.789,< 1960,14,General Residential,0.28,0,1,High,POINT (533366.991 6114110.995),2048.0,1998.0,35053.00,34.9,189.9,10.0,1.0,108246.0,1998.0,147.366473,-35.115313,27.581153,2.845632,old_res_131112_33816_R_0001998.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
833,Brick,2,Residential,1,ROBERT,114 DOCKER STREET,128.662408,6.0313,1.6015,0.35,-9999.0,-9999.0,-9999.0,-9999.0,178.871,179.151,< 1960,3443,Medium Density Residential,0.28,0,1,Low,POINT (532565.067 6114377.547),2764.0,2714.0,35509.25,36.9,15.6,10.0,1.0,53046.0,2714.0,147.357610,-35.112954,23.281384,2.204229,old_res_131112_33816_L_0002714.jpg
834,Brick,1,Commercial,1,ROBERT,27 FITZMAURICE STREET,313.355682,0.4990,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,181.065,181.345,< 1960,3448,Commercial Core,0.28,0,2,High,POINT (533819.836 6114936.418),775.0,765.0,11818.50,34.4,158.1,11.0,1.0,53046.0,765.0,147.370772,-35.107923,32.516102,2.553757,cbd_141112_11404_L_0000765.jpg
835,Brick,3,Residential,1,NEIL,73 TARCUTTA STREET,216.662728,4.7407,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,179.827,180.107,< 1960,3454,Medium Density Residential,0.28,0,3,Medium,POINT (534318.3 6114005.369),2170.0,2098.0,12816.75,35.5,339.0,11.0,1.0,108246.0,2098.0,147.376228,-35.116325,37.807955,0.439915,cbd_141112_11404_R_0002098.jpg
836,Glass,0,Commercial,1,Carly,240 BAYLIS STREET,328.403210,2.8624,-9999.0000,-9999.00,-9999.0,-9999.0,-9999.0,-9999.0,180.531,180.811,< 1960,3459,Commercial Core,0.28,0,1,High,POINT (533717.3 6114668.901),,,,,,,,,,,,,,


In [166]:
# Download the matched image of every sample that has one, skipping files
# that are already present locally.

# download_file fails if the target directory is missing, so create it.
os.makedirs(out_folder, exist_ok=True)

for idx, row in df_RICS_final.iterrows():
    frame = row.frame
    # pd.isna also copes with None / object values, where np.isnan raises.
    if pd.isna(frame):
        continue

    # Decode the S3 object key "<foldername>/<side>/<7-digit frame>.jpg"
    # from the image name "<foldername>_<side>_<frame>.jpg".
    out_basename = row.RICS_img
    name_parts = out_basename.split('_')
    foldername = '_'.join(name_parts[:-2])
    side = name_parts[-2]
    object_key = f'{foldername}/{side}/{int(frame):07d}.jpg'
    download_path = os.path.join(out_folder, out_basename)

    # Download the file only if it is not already on disk
    if not os.path.exists(download_path):
        s3.download_file(bucket_name, object_key, download_path)

In [168]:
# Save the matched table next to the downloaded images, named after the
# sample file with '.geojson' replaced by '_RICS.geojson'.
sample_basename = os.path.basename(sample_file)
out_name = sample_basename.replace('.geojson','_RICS.geojson')
out_file = os.path.join(out_folder, out_name)
df_RICS_final.to_file(out_file, driver="GeoJSON")