# Retrieve RICS images matching sampling points

and download them from AWS.

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
import glob
import subprocess
import json
import boto3

In [2]:
# Show every column when a DataFrame is displayed (no truncation of wide tables)
pd.options.display.max_columns = None

In [3]:
# --- Input / output locations (machine-specific absolute paths) ---
# Alternative sample input (GeoJSON), currently disabled:
# sample_file = r"C:\Users\lliu\FrontierSI\Projects - 127 Residential Dwelling Floor Height\4 Executing\GA_data_documentation\Wagga Exposure Data\Final_Wagga_training_samples.geojson"
# Sample points to be matched against RICS imagery
sample_file = r'C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\data\Final_Wagga.shp'
# RICS point files; the matching cell parses each file name as "<folder>_<side>_interpolated.geojson"
RICS_db_files = glob.glob(r"C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\*_interpolated.geojson")
# Folder that receives the downloaded images and the final output table
out_folder=r'D:\Wagga\RICS\all_images'
# Alternative output folders, currently disabled:
# out_folder=r'C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\RICS_wagga_training_locations'
# out_folder=r'C:\Users\lliu\FrontierSI\Projects - 127 Residential Dwelling Floor Height\4 Executing\Data Exploration\RICS\RICS_wagga_training_locations'

In [4]:
# Load the sample points and reproject them to EPSG:28355 in one step.
# NOTE(review): later cells treat distances in this CRS as metres - confirm the CRS is metric.
df_sample = gpd.read_file(sample_file).to_crs(epsg='28355')

In [5]:
# Request temporary AWS credentials from STS using an MFA code.
# NOTE(review): the MFA code is hard-coded; it has to be edited before each run
# and should not be committed to version control.
mfa_token = '976586'
# Build argv as a list so the executable path, which contains a space
# ("Program Files"), stays a single argument instead of being split in two.
cmd = [
    'C:\\Program Files\\Amazon\\AWSCLIV2\\aws',
    'sts',
    'get-session-token',
    '--serial-number',
    'arn:aws:iam::693903849513:mfa/Lavender_AWS_MFA',
    '--token-code',
    mfa_token,
]
result = subprocess.run(cmd, capture_output=True)
bucket_name = 'frontiersi-p127-floor-height'

# Fail loudly when the CLI call did not succeed: the following cells need `data`,
# and would otherwise stop with an unrelated NameError.
if result.returncode != 0:
    raise RuntimeError(
        "Error: aws sts get-session-token failed: "
        + result.stderr.decode(errors='replace')
    )

# Parse the JSON printed by the CLI into a dictionary (holds the 'Credentials' entry)
data = json.loads(result.stdout)

In [6]:
# Open a boto3 session with the temporary credentials returned by STS
sts_credentials = data['Credentials']
session = boto3.session.Session(
    aws_access_key_id=sts_credentials['AccessKeyId'],
    aws_secret_access_key=sts_credentials['SecretAccessKey'],
    aws_session_token=sts_credentials['SessionToken'],
)
# S3 client used for the image downloads
s3 = session.client('s3')

In [7]:
def calculate_bearing(point1, point2):
    """Return the bearing from point1 to point2 in degrees, within [0, 360).

    The angle is measured clockwise from the positive y axis, so a target
    straight "up" (+y) gives 0 and a target to the "right" (+x) gives 90.
    Both arguments only need ``x`` and ``y`` attributes.
    """
    offset_x = point2.x - point1.x
    offset_y = point2.y - point1.y
    # arctan2(x, y) (x first) measures the angle from the +y axis rather than +x
    angle_deg = np.degrees(np.arctan2(offset_x, offset_y))
    # Shift the (-180, 180] range of arctan2 into [0, 360)
    return (angle_deg + 360) % 360
def is_within_view(bearing, camera_bearing, side, fov_buffer = 30):
    """Tell whether `bearing` lies inside the view window of a side camera.

    The window is centred on `camera_bearing + 270` for side 'L' and on
    `camera_bearing + 90` for side 'R', and extends `fov_buffer` degrees to
    either side (bounds inclusive). Any other `side` value yields False.
    """
    centre_offset = 270 if side == 'L' else 90 if side == 'R' else None
    if centre_offset is None:
        return False

    window_centre = camera_bearing + centre_offset
    lower_bound = (window_centre - fov_buffer) % 360
    upper_bound = (window_centre + fov_buffer) % 360

    if lower_bound < upper_bound:
        # Ordinary window that does not cross 0/360
        return lower_bound <= bearing <= upper_bound
    # Window wraps around 0/360: accept either side of the seam
    return bearing >= lower_bound or bearing <= upper_bound
    
def calculate_relative_angle(car_bearing, house_bearing, side):
    """Return the smallest angle between the side camera's direction and the house direction.

    Args:
        car_bearing: Heading of the vehicle in degrees.
        house_bearing: Bearing from the camera to the house in degrees.
        side: 'L' for the left camera (heading - 90) or 'R' for the right
            camera (heading + 90).

    Returns:
        The unsigned angular difference in degrees, within [0, 180].

    Raises:
        ValueError: If `side` is neither 'L' nor 'R'.
    """
    if side == 'L':
        camera_bearing = (car_bearing - 90) % 360
    elif side == 'R':
        camera_bearing = (car_bearing + 90) % 360
    else:
        # Previously this fell through and failed on an unbound local variable
        raise ValueError(f"side must be 'L' or 'R', got {side!r}")
    # Wrap the difference so bearings outside [0, 360) cannot yield a negative result
    angle_difference = abs(camera_bearing - house_bearing) % 360
    return min(angle_difference, 360 - angle_difference)


In [8]:
# For every RICS point file, find the nearest RICS image point for each sample
# point, keep matches where the side camera roughly faces the sample, and
# collect one merged table per file in `df_RICS`.
df_RICS=[]
for RICS_db_file in RICS_db_files:
# Debug helper to run the loop body on a single file:
# RICS_db_file=RICS_db_files[0]
    print('searching for',RICS_db_file)
    df_rics = gpd.read_file(RICS_db_file)
    # Same CRS as df_sample so that the nearest-neighbour distances are comparable
    df_rics = df_rics.to_crs(epsg='28355')

    # identify key information of the db file from name
    # e.g. 'cbd_141112_11404_L_interpolated' -> foldername 'cbd_141112_11404', side 'L'
    basename = os.path.basename(RICS_db_file).split('.')[0]
    foldername = '_'.join(basename.split('_')[:-2])
    side = basename.split('_')[-2]

    # Perform the spatial join with a 40-meter distance tolerance
    # (how='inner' keeps only samples that have a RICS point within max_distance;
    # the separation is stored in the 'distance' column)
    nearby_points = gpd.sjoin_nearest(df_sample, df_rics, max_distance=40, how='inner', distance_col="distance")
    if len(nearby_points)==0:
        print('Cannot find nearest RICS image points')
        continue
    else:
        print('Number of nearest RICS images ',len(nearby_points))

    # Add columns for relative bearing and valid view
    # NOTE(review): relative_bearings and valid_views are only used by the
    # commented-out code below; only relative_angles is filled.
    relative_bearings = []
    valid_views = []
    relative_angles=[]
    for idx, row in nearby_points.iterrows():
        house_geom = row.geometry
        # Look up the matched RICS point via the right-hand index stored by the join.
        # NOTE(review): the column name 'index_right0' may depend on the geopandas
        # version / index layout - confirm when upgrading.
        camera_geom = df_rics.loc[row.index_right0].geometry
        car_bearing = df_rics.loc[row.index_right0].bearing

        # Calculate the relative angle
        # (bearing from the camera position to the sample, compared with the
        # direction the side camera points)
        house_bearing = calculate_bearing(camera_geom, house_geom)
        relative_angle = calculate_relative_angle(car_bearing, house_bearing, side)
        relative_angles.append(relative_angle)
        # Disabled alternative based on is_within_view:
        # bearing = calculate_bearing(house_geom, camera_geom)
        # relative_bearings.append(bearing)

        # # Check if the house is within the camera's field of view
        # valid = is_within_view(bearing, camera_bearing, side)
        # valid_views.append(valid)

    # Add results back to the GeoDataFrame
    # nearby_points['relative_bearing'] = relative_bearings
    # nearby_points['valid_view'] = valid_views
    nearby_points['relative_angle'] = relative_angles

    # Filter to keep only valid camera points
    # (matches whose relative angle is below 60 degrees)
    # filtered = nearby_points[nearby_points['valid_view']]
    filtered = nearby_points[nearby_points['relative_angle']<60]
    # filtered= filtered.drop_duplicates(subset='geometry')
    # Smallest relative angle first, then drop rows that repeat the same
    # geometry and relative angle
    filtered = filtered.sort_values(by=['relative_angle'])
    filtered= filtered.drop_duplicates(subset=['geometry','relative_angle'], keep='first')

    # merging back to original df
    # # Select only columns from the right that are not in the left (excluding the key column)
    new_columns = [col for col in filtered.columns if col not in df_sample.columns]
    filtered_cleaned=filtered[['UFI'] + new_columns]
    # Left merge on 'UFI': samples without a valid match keep NaN in the new columns
    merged = pd.merge(df_sample, filtered_cleaned,how='left',on='UFI')
    # add image basename
    # ('<foldername>_<side>_<frame zero-padded to 7 digits>.jpg', or None when there is no match)
    merged['RICS_img'] = merged['frame'].apply(lambda frame: '_'.join([foldername, side, f"{int(frame):07d}.jpg"]) if pd.notna(frame) else None)
    df_RICS.append(merged)

searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\cbd_141112_11404_L_interpolated.geojson
Number of nearest RICS images  345
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\cbd_141112_11404_R_interpolated.geojson
Number of nearest RICS images  345
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\Industrial_131112_04706_L_interpolated.geojson
Number of nearest RICS images  287
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\Industrial_131112_04706_R_interpolated.geojson
Number of nearest RICS images  286
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\RICS\middle_res_131112_24238_L_interpolated.geojson
Cannot find nearest RICS image points
searching for C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height

In [9]:
# Stack the per-file match tables into a single frame
df_RICS = pd.concat(df_RICS)
# For each sample ('UFI') keep one row: order by 'UFI', then 'distance', then
# 'relative_angle' with missing values mapped to +inf so unmatched rows come
# last, keep the first row per 'UFI', and renumber the index from zero.
df_RICS_final = (
    df_RICS
    .sort_values(
        by=['UFI', 'distance', 'relative_angle'],
        key=lambda column: column.fillna(float('inf')),
    )
    .drop_duplicates(subset=['UFI'], keep='first')
    .reset_index(drop=True)
)
df_RICS_final

Unnamed: 0,WALL_M,STEPS,USAGE,STOREYS,ASSESSOR,address,Area_1,PMF,Dep_500,Dep_200,Dep_100,Dep_50,Dep_20,Dep_10,Ground_Lev,Floor_Leve,AGE,UFI,ZONE_DESCR,geometry,index_right0,frame,time,speed,bearing,satellites,fix_quality,camera_ID,index,lon,lat,distance,relative_angle,RICS_img
0,Brick,1,Residential,1,NEIL,80 THORNE STREET,339.307232,4.1541,0.9867,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,179.315,179.595,< 1960,1,General Residential,POINT (533121.036 6114319.815),7555.0,7341.0,3709.75,38.1,99.2,10.0,1.0,108246.0,7341.0,147.363508,-35.113159,27.921232,0.930013,old_res_131112_33816_R_0007341.jpg
1,Timber,1,Residential,1,NEIL,156 FORSYTH STREET,261.009733,5.5493,-9999.0000,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,178.715,178.995,< 1960,2,Medium Density Residential,POINT (533021.042 6114331.849),7518.0,7304.0,3660.25,37.5,98.6,10.0,1.0,108246.0,7304.0,147.362419,-35.113016,32.240570,0.058193,old_res_131112_33816_R_0007304.jpg
2,Brick,1,Residential,1,NEIL,42 BEST STREET,193.217477,2.8469,-9999.0000,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,180.650,180.930,< 1960,3,General Residential,POINT (533343.734 6113998.271),2101.0,2051.0,35106.75,36.2,188.8,10.0,1.0,108246.0,2051.0,147.366291,-35.116350,34.089616,0.710337,old_res_131112_33816_R_0002051.jpg
3,Brick,1,Residential,1,NEIL,29 FOX STREET,168.814975,2.5406,-9999.0000,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,180.694,180.974,< 1960,4,General Residential,POINT (533269.26 6113972.736),8777.0,8523.0,4532.25,35.3,188.9,9.0,1.0,53046.0,8523.0,147.364759,-35.116503,31.763384,2.518713,old_res_131112_33816_L_0008523.jpg
4,Brick,1,Residential,1,NEIL,150 FORSYTH STREET,340.008460,5.3143,1.2041,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,179.041,179.321,< 1960,5,Medium Density Residential,POINT (533066.072 6114319.908),7535.0,7321.0,3704.75,38.0,98.6,10.0,1.0,108246.0,7321.0,147.362935,-35.113083,36.840376,1.993235,old_res_131112_33816_R_0007321.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3459,Brick,1,Residential,1,ROBERT,7 ALBURY STREET,99.391094,5.6932,1.0956,0.481,-9999.0,-9999.0,-9999.0,-9999.0,178.544,178.824,< 1960,3460,Medium Density Residential,POINT (532516.38 6114441.411),4788.0,4710.0,1725.50,36.8,248.3,10.0,1.0,53046.0,4710.0,147.356757,-35.112181,17.766495,1.655904,old_res_131112_33816_L_0004710.jpg
3460,Brick,1,Residential,2,NEIL,91 TARCUTTA STREET,342.391774,3.0606,-9999.0000,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,180.035,180.315,< 1960,3461,Medium Density Residential,POINT (534269.136 6114120.487),2217.0,2145.0,12828.75,37.7,338.9,11.0,1.0,108246.0,2145.0,147.375725,-35.115274,33.704380,0.896454,cbd_141112_11404_R_0002145.jpg
3461,Timber,1,Residential,2,NEIL,95 TARCUTTA STREET,186.748847,5.0189,1.3697,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,179.797,180.077,< 1960,3462,Medium Density Residential,POINT (534257.106 6114147.466),2229.0,2157.0,12831.75,36.3,339.0,11.0,1.0,108246.0,2157.0,147.375607,-35.115024,32.196554,1.088700,cbd_141112_11404_R_0002157.jpg
3462,Brick,1,Residential,2,NEIL,93 TARCUTTA STREET,461.618108,3.0606,-9999.0000,-9999.000,-9999.0,-9999.0,-9999.0,-9999.0,180.086,180.366,< 1960,3463,Medium Density Residential,POINT (534266.493 6114135.332),2223.0,2151.0,12830.25,37.4,338.5,11.0,1.0,108246.0,2151.0,147.375664,-35.115146,36.714276,1.885671,cbd_141112_11404_R_0002151.jpg


In [10]:
# Download the matched RICS image of every sample point from S3 into out_folder.
# download_file does not create missing directories, so make sure the target exists.
os.makedirs(out_folder, exist_ok=True)
for idx, row in df_RICS_final.iterrows():
    frame = row.frame
    # Samples without a matched image have no frame number
    if pd.isna(frame):
        continue
    # Rebuild the S3 object key from the image name '<foldername>_<side>_<frame>.jpg'
    out_basename = row.RICS_img
    foldername = '_'.join(out_basename.split('_')[:-2])
    side = out_basename.split('_')[-2]
    object_key = f'{foldername}/{side}/{int(frame):07d}.jpg'
    download_path = os.path.join(out_folder, out_basename)
    # Skip images that were already downloaded by an earlier run
    if not os.path.exists(download_path):
        s3.download_file(bucket_name, object_key, download_path)

In [11]:
# Write the matched table next to the images as '<sample name>_RICS.geojson'.
# The name is built from the file stem so it also works for non-GeoJSON inputs
# (e.g. a .shp sample file), instead of writing GeoJSON under the input's own name.
sample_stem = os.path.splitext(os.path.basename(sample_file))[0]
out_file = os.path.join(out_folder, sample_stem + '_RICS.geojson')
df_RICS_final.to_file(out_file, driver="GeoJSON")