In [24]:
import pandas as pd
import os
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS


# --- Helper: Extract GPS Info from JPEG
def get_decimal_from_dms(dms, ref):
    """Convert an EXIF degrees/minutes/seconds triple to signed decimal degrees.

    Args:
        dms: Indexable of three components (degrees, minutes, seconds). Each
            component may be anything ``float()`` accepts, or a
            ``(numerator, denominator)`` tuple.
        ref: Hemisphere reference; ``'S'`` and ``'W'`` produce a negative
            result, any other value leaves the result positive.

    Returns:
        The coordinate in decimal degrees as a float.
    """
    def to_float(component):
        # extract_gps_info already allows for altitude arriving as a
        # (numerator, denominator) tuple; accept the same shape here so both
        # helpers agree instead of failing in float().
        if isinstance(component, tuple):
            numerator, denominator = component
            return numerator / denominator
        return float(component)

    degrees = to_float(dms[0])
    minutes = to_float(dms[1])
    seconds = to_float(dms[2])
    decimal = degrees + (minutes / 60.0) + (seconds / 3600.0)
    if ref in ('S', 'W'):
        decimal = -decimal
    return decimal

def extract_gps_info(photo_path):
    """Read the camera position stored in a photo's EXIF GPS block.

    Args:
        photo_path: Path of the image file to inspect.

    Returns:
        A ``(latitude, longitude, altitude)`` tuple. Latitude and longitude
        are signed decimal degrees. Altitude is a float, ``0.0`` when the
        photo has no ``GPSAltitude`` tag, and negative when
        ``GPSAltitudeRef`` marks the value as below sea level.
        ``(None, None, None)`` is returned when the file has no EXIF data,
        when the GPS block lacks latitude/longitude, or when any error occurs
        while reading (the error is printed, not raised).
    """
    try:
        # Use a context manager so the underlying file handle is released
        # even when many photos are processed in one session.
        with Image.open(photo_path) as img:
            exif_data = img._getexif()
        if not exif_data:
            return None, None, None

        gps_info = {}
        for key, value in exif_data.items():
            if TAGS.get(key) == "GPSInfo":
                # Translate numeric GPS sub-tag ids into their names.
                gps_info = {GPSTAGS.get(t): value[t] for t in value}
                break

        # A photo can carry EXIF without a usable position; treat that like
        # the "no EXIF" case instead of surfacing a bare KeyError message.
        required = ('GPSLatitude', 'GPSLatitudeRef',
                    'GPSLongitude', 'GPSLongitudeRef')
        if any(name not in gps_info for name in required):
            return None, None, None

        lat = get_decimal_from_dms(gps_info['GPSLatitude'], gps_info['GPSLatitudeRef'])
        lon = get_decimal_from_dms(gps_info['GPSLongitude'], gps_info['GPSLongitudeRef'])

        alt = gps_info.get('GPSAltitude', (0, 1))
        if isinstance(alt, tuple):
            altitude = alt[0] / alt[1]
        else:
            # Normalise rational-like objects to a plain float so callers
            # get a numeric value rather than a library-specific type.
            altitude = float(alt)
        # EXIF stores altitude as an absolute value; a reference of 1 means
        # "below sea level". The tag may be decoded as an int or a byte.
        if gps_info.get('GPSAltitudeRef') in (1, b'\x01'):
            altitude = -altitude
        return lat, lon, altitude
    except Exception as e:
        # Best-effort boundary: one unreadable photo must not abort a batch.
        print(f"GPS extraction error for {photo_path}: {e}")
        return None, None, None


def get_df_with_camera_position(
    csv_path='/home/ec2-user/SageMaker/all_bounding_boxes.csv',
    general_photo_path='/home/ec2-user/SageMaker/photos',
):
    """Attach each photo's EXIF camera position to its bounding boxes.

    Args:
        csv_path: CSV file with at least the columns ``image``, ``class``,
            ``x1``, ``x2``, ``y1`` and ``y2``. Defaults to the original
            hard-coded location.
        general_photo_path: Directory that holds the photos named by the
            cleaned ``image`` column. Defaults to the original hard-coded
            location.

    Returns:
        A DataFrame with one row per bounding box and the columns ``image``,
        ``class``, ``x1``, ``x2``, ``y1``, ``y2``, ``latitude``,
        ``longitude``, ``altitude``. Rows are ordered by (image, class)
        group. The position columns hold ``None`` for photos whose GPS data
        could not be read.
    """
    columns = ['image', 'class', 'x1', 'x2', 'y1', 'y2',
               'latitude', 'longitude', 'altitude']

    # --- Load CSV
    df = pd.read_csv(csv_path)

    # --- Clean 'image' column: keep the text before the first '.rf',
    # drop the '_jpeg' marker and append a real '.jpeg' extension.
    df['image'] = df['image'].apply(
        lambda x: x.split('.rf')[0].replace('_jpeg', '') + '.jpeg'
    )

    results = []
    # One photo usually appears under several classes; read its EXIF once.
    gps_by_image = {}

    # --- Group by base image and class
    for (image_name, class_name), group_df in df.groupby(['image', 'class']):
        if image_name not in gps_by_image:
            photo_path = os.path.join(general_photo_path, image_name)
            gps_by_image[image_name] = extract_gps_info(photo_path)
        lat, lon, alt = gps_by_image[image_name]

        # Emit one output row per bounding box in the group.
        for box in group_df[['x1', 'x2', 'y1', 'y2']].to_dict('records'):
            results.append({
                'image': image_name,
                'class': class_name,
                'x1': box['x1'],
                'x2': box['x2'],
                'y1': box['y1'],
                'y2': box['y2'],
                'latitude': lat,
                'longitude': lon,
                'altitude': alt,
            })

    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(results, columns=columns)

# --- Call the function
# Builds the bounding-box table with latitude/longitude/altitude columns;
# the value returned here is already a pandas DataFrame.
results = get_df_with_camera_position()

# Quick inspection: the returned type, then pandas' preview of the rows.
print(type(results))
print(results)

# --- Create final DataFrame
#final_df = pd.DataFrame(results)

# --- Save to CSV (if needed)
#save_path = '/home/ec2-user/SageMaker/combined_with_gps.csv'
#final_df.to_csv(save_path, index=False)
#print(f"Final DataFrame saved to: {save_path}")


<class 'pandas.core.frame.DataFrame'>
                               image    class           x1           x2  \
0     DJI_20250424192953_0006_V.jpeg  barcode  1243.191895  1286.874878   
1     DJI_20250424192953_0006_V.jpeg  barcode   941.691345   982.890137   
2     DJI_20250424192953_0006_V.jpeg  barcode  1233.339478  1276.073242   
3     DJI_20250424192953_0006_V.jpeg  barcode  1461.385376  1502.566040   
4     DJI_20250424192953_0006_V.jpeg  barcode  1212.242188  1257.599243   
...                              ...      ...          ...          ...   
1055  DJI_20250424193310_0164_V.jpeg  pallets  3126.408691  3306.826904   
1056  DJI_20250424193310_0164_V.jpeg  pallets  3125.772217  3272.637939   
1057  DJI_20250424193311_0165_V.jpeg  pallets  3099.492920  3280.101074   
1058  DJI_20250424193311_0165_V.jpeg  pallets  3112.194580  3276.982666   
1059  DJI_20250424193311_0165_V.jpeg  pallets  3105.455322  3249.014893   

               y1           y2   latitude  longitude altitude