In [10]:
import os
import shutil
import cv2
import pandas as pd
from PIL import Image, ExifTags
import numpy as np

# -------- Helper Functions --------

def extract_metadata(image_path):
    """
    Read the EXIF block of an image through Pillow.

    Every EXIF entry is stored under its readable tag name when
    ``ExifTags.TAGS`` knows the numeric id, and under the raw id otherwise.

    This is a best-effort reader: any failure (missing file, unsupported
    format, ...) is reported on stdout and whatever was gathered so far --
    normally an empty dict -- is returned instead of raising.
    """
    collected = {}
    try:
        with Image.open(image_path) as picture:
            raw_exif = picture._getexif()
            if not raw_exif:
                print("No EXIF metadata for", image_path)
            else:
                for numeric_id, entry in raw_exif.items():
                    # Fall back to the numeric id for tags Pillow has no name for.
                    readable = ExifTags.TAGS.get(numeric_id, numeric_id)
                    collected[readable] = entry
    except Exception as e:
        print("Error extracting metadata from", image_path, ":", e)
    return collected

def convert_to_degrees(dms):
    """
    Turn a (degrees, minutes, seconds) triple into decimal degrees.

    Each of the first three items of ``dms`` is passed through ``float()``,
    so any value ``float()`` accepts is fine.
    """
    degrees = float(dms[0])
    minutes = float(dms[1])
    seconds = float(dms[2])
    return degrees + minutes / 60 + seconds / 3600

def convert_gps(gps_info):
    """
    Converts the GPSInfo from EXIF data to decimal latitude and longitude.

    ``gps_info`` is looked up with the numeric keys 1-4 (latitude reference,
    latitude DMS, longitude reference, longitude DMS).

    A coordinate is negated only when its reference explicitly says south
    ("S") or west ("W"). A missing or unrecognised reference leaves the value
    positive instead of silently flipping it.

    Returns a tuple (latitude, longitude), or (None, None) if either DMS
    value is missing or the conversion fails (the error is printed).
    """
    def _points_to(ref, letter):
        # References are normally one-letter strings, but tolerate bytes,
        # NUL/space padding and lower case so a valid "S"/"W" is never missed.
        if isinstance(ref, bytes):
            ref = ref.decode("ascii", errors="ignore")
        return isinstance(ref, str) and ref.strip("\x00 ").upper() == letter

    try:
        lat_dms = gps_info.get(2)
        lon_dms = gps_info.get(4)
        if lat_dms and lon_dms:
            lat = convert_to_degrees(lat_dms)
            if _points_to(gps_info.get(1), "S"):
                lat = -lat
            lon = convert_to_degrees(lon_dms)
            if _points_to(gps_info.get(3), "W"):
                lon = -lon
            return lat, lon
    except Exception as e:
        print("Error converting gps:", e)
    return None, None

def get_new_filename(original_filename, datetime_str, gps, file_type):
    """
    Constructs a new filename based on the DateTime and GPS metadata.
    E.g.:
      Original: DJI_20250212114151_0035_V.jpg
      DateTime: "2025:02:12 11:41:51"
      GPS: (17.9735, 76.4514)
    Produces:
      20250212_114151_17.9735_76.4514_0035_V.jpg

    Parameters:
      original_filename: source file name; its last two "_"-separated pieces
        (without extension) become the trailing identifier, or "unknown" when
        the name has no underscore.
      datetime_str: "<date> <time>" string. Anything that is not exactly two
        whitespace-separated tokens (None, a blank-filled value, ...) yields
        "unknown" for both the date and the time part.
      gps: (latitude, longitude); "noGPS" is used when either is None.
      file_type: accepted for call compatibility; not used when building the
        name (the V/T marker already comes from the original filename).

    The result always carries a ".jpg" extension.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so padded or blank-filled timestamps cannot
    # break the two-way unpacking the way split(" ") did.
    stamp_tokens = datetime_str.split() if isinstance(datetime_str, str) else []
    if len(stamp_tokens) == 2:
        date_clean = stamp_tokens[0].replace(":", "")  # e.g., 20250212
        time_clean = stamp_tokens[1].replace(":", "")  # e.g., 114151
    else:
        date_clean = "unknown"
        time_clean = "unknown"

    # Extract trailing part (e.g., "0035_V") from original filename
    stem = os.path.splitext(original_filename)[0]
    pieces = stem.split('_')
    trailing = "_".join(pieces[-2:]) if len(pieces) >= 2 else "unknown"

    # Append GPS info if available
    if gps[0] is not None and gps[1] is not None:
        gps_part = f"{gps[0]:.4f}_{gps[1]:.4f}"
    else:
        gps_part = "noGPS"

    return f"{date_clean}_{time_clean}_{gps_part}_{trailing}.jpg"

def hotspot_detection_t(image_path, threshold=240):
    """
    Locate hotspots in a thermal (T) image.

    The image is converted to grayscale and binarised with ``threshold``
    (cv2.THRESH_BINARY). Each external contour of the resulting mask is
    summarised by its minimum enclosing circle.

    Returns a 4-tuple:
      hotspots: list of ((x, y), radius) with integer-truncated values
      annotated: copy of the T image with a red circle per hotspot
      t_width, t_height: dimensions of the T image
    If the image cannot be read, an error is printed and ([], None, 0, 0)
    is returned.
    """
    thermal = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if thermal is None:
        print("Error reading thermal image:", image_path)
        return [], None, 0, 0

    # shape is (rows, cols, ...), i.e. height comes first
    t_height, t_width = thermal.shape[:2]

    grayscale = cv2.cvtColor(thermal, cv2.COLOR_BGR2GRAY)
    binary_mask = cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY)[1]
    contours, _hierarchy = cv2.findContours(
        binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # First collect the circles ...
    hotspots = []
    for contour in contours:
        (cx, cy), enclosing_radius = cv2.minEnclosingCircle(contour)
        hotspots.append(((int(cx), int(cy)), int(enclosing_radius)))

    # ... then draw them, in the same order, on a copy of the input.
    annotated = thermal.copy()
    for center, radius in hotspots:
        cv2.circle(annotated, center, radius, (0, 0, 255), 2)  # red (BGR) for T image

    return hotspots, annotated, t_width, t_height

def annotate_v_image(v_image_path, hotspots, target_width, target_height):
    """
    Annotates the V (normal) image with circles based on hotspots from the T image.
    1) Resizes the V image to match T image dimensions (target_width x target_height).
    2) Draws green circles at the same (x, y) coordinates and radius.

    Parameters:
      v_image_path: path of the V image to read.
      hotspots: iterable of (center, radius) pairs, as produced by
        hotspot_detection_t.
      target_width, target_height: size of the T image in pixels.

    Returns the resized and annotated V image, or None (after printing a
    message) when the target size is not positive or the V image cannot be
    read.
    """
    # Validate the size before touching the file: the thermal detector reports
    # 0 x 0 when it could not read the T image, and cv2.resize raises on an
    # empty destination size rather than returning something usable.
    if target_width <= 0 or target_height <= 0:
        print("Invalid target size for V image:", v_image_path)
        return None

    img = cv2.imread(v_image_path, cv2.IMREAD_COLOR)
    if img is None:
        print("Error reading V image:", v_image_path)
        return None

    # Resize V image to match T image dimensions; cv2.resize returns a new
    # array, so drawing on it never modifies the decoded original.
    annotated = cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_LINEAR)

    for center, radius in hotspots:
        cv2.circle(annotated, center, radius, (0, 255, 0), 2)  # green circle for V image

    return annotated

# -------- Main Processing Function --------

def _collect_image_pairs(dataset_dir):
    """Group the .jpg files of dataset_dir into {base: {"V": path, "T": path}}."""
    pairs = {}
    # os.listdir gives no ordering guarantee; sorting keeps the processing
    # order (and therefore the CSV row order) reproducible between runs.
    for file in sorted(os.listdir(dataset_dir)):
        if not file.lower().endswith(".jpg"):
            continue
        # Split on the last underscore: "DJI_20250212114151_0035" / "V.jpg"
        base, separator, suffix = file.rpartition('_')
        if not separator:
            continue
        if suffix.startswith("V"):
            file_type = "V"
        elif suffix.startswith("T"):
            file_type = "T"
        else:
            # Neither a V nor a T image: it cannot be part of a pair.
            continue
        pairs.setdefault(base, {})[file_type] = os.path.join(dataset_dir, file)
    return pairs


def process_dataset_advanced(dataset_dir, output_dir):
    """
    Processes the dataset as follows:
      - Groups images (T and V) by their common base (e.g., DJI_20250212114151_0035).
      - Extracts metadata (DateTime and GPS) from the V image.
      - Renames images by appending date, time, GPS info, and the trailing identifier.
      - Organizes images into date-based folders (with date separated, e.g., output/date/2025-02-12/V).
      - Separates V and T images into dedicated folders (output/V_images and output/T_images).
      - Groups images with similar GPS into folders (output/GPS/<lat>_<lon>).
      - For each T image, detects and circles hotspots, saves annotated T image in output/hotspots/T.
      - Resizes and annotates the corresponding V image to match T dimensions, saves in output/hotspots/V.
      - Saves metadata for each pair into a CSV file.

    Bases that lack either the V or the T image are reported and skipped;
    .jpg files whose suffix is neither V nor T are ignored. If the T image of
    a pair cannot be decoded, the pair is still copied and recorded, but no
    hotspot images are written and both hotspot columns contain "N/A".

    Parameters:
      dataset_dir: directory holding the raw .jpg images.
      output_dir: root of the output tree; created if necessary.

    Returns the list of per-pair record dicts that was written to
    <output_dir>/metadata.csv.
    """
    # Create necessary output folders
    os.makedirs(output_dir, exist_ok=True)
    for folder in (
        "date",
        "V_images",
        "T_images",
        "GPS",
        os.path.join("hotspots", "T"),
        os.path.join("hotspots", "V"),
    ):
        os.makedirs(os.path.join(output_dir, folder), exist_ok=True)

    pairs = _collect_image_pairs(dataset_dir)

    metadata_records = []

    for base, files in pairs.items():
        # We only process if we have both a V and T file
        if "V" not in files or "T" not in files:
            print("Skipping pair", base, "as both V and T are not present.")
            continue

        v_path = files["V"]
        t_path = files["T"]

        # Extract metadata from V image for DateTime and GPS
        meta = extract_metadata(v_path)
        datetime_str = meta.get("DateTime", "unknown unknown")
        # Only a "<date> <time>" value yields a date folder; a blank-filled or
        # otherwise malformed value must not produce an empty folder name.
        date_tokens = datetime_str.split() if isinstance(datetime_str, str) else []
        date_folder = date_tokens[0].replace(":", "-") if len(date_tokens) == 2 else "unknown_date"

        gps_lat, gps_lon = (None, None)
        if "GPSInfo" in meta:
            gps_lat, gps_lon = convert_gps(meta["GPSInfo"])

        # Generate new filenames
        original_filename_v = os.path.basename(v_path)
        original_filename_t = os.path.basename(t_path)
        new_filename_v = get_new_filename(original_filename_v, datetime_str, (gps_lat, gps_lon), "V")
        new_filename_t = get_new_filename(original_filename_t, datetime_str, (gps_lat, gps_lon), "T")

        # Copy images into date-based folders
        date_v_folder = os.path.join(output_dir, "date", date_folder, "V")
        date_t_folder = os.path.join(output_dir, "date", date_folder, "T")
        os.makedirs(date_v_folder, exist_ok=True)
        os.makedirs(date_t_folder, exist_ok=True)

        dest_v_date = os.path.join(date_v_folder, new_filename_v)
        dest_t_date = os.path.join(date_t_folder, new_filename_t)
        shutil.copy(v_path, dest_v_date)
        shutil.copy(t_path, dest_t_date)

        # Copy images into overall V_images and T_images folders
        dest_v_all = os.path.join(output_dir, "V_images", new_filename_v)
        dest_t_all = os.path.join(output_dir, "T_images", new_filename_t)
        shutil.copy(v_path, dest_v_all)
        shutil.copy(t_path, dest_t_all)

        # Group images by similar GPS (if available)
        if gps_lat is not None and gps_lon is not None:
            gps_str = f"{gps_lat:.4f}_{gps_lon:.4f}"
            gps_folder = os.path.join(output_dir, "GPS", gps_str)
            os.makedirs(gps_folder, exist_ok=True)
            shutil.copy(v_path, os.path.join(gps_folder, new_filename_v))
            shutil.copy(t_path, os.path.join(gps_folder, new_filename_t))

        hotspot_t_path = os.path.join(output_dir, "hotspots", "T", new_filename_t)
        hotspot_v_path = os.path.join(output_dir, "hotspots", "V", new_filename_v)

        # Detect hotspots on the T image
        hotspots, annotated_t, t_width, t_height = hotspot_detection_t(t_path, threshold=240)

        if annotated_t is None:
            # The T image could not be decoded: there is nothing to write and
            # no valid size to resize the V image to, so skip both instead of
            # letting OpenCV raise and abort the whole run.
            print(f"Could not annotate T image for pair {base}")
            annotated_v = None
        else:
            cv2.imwrite(hotspot_t_path, annotated_t)
            # Resize & annotate the V image with the same hotspots
            annotated_v = annotate_v_image(v_path, hotspots, t_width, t_height)

        if annotated_v is not None:
            cv2.imwrite(hotspot_v_path, annotated_v)
        else:
            print(f"Could not annotate V image for pair {base}")

        # Record metadata for this image pair
        metadata_records.append({
            "OriginalBase": base,
            "NewFilename_V": new_filename_v,
            "NewFilename_T": new_filename_t,
            "DateTime": datetime_str,
            "DateFolder": date_folder,
            "GPSLatitude": gps_lat if gps_lat is not None else "N/A",
            "GPSLongitude": gps_lon if gps_lon is not None else "N/A",
            "Hotspots": hotspots,
            "V_Image_DatePath": dest_v_date,
            "T_Image_DatePath": dest_t_date,
            "V_Image_AllPath": dest_v_all,
            "T_Image_AllPath": dest_t_all,
            "V_Image_Hotspots": hotspot_v_path if annotated_v is not None else "N/A",
            "T_Image_Hotspots": hotspot_t_path if annotated_t is not None else "N/A",
        })

    # Save all metadata records to a CSV file
    csv_path = os.path.join(output_dir, "metadata.csv")
    df = pd.DataFrame(metadata_records)
    df.to_csv(csv_path, index=False)
    print("Metadata saved to", csv_path)
    return metadata_records

# -------- Run the Advanced Processing --------

# Script entry point: runs the full pipeline on the default folders when this
# file is executed directly (nothing happens when it is merely imported).
if __name__ == "__main__":
    # Ensure that the folder "dataset/transformer" exists and contains your raw images (T & V).
    # The path is relative to the current working directory.
    dataset_directory = os.path.join("dataset", "transformer")
    output_directory = "output"  # This folder and its subfolders will be created automatically.

    # The returned list holds one record dict per processed V/T pair; the same
    # data is written to <output_directory>/metadata.csv by the call itself.
    metadata_records = process_dataset_advanced(dataset_directory, output_directory)
    print("Processing complete.")


Metadata saved to output\metadata.csv
Processing complete.
