In [0]:
# Notebook: Extract_Metadata
import exifread
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
import piexif
import io
import logging
import os
import uuid
from datetime import datetime

# Configure logging: each run writes to its own timestamped file inside a
# dedicated log directory, created on demand.
log_dir = "/dbfs/tmp/logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(
    log_dir,
    f"myapp_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
)

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename=log_file,
)
logger = logging.getLogger(__name__)

# Declare the "imageDataPath" text widget with an empty default value.
dbutils.widgets.text("imageDataPath", "")

# The widget value is the path of the image to inspect; it is kept under both
# names used by the rest of the notebook.
image_path = image_data_path = dbutils.widgets.get("imageDataPath")

def get_all_exif_tags(image_path) -> dict:
    """Collect the EXIF metadata of an image into one flat dictionary.

    Tags are gathered in two passes:

    1. The EXIF payload Pillow exposes as ``img.info["exif"]`` is parsed with
       piexif, and every entry of the ``0th``, ``Exif``, ``GPS``, ``1st`` and
       ``Interop`` sections is stored under its tag name (``Unknown_<id>``
       when the id has no name). ``bytes`` values are decoded as UTF-8,
       falling back to a hex string. Every name known to ``TAGS`` or
       ``GPSTAGS`` that was not found is then added with the value ``None``.
    2. The file is read again with exifread and each tag it reports is stored
       under exifread's own key as ``str(value)``.

    All sections share one namespace, so when two sections carry the same tag
    name the value from the section processed later wins.

    An image without any EXIF payload is not an error: it yields only the
    ``None``-filled names plus whatever exifread reports.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A dict mapping tag names to their values.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    # Check if the file exists
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"No such file or directory: '{image_path}'")

    exif_dict = {}

    # The context manager releases the file handle Pillow keeps open;
    # only the raw EXIF bytes are needed past this point.
    with Image.open(image_path) as img:
        raw_exif = img.info.get("exif")

    # piexif.load() falls back to treating input it cannot recognise as a
    # file name, so an empty payload would surface as a misleading
    # FileNotFoundError. Skip parsing when there is nothing to parse.
    exif_data = piexif.load(raw_exif) if raw_exif else {}

    def extract_tags(exif_section, tag_source):
        """Copy one EXIF section into exif_dict, keyed by tag name."""
        for tag, value in exif_section.items():
            tag_name = tag_source.get(tag, f"Unknown_{tag}")  # Default if tag name not found
            if isinstance(value, bytes):
                try:
                    value = value.decode("utf-8")  # Decode text values
                except UnicodeDecodeError:
                    value = value.hex()  # Convert binary to hex string
            exif_dict[tag_name] = value

    # Section order matters: later sections overwrite earlier ones on
    # tag-name collisions.
    sections = (
        ("0th", TAGS),      # Standard EXIF
        ("Exif", TAGS),     # Extended EXIF
        ("GPS", GPSTAGS),   # GPS Data
        ("1st", TAGS),      # Thumbnail EXIF
        ("Interop", TAGS),  # Interoperability metadata
    )
    for section_name, tag_source in sections:
        extract_tags(exif_data.get(section_name, {}), tag_source)

    # Fill missing tags with None (all known EXIF tags)
    for tag_name in set(TAGS.values()) | set(GPSTAGS.values()):
        exif_dict.setdefault(tag_name, None)

    # Use exifread to extract MakerNotes and other private metadata
    with open(image_path, "rb") as f:
        for tag, value in exifread.process_file(f).items():
            exif_dict[tag] = str(value)

    return exif_dict

# Extract the metadata of the image selected through the notebook widget.
metadata = get_all_exif_tags(image_path)

# Log metadata; the dict is passed as a logging argument so it is only
# formatted when the record is actually emitted.
logger.info("Extracted image metadata: %s", metadata)

# Hand the result back to the caller of this notebook.
# NOTE(review): dbutils.notebook.exit is documented as taking a string, while
# `metadata` is a dict — confirm how the caller consumes this value and
# consider serialising it (e.g. as JSON) if a string is required.
dbutils.notebook.exit(metadata)