[SENSEFULNAME]

Extract metadata from files and use it to rename them according to selected metadata fields.
Supports
    > JPEG
    > JPG
    > PNG

In [65]:
import pyheif
from PIL import Image
from PIL.ExifTags import TAGS
import exifread
import os
import datetime
from pymediainfo import MediaInfo
import shutil
import pandas as pd

In [66]:
# Directory that is scanned for input files.
INPUT = './test'
# Directory that the renamed copies are written to.
OUTPUT = './result'
# Template for the new file name; every `{field}` placeholder is substituted by name_of_file().
NAMING_FORMAT = '{datetime_original} ({camera_model})'
# Lower-case extensions (without the dot) accepted by the zero-argument get_files().
ALLOWED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'heic']
# NOTE(review): not read anywhere in the visible code — confirm whether it is still needed.
DEVELOPMENT_MODE = True

In [67]:
def get_files():
    """
    Lists the files directly inside INPUT whose extension is in ALLOWED_EXTENSIONS.

    Returns:
    - list[str]: Bare entry names (not full paths) of the matching regular files.
    """
    matching = []
    for entry in os.listdir(INPUT):
        # splitext() yields '' for names without a real extension, so an
        # extension-less entry that merely happens to be called "png" (or a
        # dotfile such as ".png") is not mistaken for an image.
        extension = os.path.splitext(entry)[1].lstrip('.').lower()
        if extension not in ALLOWED_EXTENSIONS:
            continue
        # os.listdir() also returns directories; only real files are wanted
        if os.path.isfile(os.path.join(INPUT, entry)):
            matching.append(entry)
    return matching

In [68]:
import os
import datetime
from PIL import Image, ExifTags
import pyheif
from pymediainfo import MediaInfo

def extract_image_metadata(image_path):
    """
    Dispatches metadata extraction to the reader matching the file's extension.

    Parameters:
    - image_path (str): Path to the image or video file.

    Returns:
    - dict | None: {"error": "File does not exist."} when the path is missing,
      None when the extension is not handled, otherwise whatever the
      format-specific extractor returns.
    """
    # Nothing to inspect if the path is missing
    if not os.path.exists(image_path):
        return {"error": "File does not exist."}

    # The extension is compared case-insensitively
    _, suffix = os.path.splitext(image_path)
    suffix = suffix.lower()

    # HEIC images
    if suffix == '.heic':
        return extract_heic_metadata(image_path)

    # JPEG, JPG, PNG images
    if suffix in ('.jpg', '.jpeg', '.png'):
        return extract_standard_image_metadata(image_path)

    # Video containers
    if suffix in ('.mov', '.mp4'):
        return extract_video_metadata(image_path)

    # Everything else is unsupported
    return None

def extract_heic_metadata(image_path):
    """
    Extracts EXIF metadata from a HEIC image.

    The EXIF payload is read from the metadata blocks pyheif exposes for the
    container. The pixel data is deliberately not decoded: an image rebuilt
    from raw pixels with Image.frombytes() carries no EXIF block and offers no
    _getexif() method, so going through it can never yield any tags.

    Parameters:
    - image_path (str): Path to the HEIC file.

    Returns:
    - dict | None: EXIF tags keyed by tag name (the numeric id is kept when
      ExifTags has no name for it), {"error": "No EXIF metadata found."} when
      the file carries no EXIF data, or None when the file cannot be read.
    """
    try:
        heif_file = pyheif.read(image_path)

        # NOTE(review): relies on pyheif listing EXIF as a
        # {'type': 'Exif', 'data': <bytes>} entry — confirm against the
        # installed pyheif version.
        exif_bytes = None
        for block in heif_file.metadata or []:
            if block.get('type') == 'Exif':
                exif_bytes = block.get('data')
                break
        if not exif_bytes:
            return {"error": "No EXIF metadata found."}

        exif = Image.Exif()
        exif.load(exif_bytes)

        raw_tags = dict(exif)
        # DateTimeOriginal, FNumber, ISO, ... live in the Exif sub-IFD (0x8769)
        raw_tags.update(exif.get_ifd(0x8769))
        # Expose the GPS sub-IFD (0x8825) as a nested mapping under 'GPSInfo'
        gps_ifd = exif.get_ifd(0x8825)
        if gps_ifd:
            raw_tags[0x8825] = dict(gps_ifd)

        if not raw_tags:
            return {"error": "No EXIF metadata found."}

        # Translate numeric tag ids to names, falling back to the id itself
        return {ExifTags.TAGS.get(tag, tag): value for tag, value in raw_tags.items()}

    except Exception:
        # Best effort: an unreadable or corrupt file yields no metadata
        return None

def extract_standard_image_metadata(image_path):
    """
    Extracts metadata from standard image formats like JPEG, JPG, PNG.

    Parameters:
    - image_path (str): Path to the image file.

    Returns:
    - dict | None: Whatever extract_exif_metadata() returns for the opened
      image, or None when the file cannot be opened.
    """
    try:
        # The context manager closes the underlying file handle even when
        # extraction fails, so scanning many files does not leak descriptors.
        with Image.open(image_path) as image:
            return extract_exif_metadata(image)

    except Exception:
        # Best effort: an unreadable image yields no metadata
        return None

def extract_exif_metadata(image):
    """
    Reads the EXIF tags of a PIL image and keys them by readable tag name.

    Parameters:
    - image (PIL.Image): Image object exposing `_getexif()`.

    Returns:
    - dict | None: Tag name -> value mapping (the numeric id stays the key when
      ExifTags has no name for it), {"error": "No EXIF metadata found."} when
      the image reports no EXIF data, or None if anything raises on the way.
    """
    try:
        raw_exif = image._getexif()
        if raw_exif:
            # Swap numeric tag ids for names, falling back to the id itself
            return {
                ExifTags.TAGS.get(tag_id, tag_id): tag_value
                for tag_id, tag_value in raw_exif.items()
            }
        return {"error": "No EXIF metadata found."}

    except Exception:
        return None

def extract_video_metadata(file_path):
    """
    Extracts metadata from a video file, including file timestamps.

    Parameters:
    - file_path (str): Path to the video file.

    Returns:
    - dict | None: Fields gathered from the Video, Audio and General tracks
      plus 'file_created_time' / 'file_modified_time' strings, or None when
      the file cannot be parsed.
    """
    try:
        # Extract metadata using pymediainfo
        media_info = MediaInfo.parse(file_path)
        video_metadata = {}

        for track in media_info.tracks:
            if track.track_type == "Video":
                # A track without a duration must not abort the whole
                # extraction: dividing None would raise and the blanket
                # handler below would then drop every other field as well.
                # NOTE(review): pymediainfo is expected to report absent
                # attributes as None — confirm.
                duration_ms = track.duration
                video_metadata.update({
                    'duration': duration_ms / 1000 if duration_ms is not None else None,  # seconds
                    'width': track.width,
                    'height': track.height,
                    'frame_rate': track.frame_rate,
                    'bit_rate': track.bit_rate,
                    'codec': track.codec,
                })
            elif track.track_type == "Audio":
                video_metadata.update({
                    'audio_channels': track.channel_s,
                    'audio_bit_rate': track.bit_rate,
                    'audio_codec': track.codec,
                })
            elif track.track_type == "General":
                video_metadata.update({
                    'file_size': track.file_size,
                    'format': track.format,
                    'overall_bit_rate': track.overall_bit_rate,
                    'encoded_date': getattr(track, 'encoded_date', None),
                    'tagged_date': getattr(track, 'tagged_date', None),
                })

        # File-system timestamps. st_ctime is the creation time only on
        # Windows; on Unix it is the time of the last metadata change.
        stat_info = os.stat(file_path)
        timestamp_format = '%Y-%m-%d %H:%M:%S'
        video_metadata['file_created_time'] = datetime.datetime.fromtimestamp(stat_info.st_ctime).strftime(timestamp_format)
        video_metadata['file_modified_time'] = datetime.datetime.fromtimestamp(stat_info.st_mtime).strftime(timestamp_format)

        return video_metadata

    except Exception:
        # Best effort: an unreadable video yields no metadata
        return None


In [69]:
import datetime


class ExifData:
    """
    Plain container for the EXIF fields that are used to build file names.

    Every attribute defaults to None, meaning "not present in the source".
    """

    # Layout of EXIF timestamps, e.g. "2021:05:06 07:08:09"
    _EXIF_DATETIME_FORMAT = '%Y:%m:%d %H:%M:%S'

    # Numeric ids of the GPS IFD entries as defined by the EXIF specification
    _GPS_LATITUDE_TAG = 2
    _GPS_LONGITUDE_TAG = 4
    _GPS_ALTITUDE_TAG = 6

    def __init__(self, image_size=None, camera_make=None, camera_model=None,
                 lens_make=None, lens_model=None, focal_length=None,
                 exposure_time=None, f_number=None, iso_speed=None,
                 datetime_original=None, gps_latitude=None, gps_longitude=None, gps_altitude=None,
                 orientation=None):

        self.image_size = image_size
        self.camera_make = camera_make
        self.camera_model = camera_model
        self.lens_make = lens_make
        self.lens_model = lens_model
        self.focal_length = focal_length
        self.exposure_time = exposure_time
        self.f_number = f_number
        self.iso_speed = iso_speed
        self.datetime_original = datetime_original
        self.gps_latitude = gps_latitude
        self.gps_longitude = gps_longitude
        self.gps_altitude = gps_altitude
        self.orientation = orientation

    def __repr__(self):
        return (f"ExifData("
                f"Image Size: {self.image_size}, "
                f"Camera: {self.camera_make} {self.camera_model}, "
                f"Lens: {self.lens_make} {self.lens_model}, "
                f"Focal Length: {self.focal_length}, "
                f"Exposure: {self.exposure_time}s at f/{self.f_number}, "
                f"ISO: {self.iso_speed}, "
                f"Date/Time: {self.datetime_original}, "
                f"GPS: ({self.gps_latitude}, {self.gps_longitude}, {self.gps_altitude}), "
                f"Orientation: {self.orientation})")

    @classmethod
    def _parse_datetime(cls, raw_value):
        """Turn an EXIF timestamp into a datetime; None when missing or malformed."""
        if not raw_value:
            return None
        if isinstance(raw_value, datetime.datetime):
            return raw_value
        # Cameras are known to write padded or all-zero timestamps
        # ("0000:00:00 00:00:00"); those must not abort the whole run.
        cleaned = str(raw_value).strip(' \t\r\n\x00')
        try:
            return datetime.datetime.strptime(cleaned, cls._EXIF_DATETIME_FORMAT)
        except ValueError:
            return None

    @classmethod
    def from_exif(cls, exif_data):
        """
        Factory method to create ExifData from a raw EXIF dictionary.

        Parameters:
        - exif_data (dict): EXIF tags keyed by tag name. Unknown keys are
          ignored and missing keys leave the matching attribute as None.

        Returns:
        - ExifData: The populated instance. `image_size` is "WIDTHxHEIGHT" or
          None when either dimension is absent; `datetime_original` is a
          datetime or None when the tag is absent or cannot be parsed.
        """
        width = exif_data.get('ExifImageWidth')
        height = exif_data.get('ExifImageHeight')
        image_size = f"{width}x{height}" if width is not None and height is not None else None

        # GPS entries may sit at the top level or be nested under 'GPSInfo',
        # keyed either by numeric tag id or by name.
        # NOTE(review): Pillow's _getexif() is expected to use the nested,
        # numeric form — confirm.
        gps_info = exif_data.get('GPSInfo')
        if not hasattr(gps_info, 'get'):
            gps_info = {}

        def gps_value(tag_name, tag_id):
            value = exif_data.get(tag_name)
            if value is None:
                value = gps_info.get(tag_id, gps_info.get(tag_name))
            return value

        return cls(
            image_size=image_size,
            camera_make=exif_data.get('Make'),
            camera_model=exif_data.get('Model'),
            lens_make=exif_data.get('LensMake'),
            lens_model=exif_data.get('LensModel'),
            focal_length=exif_data.get('FocalLength'),
            exposure_time=exif_data.get('ExposureTime'),
            f_number=exif_data.get('FNumber'),
            iso_speed=exif_data.get('ISOSpeedRatings'),
            datetime_original=cls._parse_datetime(exif_data.get('DateTimeOriginal')),
            gps_latitude=gps_value('GPSLatitude', cls._GPS_LATITUDE_TAG),
            gps_longitude=gps_value('GPSLongitude', cls._GPS_LONGITUDE_TAG),
            gps_altitude=gps_value('GPSAltitude', cls._GPS_ALTITUDE_TAG),
            orientation=exif_data.get('Orientation'),
        )

In [70]:
def rename_file(src, dst):
    """
    Renames (moves) a file.

    Parameters:
    - src (str): Path of the existing file.
    - dst (str): New path for the file.

    Returns:
    - str: A status message: success, or a notice that `src` does not exist.

    Raises:
    - OSError: When the rename itself fails; the original exception is chained.
    """
    # Check if source file exists
    if not os.path.exists(src):
        return f"Source file does not exist: {src}"

    try:
        os.rename(src, dst)
    except Exception as e:
        # Only exception instances can be raised; raising the bare message
        # string would itself fail with a TypeError and hide the real cause.
        raise OSError(f"Error while renaming file: {e}") from e

    return f"File renamed successfully from {src} to {dst}"
    
def count_no_files_has_name(name, folder):
    """
    Returns a 1-based sequence number for `name` inside `folder`.

    Parameters:
    - name (str): Substring to look for in the entry names.
    - folder (str): Directory whose entries are inspected (not recursive).

    Returns:
    - int: One more than the number of entries whose name contains `name`;
      1 when nothing matches or when `folder` does not exist yet.
    """
    try:
        entries = os.listdir(folder)
    except FileNotFoundError:
        # A folder that has not been created yet holds no matching files
        return 1
    return 1 + sum(1 for entry in entries if name in str(entry))

def copy_file(src, dst):
    """
    Copies a file and reports the outcome as a message instead of raising.

    Parameters:
    - src (str): Path of the file to copy.
    - dst (str): Destination path.

    Returns:
    - str: A status message: success, missing source, or the error text.
    """
    try:
        source_missing = not os.path.exists(src)
        if not source_missing:
            shutil.copy(src, dst)
    except Exception as error:
        return f"Error while copying file: {error}"

    if source_missing:
        return f"Source file does not exist: {src}"
    return f"File copied successfully from {src} to {dst}"

In [71]:
def name_of_file(format, metadata: ExifData):
    """
    Fills the `{field}` placeholders of a naming template with metadata values.

    Parameters:
    - format (str): Template containing placeholders such as "{camera_model}".
    - metadata (ExifData): Source of the substituted values.

    Returns:
    - str: The template with every known placeholder replaced. A known date is
      rendered as "YYYY-MM-DD HH:MM:SS_<n>", where <n> comes from
      count_no_files_has_name() for that day in OUTPUT; a missing date becomes
      "Unknown Date". Missing make/model become "Unknown Make" /
      "Unknown Model"; any other missing field becomes an empty string.
    """
    taken_at = metadata.datetime_original

    # The per-day counter is looked up first, exactly once per call
    if taken_at:
        day_key = f"{datetime.datetime.strftime(taken_at, '%Y-%m-%d')}"
    else:
        day_key = "Unknown Date"
    file_count = count_no_files_has_name(day_key, OUTPUT)

    if taken_at:
        date_text = f"{datetime.datetime.strftime(taken_at, '%Y-%m-%d %H:%M:%S')}_{file_count}"
    else:
        date_text = "Unknown Date"

    # Applied strictly in this order, one placeholder after the other
    substitutions = (
        ("{camera_make}", str(metadata.camera_make or "Unknown Make")),
        ("{camera_model}", str(metadata.camera_model or "Unknown Model")),
        ("{lens_model}", str(metadata.lens_model or "")),
        ("{focal_length}", str(metadata.focal_length or "")),
        ("{exposure_time}", str(metadata.exposure_time or "")),
        ("{f_number}", str(metadata.f_number or "")),
        ("{iso_speed}", str(metadata.iso_speed or "")),
        ("{datetime_original}", date_text),
        ("{gps_latitude}", str(metadata.gps_latitude or "")),
        ("{gps_longitude}", str(metadata.gps_longitude or "")),
        ("{lens_make}", str(metadata.lens_make or "")),
        ("{gps_altitude}", str(metadata.gps_altitude or "")),
        ("{orientation}", str(metadata.orientation or "")),
    )
    for placeholder, replacement in substitutions:
        format = format.replace(placeholder, replacement)
    return format

def attempt_to_get_date_from_file(file_name):
    """
    Attempts to extract a valid date or datetime object from a file name.

    Every position of the name is tried from left to right. At each position
    the 14-character layout '%Y%m%d%H%M%S' is tried first and the 8-character
    layout '%Y%m%d' second; the first candidate that parses and whose year is
    after 1995 and not in the future wins.

    Parameters:
    - file_name (str): The file name to search within.

    Returns:
    - datetime: A datetime object if a valid date is found, otherwise None.
    """
    current_year = datetime.datetime.now().year
    layouts = ((14, '%Y%m%d%H%M%S'), (8, '%Y%m%d'))

    def parse(candidate, layout):
        """Return the parsed datetime when it is valid and plausible, else None."""
        try:
            parsed = datetime.datetime.strptime(candidate, layout)
        except ValueError:
            return None
        return parsed if 1995 < parsed.year <= current_year else None

    # "+ 1" so that a date occupying the very last 8 characters of the name
    # (e.g. "IMG_20210506" or just "20210506") is examined as well.
    for start in range(len(file_name) - 8 + 1):
        for width, layout in layouts:
            window = file_name[start:start + width]
            # Only full-width windows without separators can match a layout
            if len(window) == width and window.isalnum():
                found = parse(window, layout)
                if found is not None:
                    return found

    return None

In [None]:
def get_files(input_directory):
    """Collect the full path of every file below the input directory, recursively."""
    return [
        os.path.join(folder, entry)  # full path, not just the bare name
        for folder, _subfolders, entries in os.walk(input_directory)
        for entry in entries
    ]

if __name__ == '__main__':
    # Walk INPUT, derive a metadata-based name for every supported file, copy
    # it into OUTPUT under that name and record one row per file in `df`.
    df = pd.DataFrame(columns=['File Location', 'File Name', 'Renamed File Name', 'Date Time', 'Camera Make', 'Camera Model', 'Lens Model', 'Focal Length', 'Exposure Time', 'F Number', 'ISO Speed', 'GPS Latitude', 'GPS Longitude', 'Lens Make', 'GPS Altitude', 'Orientation'])

    # name_of_file() lists OUTPUT and copy_file() writes into it, so the
    # directory has to exist before the first file is processed.
    os.makedirs(OUTPUT, exist_ok=True)

    files = get_files(INPUT)
    for f in files:
        meta = extract_image_metadata(f)
        if meta is None:
            # Unsupported or unreadable file
            continue
        metadata = ExifData.from_exif(meta)
        if metadata.datetime_original is None:
            # No EXIF date: fall back to a date embedded in the path
            metadata.datetime_original = attempt_to_get_date_from_file(f)

        # Build the name once so the recorded row and the copied file agree
        base_name = name_of_file(NAMING_FORMAT, metadata)
        extension = os.path.splitext(f)[1]  # includes the leading dot

        # Different sources can map to the same name (e.g. several files
        # without any date); never overwrite a copy that already exists.
        new_name = base_name
        duplicate = 1
        while os.path.exists(os.path.join(OUTPUT, f'{new_name}{extension}')):
            duplicate += 1
            new_name = f'{base_name} ({duplicate})'

        df.loc[len(df)] = [f, os.path.basename(f), new_name, metadata.datetime_original, metadata.camera_make, metadata.camera_model, metadata.lens_model, metadata.focal_length, metadata.exposure_time, metadata.f_number, metadata.iso_speed, metadata.gps_latitude, metadata.gps_longitude, metadata.lens_make, metadata.gps_altitude, metadata.orientation]
        renamed_file = f'{new_name}{extension}'
        output_path = os.path.join(OUTPUT, renamed_file)
        copy_file(f, output_path)
        
# save the dataframe to a csv file
# df.to_csv('metadata.csv', index=False)