# Problem statement
During a recent vacation, hundreds of photos were taken by multiple people and then shared through various messaging apps. The naming convention of the photos is inconsistent, and downloaded images carry no EXIF timestamp metadata. Since some photos were downloaded days after the event, the conventional methods of sorting by name or by date modified cannot put the photos in chronological order.

This code attempts to rename and reorganize shortlisted photos in such a way that manual sorting is minimized. Photos were shortlisted using colored tags on Mac OS and presorted into folders labeled with the date taken. Some example photos and naming conventions can be found in the `img/` folder.

The types of names of the photos were as follows:
- `IMG_YYYYMMDD_hhmmss.jpg`: personal photos with time taken in local time in filename (also has EXIF timestamp data)
- `PXL_YYYYMMDD_hhmmssmmm.jpg`: downloaded files with time taken in UTC time in filename (8 hours behind local time; no EXIF timestamp data)
- `16###########.jpg`: downloaded files with time uploaded to messaging app in Unix time (including milliseconds) in filename (no EXIF timestamp data)

# Strategy
- Go folder by folder
- Search for shortlisted photos using **`osxmetadata` for Mac OS tag information**
- Estimate local time of photo based on the photo name and **create new name reflecting the local date and time**
    - If the date does not match folder date, use the folder date and set the time to noon (manually review and estimate time later)
- If satisfied with names, load images and **add local time taken as part of exif information** so other programs can sort using this information
- Save all renamed files in `Final/` folder and order by new names

# Initial imports and variables

In [30]:
import logging
import os
import shutil
import piexif
from datetime import datetime, timedelta
from osxmetadata import OSXMetaData
from PIL import Image, ExifTags
from pprint import pprint

# Root folder that holds the date-labelled photo subfolders
IMG_FOLDER = 'img'

# Output locations inside the root folder
FINAL_FOLDER = IMG_FOLDER + '/Final'
TMP_FOLDER = IMG_FOLDER + '/tmp'

# Sample photos used in the exploration cells
EXAMPLE1 = IMG_FOLDER + '/20221121 objects/IMG_20221121_230114.jpg'
EXAMPLE2 = IMG_FOLDER + '/20221125 scenery/PXL_20221125_040253498.jpg'
EXAMPLE3 = IMG_FOLDER + '/20221122 food/1669256881625.jpg'
EXAMPLE4 = IMG_FOLDER + '/20221125 scenery/IMG_20221125_135506.jpg'

# Exif tag id (36867 in decimal) for the date and time a photo was taken
EXIF_DATETIME_TAG = 0x9003

# Exploration


## `PIL` and `piexif`
Getting the date and time when an image was taken only works for photos taken directly from the camera, not for downloaded ones. `piexif` is a package that helps with saving images with exif information.

In [2]:
# Raw exif data is keyed by numeric tag id rather than by a readable tag name
_example_image = Image.open(EXAMPLE3)
image_exif = _example_image._getexif()
image_exif

{256: 1478,
 257: 1108,
 274: 1,
 34665: 62,
 37384: 0,
 40961: 1,
 40962: 1478,
 40963: 1108}

In [3]:
# Quick function to get exif data from photos with human readable tags
def get_exif_info(filename):
    """Print a readable subset of a photo's exif data and its capture time.

    Only tags whose names sort before 'F' and whose values are not bytes are
    printed; this keeps the output short while still including the DateTime*
    tags. A fallback message is printed when the image has no exif data or no
    parseable 'DateTimeOriginal' value.

    Args:
        filename: path of the image file to inspect.
    """
    try:
        # Close the file handle as soon as the exif mapping has been read
        with Image.open(filename) as image:
            image_exif = image._getexif()

        # Make a map with tag names
        exif = {
            ExifTags.TAGS[k]: v
            for k, v in image_exif.items()
            if k in ExifTags.TAGS
            and not isinstance(v, bytes)
            and ExifTags.TAGS[k] < 'F'
        }
        pprint(exif)

        # Grab the date
        date_obj = datetime.strptime(exif.get('DateTimeOriginal'), '%Y:%m:%d %H:%M:%S')
        print(f'Date {filename} was taken: {date_obj}\n')
    except (AttributeError, TypeError, ValueError):
        # AttributeError: no exif mapping at all (None has no .items())
        # TypeError: 'DateTimeOriginal' is missing, so strptime received None
        # ValueError: the stored date string does not match the expected format
        print(f'Unable to get date from exif for {filename}\n')

# Note that only the example named IMG_... has DateTime information
get_exif_info(EXAMPLE1)
get_exif_info(EXAMPLE2)
get_exif_info(EXAMPLE3)

{'ApertureValue': 1.61,
 'BrightnessValue': 0.0,
 'ColorSpace': 1,
 'DateTime': '2022:11:21 22:01:14',
 'DateTimeDigitized': '2022:11:21 22:01:14',
 'DateTimeOriginal': '2022:11:21 22:01:14',
 'ExifImageHeight': 3000,
 'ExifImageWidth': 4000,
 'ExifInteroperabilityOffset': 700,
 'ExifOffset': 197,
 'ExposureBiasValue': nan,
 'ExposureMode': 0,
 'ExposureProgram': 0,
 'ExposureTime': 0.01}
Date img/20221121 objects/IMG_20221121_230114.jpg was taken: 2022-11-21 22:01:14

{}
Unable to get date from exif for img/20221125 scenery/PXL_20221125_040253498.jpg

{'ColorSpace': 1,
 'ExifImageHeight': 1108,
 'ExifImageWidth': 1478,
 'ExifOffset': 62}
Unable to get date from exif for img/20221122 food/1669256881625.jpg



In [4]:
# Quick method to save an image with new DateTime information
# Template for an exif dictionary that holds no tags yet
empty_exif_dict = {'Exif': {}}

# Open an image file
im = Image.open(EXAMPLE1)
exif_dict = piexif.load(im.info["exif"])

# Create new dict only with date and time information.
# dict.copy() is shallow, so each section is copied as well; otherwise
# writing the tag below would also modify the empty template.
new_exif_dict = {section: dict(tags) for section, tags in empty_exif_dict.items()}
new_exif_dict['Exif'][EXIF_DATETIME_TAG] = exif_dict['Exif'].get(EXIF_DATETIME_TAG)
exif_bytes = piexif.dump(new_exif_dict)

# Create final folder if it does not exist
os.makedirs(FINAL_FOLDER, exist_ok=True)

# Save a copy of the image file with the new exif dictionary information
im.save(f"{FINAL_FOLDER}/test.jpg", "jpeg", exif=exif_bytes, quality=100, subsampling=0)

## `osxmetadata`

In [5]:
# Quick function to check whether file has MacOS tags
def get_tag_info(filenames):
    """Return the `tags` value of each file, in the same order as *filenames*."""
    return [OSXMetaData(path).tags for path in filenames]

# Look up the tags of the four example photos; as the last expression of the
# cell, the returned list is what the notebook displays
get_tag_info([EXAMPLE1, EXAMPLE2, EXAMPLE3, EXAMPLE4])

[[Tag(name='Green', color=2)],
 [Tag(name='Blue', color=4)],
 [Tag(name='Green', color=2)],
 []]

# Solution

In [None]:
# Initialize folders
# exist_ok=True creates the folder only when missing, without a separate
# check-then-create step, so the cell is safe to re-run
os.makedirs(TMP_FOLDER, exist_ok=True)
os.makedirs(FINAL_FOLDER, exist_ok=True)

In [56]:
# Get list of subfolders, skipping the output folders that live inside IMG_FOLDER
output_folder_names = {os.path.basename(FINAL_FOLDER), os.path.basename(TMP_FOLDER)}
subfolders = [
    f.path for f in os.scandir(IMG_FOLDER)
    if f.is_dir() and f.name not in output_folder_names
]

# Local time is 8 hours ahead of the UTC times found in PXL_ and 16## file names
local_utc_offset = timedelta(hours=8)
unix_epoch = datetime(1970, 1, 1)

# Go folder by folder
for subfolder in subfolders:
    # assumes first 8 characters of subdirectory is the date
    current_date = os.path.basename(subfolder)[:8]

    # Search for shortlisted photos using `osxmetadata` for Mac OS tag information
    photos = [ f for f in os.scandir(subfolder) if '.jpg' in f.name and OSXMetaData(f).tags ]

    # Estimate local time of photo based on the photo name and create new name reflecting the local date and time
    for photo in photos:
        utc_time = None

        try:
            # IMG_ files can use name directly
            if photo.name.startswith('IMG'):
                new_name = photo.name

            # PXL_ files need to convert UTC to local time
            elif photo.name.startswith('PXL'):
                utc_time = datetime.strptime(photo.name[4:19], '%Y%m%d_%H%M%S')

            # 16## files need to convert Unix time (in milliseconds) to local time
            elif photo.name.startswith('16'):
                utc_time = unix_epoch + timedelta(milliseconds=int(photo.name[:13]))

            else:
                # Nothing sensible to name the copy, so leave the file where it is
                logging.warning(f'{photo.name} is not one of the known file naming schemes')
                continue
        except ValueError:
            # The prefix matched but the rest of the name is not a valid timestamp
            logging.warning(f'Unable to read a timestamp from {photo.name}; skipping')
            continue

        if utc_time is not None:
            local_time = utc_time + local_utc_offset

            if local_time.strftime('%Y%m%d') == current_date:
                new_name = local_time.strftime('IMG_%Y%m%d_%H%M%S.jpg')
            else:
                # If the date does not match folder date, use the folder date and set the time to noon.
                # The original name is kept as a suffix so the file can be reviewed manually later.
                new_name = f'IMG_{current_date}_120000_{photo.name}'

        # Save the files using new names temporarily for review
        shutil.copy(photo.path, f'{TMP_FOLDER}/{new_name}')

    # For own purposes, directly move shortlisted videos over
    videos = [ f for f in os.scandir(subfolder) if 'VID_' in f.name and OSXMetaData(f).tags ]

    for video in videos:
        shutil.copy(video.path, f'{FINAL_FOLDER}/')

In [54]:
photos = [ f for f in os.scandir(TMP_FOLDER) if '.jpg' in f.name ]

for photo in photos:
    # The reviewed file name carries the local time as IMG_YYYYMMDD_hhmmss
    try:
        local_time = datetime.strptime(photo.name[4:19], '%Y%m%d_%H%M%S')
    except ValueError:
        # Leave misnamed files in the tmp folder instead of aborting the whole batch
        logging.warning(f'{photo.name} does not follow the IMG_YYYYMMDD_hhmmss naming scheme; skipping')
        continue
    local_time_bytestr = local_time.strftime('%Y:%m:%d %H:%M:%S').encode()

    # If satisfied with names in tmp folder, load images and add local time taken as part of exif information
    with Image.open(photo.path) as im:
        # Downloaded photos may have no exif block at all; start from an empty one then
        raw_exif = im.info.get("exif")
        exif_dict = piexif.load(raw_exif) if raw_exif else {'Exif': {}}

        # If datetime information does not exist, add information to exif dict
        if not exif_dict['Exif'].get(EXIF_DATETIME_TAG):
            exif_dict['Exif'][EXIF_DATETIME_TAG] = local_time_bytestr
            exif_bytes = piexif.dump(exif_dict)

            im.save(photo.path, "jpeg", exif=exif_bytes, quality=100, subsampling=0)

    # Move the file over
    shutil.move(photo.path, f'{FINAL_FOLDER}/')

In [55]:
photos = [ f for f in os.scandir(FINAL_FOLDER) if '.jpg' in f.name ]

for photo in photos:
    print(photo.name)
    # Check that every photo in the final folder carries a time taken in its exif information
    with Image.open(photo.path) as im:
        raw_exif = im.info.get("exif")

    # A photo without any exif block is reported as an error rather than raising KeyError
    exif_dict = piexif.load(raw_exif) if raw_exif else {'Exif': {}}

    time_taken = exif_dict['Exif'].get(EXIF_DATETIME_TAG)
    if time_taken:
        print(time_taken)
    else:
        print('error')

IMG_20221122_135646.jpg
b'2022:11:22 13:56:46'
IMG_20221125_134449.jpg
b'2022:11:25 13:44:49'
IMG_20221122_120000_1669256881625.jpg
b'2022:11:22 12:00:00'
IMG_20221121_230114.jpg
b'2022:11:21 22:01:14'
IMG_20221122_120000_1670823782713.jpg
b'2022:11:22 12:00:00'
IMG_20221125_120253.jpg
b'2022:11:25 12:02:53'
test.jpg
b'2022:11:21 22:01:14'
