In [None]:
import os
import shutil
import exifread
import timm
import torch
import torchvision.transforms as transforms
import subprocess

from PIL import Image
from datetime import datetime
from collections import defaultdict
#    from mutagen import File



In [2]:
# Define paths
SOURCE_DIR = "/Volumes/NFP4TBSSD/PHOTOS_ORGANIZE"
DEST_DIR = "/Volumes/NFP4TBSSD/zz_organized_photos"

In [3]:

# Load a pre-trained image classification model (e.g., timm's resnet50)
model = timm.create_model("resnet50", pretrained=True)
model.eval()

# ImageNet labels for class names

#TODO: refactor the image labels
#LABELS_PATH = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
LABELS_PATH = "../data/imagenet_classes.txt"
LABELS = [line.strip() for line in open("imagenet_classes.txt")] if os.path.exists("imagenet_classes.txt") else []


In [4]:
# Image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
register_heif_opener()  # Enable HEIC support for PIL

def get_image_metadata(image_path):
    """Extract date taken from image metadata"""
    try:
        if image_path.lower().endswith(".heic"):
            image = Image.open(image_path)
            exif_data = image.info.get("exif", None)
            if exif_data:
                return datetime.strptime(exif_data.get(36867), "%Y:%m:%d %H:%M:%S")
        else:
            with open(image_path, 'rb') as f:
                tags = exifread.process_file(f)
            date_tag = tags.get('EXIF DateTimeOriginal') or tags.get('Image DateTime')
            if date_tag:
                return datetime.strptime(str(date_tag), "%Y:%m:%d %H:%M:%S")
    except Exception as e:
        print(f"Error reading metadata for {image_path}: {e}")
    return None

In [6]:
def get_video_metadata(video_path):
    """Extract creation date from video metadata using ffmpeg"""
    try:
        cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format_tags=creation_time", "-of", "default=noprint_wrappers=1:nokey=1", video_path]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.stdout:
            return datetime.strptime(result.stdout.strip(), "%Y-%m-%dT%H:%M:%S.%fZ")
    except Exception:
        pass
    return None

In [7]:
def classify_image(image_path):
    """Classify an image and return the top label."""
    if not LABELS:
        return "Unknown"
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)
    with torch.no_grad():
        outputs = model(image)
    _, predicted = torch.max(outputs, 1)
    return LABELS[predicted.item()]

In [None]:

def organize_files():
    """Recursively organize files from source to destination"""
    for root, _, files in os.walk(SOURCE_DIR):
        for file in files:
            file_path = os.path.join(root, file)
            file_ext = file.lower().split('.')[-1]
            
            # Get metadata
            date_taken = None
            event_name = None
            if file_ext in ['jpg', 'jpeg', 'png', 'tiff', 'gif', 'bmp', 'webp']:
                try:
                    date_taken = get_image_metadata(file_path)
                except Exception:
                    print(f"Skipping file {file_path}: Unsupported metadata format.")
                    date_taken = None  # Fallback to modification date later
                    
                event_name = classify_image(file_path) if not date_taken else None
            elif file_ext in ['heic', 'heif']:
                # For HEIC files, we can use pyheif or similar library to extract metadata...
                date_taken = get_image_metadata(file_path)
                event_name = classify_image(file_path) if not date_taken else None
            #elif file_ext in ['mp3', 'wav', 'flac']:
            #    # For audio files, we can use a library like mutagen to extract metadata
            #    from mutagen import File
            #    audio = File(file_path)
            #    date_taken = datetime.fromtimestamp(audio.info.pprint().get('TDRL', None))
            elif file_ext in ['mp4', 'mov', 'avi', 'mkv', 'wmv']:
                date_taken = get_video_metadata(file_path)
            
            # Fallback date if metadata is missing
            if not date_taken:
                date_taken = datetime.fromtimestamp(os.path.getmtime(file_path))
            
            # Format destination path
            date_str = date_taken.strftime("%Y/%m-%d")
            dest_folder = os.path.join(DEST_DIR, date_str + ("_" + event_name if event_name else ""))
            os.makedirs(dest_folder, exist_ok=True)
            
            # Move file
            shutil.copy(file_path, os.path.join(dest_folder, file))
            print(f"Copied {file_path} -> {dest_folder}")

IndentationError: expected an indented block after 'elif' statement on line 19 (1517188722.py, line 20)

In [None]:
organize_files()