# File organization by person

This program identifies people's faces in a picture and creates a link to the picture in the corresponding folder. It also updates the file's properties with the identified category.

In [None]:
# importing necessary libraries
import os
import shutil
import exifread
import timm
import torch
import torchvision.transforms as transforms
from PIL import Image
from datetime import datetime
from collections import defaultdict
import subprocess
import pyheif

In [None]:
# Define paths
# SOURCE_DIR is the tree that organize_files() walks; DEST_DIR is the root
# under which the dated folders of symbolic links are created.
# NOTE(review): the two paths spell the parent folder differently
# ("organized_photos_labled" vs "organized_photos_labeled") - confirm that
# both directories really exist as written.
SOURCE_DIR = "/Volumes/NFP4TBSSD/organized_photos/organized_photos_labled/_train"
DEST_DIR = "/Volumes/NFP4TBSSD/organized_photos/organized_photos_labeled/_val"

In [None]:
# Build timm's "resnet50" with pretrained weights and put it straight into
# inference mode; eval() returns the module itself, so the call is chained.
model = timm.create_model("resnet50", pretrained=True).eval()

# ImageNet class names, one per line; classify_image() indexes this list with
# the class ids predicted by the model.
# A remote copy of the file is available at:
# https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
LABELS_PATH = "../data/imagenet_classes.txt"


def load_labels(candidates):
    """Return the stripped lines of the first existing file in *candidates*.

    Args:
        candidates: Iterable of file paths, tried in order.

    Returns:
        A list with one stripped string per line of the first path that is an
        existing regular file, or an empty list when none of them is.
    """
    for candidate in candidates:
        if os.path.isfile(candidate):
            # The context manager closes the handle as soon as the list is built.
            with open(candidate, encoding="utf-8") as handle:
                return [line.strip() for line in handle]
    return []


# LABELS_PATH is honoured first; the bare file name in the working directory
# stays as a fallback because it is the only location that used to be read.
LABELS = load_labels((LABELS_PATH, "imagenet_classes.txt"))

# Preprocessing pipeline applied to every image before it reaches the model:
# resize to 224x224, convert to a tensor, then normalise each channel
# (the mean/std values are presumably the usual ImageNet statistics).
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
def get_image_metadata(image_path):
    """Return the capture time stored in an image's EXIF data.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A naive ``datetime`` parsed from the ``EXIF DateTimeOriginal`` tag, or
        from ``Image DateTime`` when the former is missing or unparsable;
        ``None`` when neither tag holds a valid ``YYYY:MM:DD HH:MM:SS`` value.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(image_path, 'rb') as f:
        tags = exifread.process_file(f)

    for tag_name in ('EXIF DateTimeOriginal', 'Image DateTime'):
        date_tag = tags.get(tag_name)
        if not date_tag:
            continue
        try:
            return datetime.strptime(str(date_tag).strip(), "%Y:%m:%d %H:%M:%S")
        except ValueError:
            # A malformed value (for instance an all-zero date) must not abort
            # the caller; try the next tag, then fall through to None.
            continue
    return None

In [None]:
def get_video_metadata(video_path):
    """Return the container-level ``creation_time`` of a video, read with ffprobe.

    This is best-effort: any failure to run ffprobe or to parse its output
    yields ``None`` so the caller can fall back to another date source.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        A naive ``datetime`` parsed from the ``creation_time`` format tag, or
        ``None`` when ffprobe cannot be run, prints nothing, or prints a value
        in none of the recognised timestamp layouts.
    """
    cmd = [
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "format_tags=creation_time",
        "-of", "default=noprint_wrappers=1:nokey=1", video_path,
    ]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffprobe is missing or not executable, or the path cannot be passed
        # to a subprocess (e.g. embedded NUL byte).
        return None

    output = result.stdout.strip()
    if not output:
        return None
    raw_value = output.splitlines()[0].strip()

    # The tag is not always written with fractional seconds or a trailing "Z",
    # so several layouts are tried, most specific first.
    for layout in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(raw_value, layout)
        except ValueError:
            continue
    return None

In [None]:
def classify_image(image_path, top_k=5):
    """Classify an image and return the names of its most probable classes.

    Args:
        image_path: Path of the image file to classify.
        top_k: Maximum number of labels to return (default 5). It is clamped
            to the number of scores the model produces.

    Returns:
        ``["Unknown"]`` when no class names were loaded into ``LABELS``;
        otherwise a list of label strings ordered from most to least probable.

    Raises:
        IndexError: If the model predicts a class id that has no entry in
            ``LABELS`` (for example when the labels file is truncated).
    """
    if not LABELS:
        return ["Unknown"]

    # The context manager releases the underlying file once the pixels have
    # been converted into a tensor.
    with Image.open(image_path) as raw_image:
        batch = transform(raw_image.convert("RGB")).unsqueeze(0)

    with torch.no_grad():
        outputs = model(batch)

    # outputs[0] holds the scores of the single image in the batch.
    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
    k = min(top_k, probabilities.numel())
    class_ids = torch.topk(probabilities, k).indices.tolist()
    return [LABELS[class_id] for class_id in class_ids]

In [None]:
def extract_context_from_path(file_path):
    """Build a short context string from the last components of a path.

    The path is normalised and split on the OS separator. Empty components
    (produced by a leading separator) and the generic folder names
    ``source``, ``images``, ``videos`` and ``photos`` (case-insensitive) are
    dropped; the last two remaining components are joined with ``_``.

    Args:
        file_path: Path to derive the context from.

    Returns:
        The joined context string, or ``None`` when no component remains.
    """
    # NOTE(review): when a full file path is passed in, its last component is
    # the file name, so the name becomes part of the context - confirm that
    # this is intended.
    ignored_names = {'source', 'images', 'videos', 'photos'}
    components = os.path.normpath(file_path).split(os.sep)
    # `part` is tested for emptiness first so that an absolute path does not
    # contribute a blank keyword (which used to surface as a stray "_").
    context_keywords = [
        part for part in components
        if part and part.lower() not in ignored_names
    ]
    return '_'.join(context_keywords[-2:]) if context_keywords else None

In [None]:
def convert_heic_to_jpg(heic_path):
    """Convert a HEIC file to a JPEG stored next to it and return the new path.

    Args:
        heic_path: Path of the HEIC file to convert.

    Returns:
        The path of the JPEG: ``heic_path`` with its extension replaced by
        ``.jpg``. If a file already exists at that path it is returned as-is
        and no conversion takes place.
    """
    # splitext only looks at the final path component, so a dot in a directory
    # name cannot truncate the path.
    jpg_path = os.path.splitext(heic_path)[0] + ".jpg"

    # Never overwrite a JPEG that is already there (an earlier conversion or
    # an unrelated picture with the same stem).
    if os.path.exists(jpg_path):
        return jpg_path

    heif_file = pyheif.read(heic_path)
    image = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
    # JPEG cannot store an alpha channel; normalise the mode before saving.
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(jpg_path, "JPEG")
    return jpg_path

In [None]:

def organize_files():
    """Recursively organize files from source to destination by creating symbolic links.

    Every file below ``SOURCE_DIR`` is processed as follows:

    1. HEIC files are converted to JPEG and the JPEG is used from then on.
    2. A date is taken from image EXIF data or video metadata, falling back
       to the file's modification time.
    3. Images without a metadata date are classified and the labels are used
       as event names; otherwise the context derived from the path is used.
    4. A folder ``DEST_DIR/<YYYY>/<MM-DD>_<names>`` is created, the event names
       are written to the file via ``update_file_metadata``, and a symbolic
       link to the file is placed in the folder unless one is already there.
    """
    image_exts = {'jpg', 'jpeg', 'png', 'tiff', 'gif'}
    video_exts = {'mp4', 'mov', 'avi', 'mkv', 'wmv'}

    for root, _, files in os.walk(SOURCE_DIR):
        for file in files:
            file_path = os.path.join(root, file)
            file_ext = file.lower().rsplit('.', 1)[-1]

            # Convert HEIC to JPEG
            if file_ext == "heic":
                file_path = convert_heic_to_jpg(file_path)
                file_ext = "jpg"

            # Get metadata
            date_taken = None
            event_names = None
            if file_ext in image_exts:
                date_taken = get_image_metadata(file_path)
                if not date_taken:
                    event_names = classify_image(file_path)
            elif file_ext in video_exts:
                date_taken = get_video_metadata(file_path)

            # Fallback date if metadata is missing
            if not date_taken:
                date_taken = datetime.fromtimestamp(os.path.getmtime(file_path))

            # Extract context from the directory structure
            context_from_path = extract_context_from_path(file_path)

            # Only wrap the path context in a list when there is one: a list
            # holding None is truthy, would crash '_'.join and would make the
            # "Others1" fallback unreachable.
            if event_names:
                final_event_names = event_names
            elif context_from_path:
                final_event_names = [context_from_path]
            else:
                final_event_names = []

            # Format destination path
            date_str = date_taken.strftime("%Y/%m-%d")
            folder_suffix = "_" + '_'.join(final_event_names) if final_event_names else "Others1"
            dest_folder = os.path.join(DEST_DIR, date_str + folder_suffix)
            os.makedirs(dest_folder, exist_ok=True)

            # Update file metadata with categories
            # NOTE(review): update_file_metadata is not defined in the visible
            # part of this file - confirm it exists before running.
            if final_event_names:
                update_file_metadata(file_path, final_event_names)

            # Create symbolic link instead of moving file. The link is named
            # after the file actually linked, so a converted HEIC gets a .jpg
            # link rather than a .heic name pointing at JPEG data.
            dest_path = os.path.join(dest_folder, os.path.basename(file_path))
            # lexists also detects a dangling link, which os.symlink would
            # otherwise trip over with FileExistsError.
            if not os.path.lexists(dest_path):
                os.symlink(file_path, dest_path)
                print(f"Linked {file_path} -> {dest_path}")

In [None]:
#if __name__ == "__main__":
#    organize_files()