# 2. Process Photos:


## [2.1] Read `configs.ini` :

In [1]:
import json
import configparser
# Parser instance that is populated from `configs.ini` by the `config.read(...)` call below.
config = configparser.ConfigParser()

In [2]:
# Load the run settings from `configs.ini`.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file was not found.
# Fail here with a clear message instead of a later `KeyError: 'PROCESS'`.
if not config.read('configs.ini'):
    raise FileNotFoundError(
        "Could not read 'configs.ini'; it is expected in the current working directory."
    )

# [PROCESS] section: directories used by this run.
source_dir      = config['PROCESS']['source_dir']
destination_dir = config['PROCESS']['destination_dir']
log_process_dir = config['PROCESS']['log_process_dir']
csv_process_dir = config['PROCESS']['csv_process_dir']

# [PROCESS] section: move flags, converted to real booleans.
# getboolean() accepts true/false, yes/no, on/off and 1/0 (case-insensitive)
# and raises if the option is missing or not a recognised boolean.
PROCESS_MOVE_PHOTOS = config.getboolean('PROCESS', 'PROCESS_MOVE_PHOTOS')
PROCESS_MOVE_VIDEOS = config.getboolean('PROCESS', 'PROCESS_MOVE_VIDEOS')

# [NOEDIT] section: filename patterns; capture group 1 is the 8-digit date.
MASTER_REGEX_PHOTOS_1 = config['NOEDIT']['MASTER_REGEX_PHOTOS_1']
MASTER_REGEX_PHOTOS_2 = config['NOEDIT']['MASTER_REGEX_PHOTOS_2']
MASTER_REGEX_VIDEOS_1 = config['NOEDIT']['MASTER_REGEX_VIDEOS_1']
MASTER_REGEX_VIDEOS_2 = config['NOEDIT']['MASTER_REGEX_VIDEOS_2']

In [3]:
# Echo the loaded settings so the run can be sanity-checked before any file is touched.
print("Source Directory: ",source_dir)
print("Destination Directory: ",destination_dir)
print("Log Directory: ",log_process_dir)
print("CSV Directory: ",csv_process_dir)
print("Should Photo files be moved? : ",PROCESS_MOVE_PHOTOS)
# This line reports the video flag; it was previously labelled "Photo" by mistake.
print("Should Video files be moved? : ",PROCESS_MOVE_VIDEOS)


Source Directory:  E:/R_PhotosVideos/R_Photos_Unsorted/Plain-11
Destionation Directory:  E:/R_PhotosVideos/R_Photos_Sorted_Test
Log Directory:  E:/R_PhotosVideos/PythonPhotoSort/logs
CSV Directory:  E:/R_PhotosVideos/PythonPhotoSort/logs
Should Photo files be moved? :  True
Should Photo files be moved? :  True


In [4]:
# Show every filename pattern that was read from the config, one per line.
for _label, _pattern in (
    ("MASTER_REGEX_PHOTOS_1 : ", MASTER_REGEX_PHOTOS_1),
    ("MASTER_REGEX_PHOTOS_2 : ", MASTER_REGEX_PHOTOS_2),
    ("MASTER_REGEX_VIDEOS_1 : ", MASTER_REGEX_VIDEOS_1),
    ("MASTER_REGEX_VIDEOS_2 : ", MASTER_REGEX_VIDEOS_2),
):
    print(_label, _pattern)

MASTER_REGEX_PHOTOS_1 :  ^[iImMgG]{3}[-_]([0-9]{8})[-_].*\.(?:jpg|jpeg)$
MASTER_REGEX_PHOTOS_2 :  ^[iImMgG]{3}([0-9]{8}).*\.(?:jpg|jpeg)$
MASTER_REGEX_VIDEOS_1 :  ^[VvIiDdvideo]{3,}[-_]([0-9]{8})[-_].*\.(?:mp4)$
MASTER_REGEX_VIDEOS_2 :  ^[VvIiDd]{3}([0-9]{8}).*\.(?:mp4)$


## [2.2] Process all Photos: 

In [5]:
import re
import os
from datetime import datetime
import shutil
import dateutil

In [6]:
# Aliases for the configured source and destination directories used by the processing cell.
input_folder_path, output_folder_path = source_dir, destination_dir

In [7]:
# Take the timestamp once so the log and CSV of the same run always share the
# same suffix (two separate now() calls can straddle a second boundary).
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")

# The processing cell opens both files for writing, which fails if the
# directory does not exist yet, so create the directories up front.
os.makedirs(log_process_dir, exist_ok=True)
os.makedirs(csv_process_dir, exist_ok=True)

# Log file :
log_file = os.path.join(log_process_dir, "log-photos-" + run_stamp + ".txt")

# Process CSV File:
csv_file = os.path.join(csv_process_dir, "photos-" + run_stamp + ".csv")

print("Log File: ", log_file)
print("CSV File: ", csv_file)

Log File:  E:/R_PhotosVideos/PythonPhotoSort/logs\log-photos-20240520-104648.txt
CSV File:  E:/R_PhotosVideos/PythonPhotoSort/logs\photos-20240520-104648.csv


In [8]:
# Running totals filled in by the processing loop:
#   FILE_COUNT       - number of files visited
#   FILE_EXT_LIST    - distinct file extensions seen
#   FILE_EXT_COUNTER - one entry per visited file (its extension), for per-extension counts
FILE_COUNT = 0
FILE_EXT_LIST = set()
FILE_EXT_COUNTER = list()

In [9]:
def _build_csv_row(filename, extension, regex_name, extracted_ts, source_path,
                   destination_path="NULL", is_duplicate="NULL", move_status="NULL"):
    """Return one semicolon-delimited CSV line matching the header written below.

    The last column is the constant placeholder "NOTHING"; the three optional
    columns default to "NULL" for files that were not moved.
    """
    fields = [filename, extension, regex_name, extracted_ts, source_path,
              destination_path, is_duplicate, move_status, "NOTHING"]
    return ";".join(fields) + ";\n"


# Photo filename patterns from configs.ini, tried in this order; the first one
# that yields a valid date wins. Capture group 1 is the 8-digit date.
PHOTO_REGEXES = (
    ("MASTER_REGEX_PHOTOS_1", re.compile(MASTER_REGEX_PHOTOS_1)),
    ("MASTER_REGEX_PHOTOS_2", re.compile(MASTER_REGEX_PHOTOS_2)),
)

src = input_folder_path
dst = output_folder_path

# `with` guarantees both files are flushed and closed even if the walk raises.
with open(log_file, "w", encoding="utf-8") as logfile_handle, \
     open(csv_file, "w", encoding="utf-8") as csvfile_handle:

    logfile_handle.write("================================\nProcess Photos Execution Started\n================================\n")
    csvfile_handle.write("Filename;Extension;Matched_Regex_Name;Extracted_Date;Source_Path;Destination_Path;IsDuplicate;MoveStatus;NOTHING;\n")

    # os.walk() yields the files of every (nested) sub-folder below `src`.
    for root, subdirs, files in os.walk(src):
        for file in files:
            path = os.path.join(root, file)

            _fullfilepath = path   # e.g. H:/myfolder/IMG_20150829_141244.jpg
            _filenameonly = file   # e.g. IMG_20150829_141244.jpg
            _extension = os.path.splitext(file)[1]  # e.g. .jpg (leading dot included)

            matched_regex_name = "None"
            _extracted_ts      = "Null"  # extracted timestamp
            date_obj           = None

            logfile_handle.write("INFO: Checking File: "+_filenameonly+"\n")

            # Try each regex listed in configs.ini and stop at the first usable match,
            # so a file is never processed (or moved) twice.
            for regex_name, regex in PHOTO_REGEXES:
                match = regex.search(file)
                if not match:
                    continue
                candidate_ts = match.group(1)
                try:
                    # The patterns capture exactly eight digits (YYYYMMDD), so the
                    # stdlib parser is sufficient. This also avoids relying on
                    # `dateutil.parser` being loaded by a plain `import dateutil`.
                    date_obj = datetime.strptime(candidate_ts, "%Y%m%d")
                except ValueError:
                    # Eight digits that are not a calendar date (e.g. 20231345):
                    # log it and keep going instead of aborting the whole run.
                    logfile_handle.write("WARN: Could not parse date '"+candidate_ts+"' from File: "+_filenameonly+"\n")
                    continue
                matched_regex_name = regex_name
                _extracted_ts = candidate_ts
                logfile_handle.write("INFO: Matched Regex: "+matched_regex_name+"\n")
                break

            # Defaults describe a file that was only inspected, not moved.
            csv_line = _build_csv_row(_filenameonly, _extension, matched_regex_name,
                                      _extracted_ts, path)

            if date_obj is not None:
                # 2024 / 2024-12_(December) / 2024-December-25
                create_new_folder = "/".join([
                    output_folder_path,
                    date_obj.strftime("%Y"),
                    date_obj.strftime("%Y-%m_(%B)"),
                    date_obj.strftime("%Y-%B-%d"),
                ])

                os.makedirs(create_new_folder, exist_ok=True)
                logfile_handle.write("INFO: Folder Created: "+create_new_folder+"\n")

                source_file      = _fullfilepath
                destination_file = create_new_folder + "/" + _filenameonly

                if PROCESS_MOVE_PHOTOS:
                    if os.path.exists(destination_file):
                        # shutil.move() would overwrite an existing destination file
                        # rather than raise, so duplicates are detected explicitly.
                        # Duplicates are skipped and must be handled manually; they
                        # are deliberately not renamed to avoid piling up copies.
                        logfile_handle.write("Duplicate File Found: " + _filenameonly + ", Source: " + source_file +", Destination: " + destination_file + "\n")
                        csv_line = _build_csv_row(_filenameonly, _extension, matched_regex_name,
                                                  _extracted_ts, source_file, destination_file,
                                                  "True", "Skipped")
                    else:
                        shutil.move(source_file, destination_file)
                        # Logging
                        logfile_handle.write("INFO: Flag value PROCESS_MOVE_PHOTOS: "+str(PROCESS_MOVE_PHOTOS)+"\n")
                        logfile_handle.write("INFO: A File is MOVED From: "+source_file+" To File:"+ destination_file +"\n")
                        csv_line = _build_csv_row(_filenameonly, _extension, matched_regex_name,
                                                  _extracted_ts, source_file, destination_file,
                                                  "False", "Success")

            # Exactly one CSV row per visited file.
            csvfile_handle.write(csv_line)

            # Increment File Count
            FILE_COUNT = FILE_COUNT + 1
            # Add file extension to set
            FILE_EXT_LIST.add(_extension)
            # Group counter
            FILE_EXT_COUNTER.append(_extension)
 



In [10]:
# Release the CSV and log files (CSV first, then the log) so their contents are flushed to disk.
for _handle in (csvfile_handle, logfile_handle):
    _handle.close()

## [2.3] Print Post-processing info: 

In [11]:
# Post-run summary: total files visited, distinct extensions, and where the CSV report was written.
for _label, _value in (
    ("Number of files: ", FILE_COUNT),
    ("List of file extensions: ", FILE_EXT_LIST),
    ("Info written to CSV File : ", csv_file),
):
    print(_label, _value)

Number of files:  188
List of file extensions:  {'.jpg', '.mp4'}
Info written to CSV File :  E:/R_PhotosVideos/PythonPhotoSort/logs\photos-20240520-104648.csv


In [12]:
from collections import Counter
import pandas as pd
# Tally how many visited files carry each extension.
filecount_by_ext = Counter(FILE_EXT_COUNTER)

# One row per extension, most frequent first.
ext_count_pairs = list(filecount_by_ext.items())
df = pd.DataFrame.from_records(ext_count_pairs, columns=['extensions', 'count'])
df = df.sort_values(by=['count'], ascending=False)
df.head()


Unnamed: 0,extensions,count
0,.jpg,184
1,.mp4,4
