In [1]:
# Import the necessary packages
from PIL import Image
import imagehash
import os
import time
import numpy as np
import matplotlib.pyplot as plt
# Palette that create_plot indexes into when choosing a colour for each series
colors = [
    "red",
    "blue",
    "green",
    "gray",
    "purple",
    "orange",
]
# Generic function to create a plot
def create_plot(x, y, xLabel=["X-Values"], yLabel=["Y-Values"],
                title=["Plot"], num_rows=1, size=(16, 12), mode="Norm"):
    """Draw one subplot per entry of ``x``/``y`` and display the figure.

    The five sequences ``x``, ``y``, ``xLabel``, ``yLabel`` and ``title`` are
    zipped together, so the number of subplots is the length of the shortest.

    Parameters
    ----------
    x : sequence
        One collection of x-values per subplot.
    y : sequence
        One collection of series per subplot; every series of an entry is
        drawn against that subplot's x-values.
    xLabel, yLabel : sequence of str
        Axis labels, one per subplot.
    title : sequence
        For each subplot, a sequence of legend labels (one per series).
        NOTE: each entry is zipped against the series, so a plain string entry
        (such as the default) is consumed one character per series.
    num_rows : int
        Number of rows in the single-column subplot grid.
    size : tuple
        Figure size handed to ``plt.figure``.
    mode : str
        ``"Norm"`` plots on linear axes; any other value uses ``plt.semilogx``.
    """
    plt.figure(figsize=size, dpi=300)
    # Compare by value: `is` tests object identity, which only matched thanks
    # to string interning and is flagged with a SyntaxWarning on Python 3.8+.
    linear_axes = (mode == "Norm")
    for c, (x_vals, y_vals, x_labels, y_labels, titles) in enumerate(
            zip(x, y, xLabel, yLabel, title)):
        for c2, (y_v, t) in enumerate(zip(y_vals, titles)):
            plt.subplot(num_rows, 1, c + 1)
            # Wrap around the palette instead of raising IndexError once the
            # combined subplot/series index runs past its end.
            line_color = colors[(c2 + c) % len(colors)]
            # Add a plot to the subplot, use transparency so overlapping series stay visible
            if linear_axes:
                plt.plot(x_vals, y_v, label=t, color=line_color, alpha=0.70)
            else:
                plt.semilogx(x_vals, y_v, label=t, color=line_color, alpha=0.70)
            plt.ylabel(y_labels)
            plt.xlabel(x_labels)
            plt.grid(True)
            plt.legend(loc='lower right')

    plt.show()
    
# Estimated number of seconds one comparison takes; checkDupes multiplies it by
# the comparison count to print its expected running time
time_per_compare = 0.08

In [2]:
# Lower-case extensions grabImageList accepts when called with move=True
moveEXT = (".jpg", ".jpeg", ".png", ".gif", ".mp4", ".mov")
# Lower-case extensions grabImageList accepts when called with move=False
dupeEXT = (".jpg", ".jpeg", ".png")

In [3]:
# List the entries of a directory whose names end with an accepted extension
def grabImageList(path, move=True):
    """Return the names in ``path`` that match the selected extension set.

    A truthy ``move`` selects ``moveEXT``; otherwise ``dupeEXT`` is used.
    Names are lower-cased before matching and are returned unchanged, in the
    order ``os.listdir`` produced them.
    """
    accepted = moveEXT if move else dupeEXT
    matches = []
    for entry in os.listdir(path):
        if entry.lower().endswith(accepted):
            matches.append(entry)
    return matches

In [4]:
# For the provided list of images, compute n(n+1)/2 where n is the list length
def calcComparisons(imageList):
    """Return ``n * (n + 1) / 2`` for ``n = len(imageList)``.

    The division is ``/``, so under Python 3 the result is a float.
    """
    n = len(imageList)
    return n * (n + 1) / 2

In [5]:
# Compare all images in the provided directory, if they're similar, notify the user
def checkDupes(path):
    """Print every pair of images in ``path`` whose average hashes nearly match.

    Each image (as selected by ``grabImageList(path, move=False)``) is compared
    with every image after it in the list; a pair is reported when the
    difference between the two hashes is below 3.

    Parameters
    ----------
    path : str
        Directory holding the images.

    Returns
    -------
    list of float
        The ``time.time()`` value captured as each image's turn in the outer
        loop began.
    """
    def hash_of(name):
        # Close the file as soon as it is hashed so handles do not pile up
        # across the many opens this function performs.
        with Image.open(os.path.join(path, name)) as img:
            return imagehash.average_hash(img)

    imageList = grabImageList(path, move=False)
    total = calcComparisons(imageList)
    print("%i images, %i comparisons" % (len(imageList), total))
    print("This should take about %f minutes" % (time_per_compare * total / 60.0))
    time_list = []
    start_time = time.time()
    for count, image in enumerate(imageList):
        time_list.append(time.time())
        # `==`, not `is`: identity of ints is an implementation detail
        if count % 25 == 0:
            print("On image #%i, %i comparisons to go." % (count, calcComparisons(imageList[count:])))
        # Build paths from the `path` argument rather than the notebook-level
        # `imagePath` global, so the function works for any directory.
        currImage_hash = hash_of(image)
        # NOTE(review): every later image is re-opened and re-hashed for each
        # outer image; caching the hashes would cut the hashing work, but the
        # printed time estimate is calibrated to the current behaviour.
        for imageB in imageList[count+1:]:
            distance = currImage_hash - hash_of(imageB)
            if distance < 3:
                print("%i: Look at %s, %s (%i)" % (count, image, imageB, int(distance)))

    print("Took %f minutes" % ((time.time() - start_time) / 60))
    return time_list

In [6]:
# Rename the /Grouped/ subfolder
def renameGrouped(path):
    """Renumber the grouped files in ``path`` as ``<group>-<position><ext>``.

    Two files belong to the same group when the text before their first "-"
    is equal. Groups are numbered from 1 in the order they first appear in the
    listing returned by ``grabImageList``; members are numbered from 1 within
    their group. Every file keeps its own extension.

    Files are first moved into a ``new`` subfolder of ``path`` and then moved
    back, presumably so a freshly assigned name cannot collide with a file
    that has not been renamed yet. The subfolder is removed at the end.

    Parameters
    ----------
    path : str
        Directory whose files are renamed.
    """
    imageList = grabImageList(path)
    originalLen = len(imageList)
    staging = os.path.join(path, "new")
    if not os.path.isdir(staging): # Make the /new/ subfolder if it doesn't exist
        os.makedirs(staging)
        print("Created /new/ folder")

    # Bucket the files by group prefix in a single pass, remembering the order
    # in which each prefix was first seen (that order drives the numbering).
    group_order = []
    members_by_group = {}
    for image in imageList:
        group_key = image.split("-")[0]
        if group_key not in members_by_group:
            members_by_group[group_key] = []
            group_order.append(group_key)
        members_by_group[group_key].append(image)

    for group_number, group_key in enumerate(group_order, start=1):
        for sub_count, subimage in enumerate(members_by_group[group_key], start=1):
            # Use the extension of the file actually being renamed; taking it
            # from the group's first file mislabels mixed-extension groups.
            extension = os.path.splitext(subimage)[1]
            new_name = os.path.join(
                staging, "%s-%s%s" % (group_number, sub_count, extension))
            os.rename(os.path.join(path, subimage), new_name) # Move to the /new/ folder (with the new name)
            time.sleep(0.08) # Pause between renames, kept from the original implementation

    # Move the images back from /Grouped/new/ to /Grouped/
    for image in os.listdir(staging):
        os.rename(os.path.join(staging, image), os.path.join(path, image))
        time.sleep(0.08)

    os.rmdir(staging) # Delete /path/new/ subfolder
    print("%i images renamed." % originalLen)

In [7]:
# Renumber every matching file in the provided directory as 1, 2, ..., N
def renameAll(path):
    """Rename the files in ``path`` to consecutive numbers, keeping extensions.

    Paths ending in ``Grouped/`` or ``Known/`` are handed to ``renameGrouped``
    instead. Otherwise each file returned by ``grabImageList(path)`` is moved
    into ``path + "new/"`` under its new number, everything in that folder is
    moved back into ``path``, and the folder is removed.

    ``path`` is concatenated directly with file names, so it must end with a
    path separator.
    """
    # Grouped folders have their own numbering scheme; never flatten them here
    if path.endswith(("Grouped/", "Known/")):
        renameGrouped(path)
        return

    imageList = grabImageList(path)
    staging = path + "new/"
    if not os.path.isdir(staging):
        # Create the staging subfolder on first use
        os.makedirs(staging)
        print ("Created /new/ folder")

    for number, image in enumerate(imageList, start=1):
        # ".jpeg" is one character longer than the other accepted extensions
        suffix_len = 5 if image.lower().endswith(".jpeg") else 4
        new_name = "%snew/%s%s" % (path, number, image[-suffix_len:])
        os.rename(path + image, new_name)
        time.sleep(0.08) # Pause between renames

    # Bring the renamed files back from the staging subfolder
    for staged in os.listdir(staging):
        os.rename(staging + staged, path + staged)
        time.sleep(0.08)

    os.rmdir(staging) # Remove the now-empty staging subfolder
    print ("%i images renamed." % len(imageList))

In [180]:
# Directory to process; renameAll appends file names directly, so it must end in "/"
imagePath = "/Users/CollinHeist/Downloads/Check/SORT/Unnamed/Ungrouped" + "/"
# True: renumber the files in imagePath. False: scan imagePath for near-duplicates.
rename = True

if not rename:
    time_list = np.asarray(checkDupes(imagePath))
#     time_list = np.subtract(time_list, time_list[0])
#     n = np.arange(-1, len(time_list))[-1:0:-1] # Reverse the list from n -> 0
#     create_plot([time_list], [(n, )], ["$t$"], ["n"], [("Images Remaining / Time", )], 1)
else:
    renameAll(imagePath)

Created /new/ folder
997 images renamed.
