In [23]:
import os 
import cv2
import random
import numpy as np 
import imagehash as ih
from uuid import uuid4
from PIL import Image, ImageFilter, ImageOps
from multiprocessing import Pool
from os.path import join as path_join


# Root of the on-disk dataset, relative to the notebook's working directory.
BASE_PATH = "data"
# Source images; unique_photo() and worsen_resolution() read from here.
IMAGES_PATH = os.path.join(BASE_PATH, "original")
# worsen_resolution() writes one sub-directory per source image under this path.
MODIFIED_PATH = os.path.join(BASE_PATH, "mod")
# Scratch directory; the only one of the three that is created eagerly below.
TRASH_PATH = os.path.join(BASE_PATH, "__trash__")

# Creates data/__trash__ (and therefore data/) when missing; a no-op otherwise.
os.makedirs(name=TRASH_PATH, exist_ok=True)

In [21]:
def unique_photo():
    """Rename every regular file in IMAGES_PATH to ``<uuid4><original extension>``.

    Gives all images uniform, collision-free names. The rename happens in
    place inside IMAGES_PATH, so running it again assigns fresh names.

    Sub-directories are left untouched, and a file without an extension is
    renamed to a bare uuid (no trailing dot or bogus extension).
    """
    for entry in os.listdir(IMAGES_PATH):
        source = path_join(IMAGES_PATH, entry)
        # Only files are images; renaming a directory would break paths into it.
        if not os.path.isfile(source):
            continue
        # splitext keeps the leading dot and returns "" when there is no
        # extension, unlike split(".")[-1] which returns the whole name.
        _, ext = os.path.splitext(entry)
        target = path_join(IMAGES_PATH, f"{uuid4()}{ext}")
        os.rename(source, target)

# unique_photo()

In [None]:
def worsen_resolution(image: str):
    """Save progressively lower-quality copies of one image from IMAGES_PATH.

    For each value in a fixed list, the image is re-saved as
    ``MODIFIED_PATH/<name without extension>/img-res-<value><extension>``,
    with the value passed as the ``quality`` argument of ``Image.save``.

    Args:
        image: File name (not a full path) of an image inside IMAGES_PATH.
    """
    # Values handed to Image.save(quality=...), from best to worst.
    resolutions = (95, 75, 50, 40, 30, 20, 10, 5, 1)
    print(image)

    # splitext keeps interior dots in the base name, so "a.1.jpg" and "a.2.jpg"
    # no longer collapse into the same "a" output directory.
    basefile, ext = os.path.splitext(image)

    mod_path = path_join(MODIFIED_PATH, basefile)
    os.makedirs(mod_path, exist_ok=True)

    # The context manager closes the underlying file once all copies are written.
    with Image.open(path_join(IMAGES_PATH, image)) as img:
        for res in resolutions:
            save_path = path_join(mod_path, f"img-res-{res}{ext}")
            img.save(save_path, quality=res)

# with Pool(processes=10) as pool:
#     pool.map(worsen_resolution, os.listdir(IMAGES_PATH))

In [34]:
# Characters new_filename() draws from when building its random salt.
ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Pool of Pillow ImageFilter filters. In this notebook they are only referenced
# by the commented-out "random filter" step inside data_augmentation().
FILTERS = [
    ImageFilter.GaussianBlur(2),
    ImageFilter.EDGE_ENHANCE_MORE,
    ImageFilter.ModeFilter(7),
    ImageFilter.MaxFilter(3),
    ImageFilter.MinFilter(3),
    ImageFilter.MedianFilter(5),
]
# Cached length of FILTERS.
FILTERS_LEN = len(FILTERS)

def should_apply(percentage: float=0.5):
    """Decide at random whether a transformation should occur.

    Args:
        percentage: Chance of returning True, expressed as a fraction
            (0.5 means half of the calls), not as a 0-100 value.

    Returns:
        True when a fresh ``random.random()`` draw falls below ``percentage``.
    """
    roll = random.random()
    return percentage > roll

def new_filename(name: str):
    """Create a new filename for an augmented image.

    Inserts ``-aug-<salt>`` between the base name and the extension, where the
    salt is six random characters from ALPHABET. The salt is needed because
    one image can have several augmented versions.

    Args:
        name: Original file name or path. Directory parts, interior dots and a
            missing extension are all preserved as-is.

    Returns:
        The salted name, e.g. ``photo.jpg`` -> ``photo-aug-a1B2c3.jpg``.
    """
    # splitext only splits off the final extension of the last path component,
    # so "./dir/img.v2.png" keeps its "./" prefix and interior dot, and a name
    # without any dot yields an empty extension instead of an unpacking error.
    root, extension = os.path.splitext(name)

    salt = ''.join(random.choice(ALPHABET) for _ in range(6))
    return f"{root}-aug-{salt}{extension}"

def data_augmentation(filepath: str, outpath: str, num_variations: int=5):
    """Write randomly augmented copies of one image into ``outpath``.

    Each of the ``num_variations`` rounds independently:
      * mirrors the image (``ImageOps.mirror``) with probability 0.5,
      * rotates it by a random whole angle in [-90, 90] with probability 0.4,
      * converts it to RGB when it is in another mode, and
      * saves it with a ``quality`` drawn at random from a fixed list.

    The file name records the applied steps, e.g. ``img-mir-r-55-q50.jpg``.
    Rounds that draw the same steps produce the same name, so the later file
    replaces the earlier one and fewer than ``num_variations`` distinct files
    may remain.

    Args:
        filepath: Path of the source image.
        outpath: Directory that receives the augmented files; created if missing.
        num_variations: Number of augmentation rounds to run.
    """
    # # Give a random chance of not having an augmented version of the image 
    # if should_apply(0.1) and num_variations == 0: return 
    # # And a chance of having multiple augmented versions 
    # if should_apply(0.3) and num_variations == 0: data_augmentation(filepath, save=save)

    # Keeps the leading dot; "" when the source has no extension.
    ext = os.path.splitext(filepath)[1]
    if outpath:
        os.makedirs(outpath, exist_ok=True)

    # Open the source once; every transformation below returns a new image, so
    # the source itself is never modified and can seed each round. The context
    # manager closes the file handle when all rounds are done.
    with Image.open(filepath) as source:
        for _ in range(num_variations):
            name = "img-"
            img = source
            if should_apply(0.5):
                # Mirror the image on its vertical axis.
                name += "mir-"
                img = ImageOps.mirror(img)

            if should_apply(0.4):
                # We do not want to flip it upside down, because a real image is
                # unlikely to be supplied flipped. But an angled face is very common.
                angle = random.randint(-90, 90)
                name += f"r{angle}-"
                img = img.rotate(angle)

            # if should_apply(0.2):
            #     if should_apply(0.5):
            #         # Returns a grayscale image
            #         img = ImageOps.grayscale(img) 
            #     else: 
            #         # It'll create a kinda dithered B&W image
            #         img = img.convert('1')

            # Be sure that the image is in the right format before filters
            if img.mode != "RGB":
                img = img.convert("RGB")

            # # Apply a random filter
            # if should_apply(0.6):
            #     idx = random.randint(0, FILTERS_LEN)
            #     if idx == FILTERS_LEN:
            #         # Those filters don't have a dedicated class, so we do a little trolling
            #         if should_apply(0.5):
            #             img = img.effect_spread(5)
            #         elif should_apply(0.5):
            #             # Filter a # of colors from the image
            #             color_scale = [8, 16, 14, 32, 48, 64]
            #             img = img.convert('P', 
            #                             palette=Image.ADAPTIVE, 
            #                             colors=random.choice(color_scale))
            #             img = img.convert("RGB")
            #         else:
            #             # Adds some noise
            #             img = img.convert('P', palette=Image.BICUBIC)
            #             img = img.convert("RGB")
            #     else:
            #         img = img.filter(FILTERS[idx])

            res = random.choice((95, 75, 50, 40, 30, 20, 10, 5, 1))
            name += f"q{res}{ext}"
            # Honour outpath: the file used to be written to the working
            # directory regardless of the requested destination.
            img.save(path_join(outpath, name), quality=res)

  
# Trial run: request 70 augmentation rounds for "image.jpg", passing "." as outpath.
data_augmentation("image.jpg", ".", 70)

In [None]:
# For each hashed image, report the hash algorithms whose value equals the one
# stored in `original`, together with the hash difference and the equality flag.
# NOTE(review): a line is printed only when the two hashes compare equal, so the
# reported difference is presumably always zero - confirm this filter is intended.
for hosh in hashes:
    print(hosh["img"], ":")
    for label, key in (
        ("Average:", "avg"),
        ("Crop Resistant:", "crop"),
        ("Wavelet:", "whash"),
        ("Perceptual:", "phash"),
        ("Difference:", "dhash"),
        ("Color:", "color"),
    ):
        if hosh[key] == original[key]:
            print(label, original[key] - hosh[key], "=>", hosh[key] == original[key])
    print()


In [38]:
# Dump every stored hash of every image, one labelled line per algorithm,
# followed by a blank separator line.
for hosh in hashes:
    print(hosh["img"], ":")
    for label, key in (
        ("Average:", "avg"),
        ("Crop Resistant:", "crop"),
        ("Wavelet:", "whash"),
        ("Perceptual:", "phash"),
        ("Difference:", "dhash"),
        ("Color:", "color"),
    ):
        print(label, hosh[key])
    print()


img-mir-q95.jpg :
Average: c742ffb971e4c418
Crop Resistant: 64c4fc941c9898e8,1899d80c8d9b1f27,dc2de99a909648e1,a498c97c365a7a1e,1b0380038f98f03f,322b6959d48c9e2d,8d94f323a38c8c72,c00f0e38343c8fa6,2fe3953965114521,193931303633111a
Wavelet: c706ffb971e44418
Perceptual: caad1572aab7d380
Difference: 8d94f323a38c8c72
Color: 0a202008000

img-r-55-q50.jpg :
Average: 0e7c4e0edf9bfc30
Crop Resistant: b9b471d18d8f4eee,647861e4c4a6d290,b47630b933264c1d,a070ec6edaf23c4f,3a6e1c3c61cd8b33,c61e4d5a36868eec,7ce0d8b83232b0d0,4d1e3d3dbc0e2d4c,6c3911338d9fc3e7
Wavelet: 067c0e0edf9bdc30
Perceptual: 911646bd3e163d78
Difference: 7ce0d8b83232b0d0
Color: 11202008000

img-q1.jpg :
Average: e742ff998e272318
Crop Resistant: d9d0c6c7e6682d6d,e666e5cd4c64071a,c44b69a6e79627c9,ced89ce6f2f83693,1c0e26d3f234e183,d0d0999192a48892,4cd630333aceceb1,36073371611382a7,3d0d2f37251f3377
Wavelet: e362ff998e272218
Perceptual: 9ff80027faea86d0
Difference: 4cd630333aceceb1
Color: 01001208000

img-q75.jpg :
Average: e342ff9d8e272

In [61]:
# Milliseconds per "order" unit: video_hashes() integer-divides each sampled
# timestamp by this value to number the hashed frames.
FRAME_INTERVAL = 1000 # 1 second

def compute_hashes(frame: Image):
    """Compute six imagehash fingerprints for one frame.

    Args:
        frame: Image handed to the imagehash functions.
            NOTE(review): the annotation names the ``Image`` module imported
            from PIL; presumably a ``PIL.Image.Image`` instance is expected.

    Returns:
        A dict with a freshly generated uuid4 string under "img" and one entry
        per algorithm: "avg", "crop", "whash", "phash", "dhash" and "color".
    """
    hashers = (
        ("avg", ih.average_hash),
        ("crop", ih.crop_resistant_hash),
        ("whash", ih.whash),
        ("phash", ih.phash),
        ("dhash", ih.dhash),
        ("color", ih.colorhash),
    )
    # The id is generated first, then the hashes in the order listed above.
    result = {"img": str(uuid4())}
    for key, hasher in hashers:
        result[key] = hasher(frame)
    return result

def video_hashes(path: str, step_ms: int = 2000) -> list:
    """Hash frames sampled at a fixed time step from a video file.

    Seeks through the video ``step_ms`` milliseconds at a time, hashes each
    frame with compute_hashes() and tags it with an "order" index equal to the
    sample timestamp integer-divided by FRAME_INTERVAL.

    Args:
        path: Path of the video file to open with OpenCV.
        step_ms: Distance between sampled frames, in milliseconds.

    Returns:
        A list of hash dicts in sampling order. The list is empty when the
        video reports no frames or a non-positive FPS.

    Raises:
        ValueError: If ``step_ms`` is not positive (the loop would never end).
    """
    if step_ms <= 0:
        raise ValueError("step_ms must be positive")

    vidcap = cv2.VideoCapture(path)
    try:
        frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        duration = (frame_count / fps * 1000) if fps > 0 else 0 # In milliseconds

        hashes = []
        position = 0
        while position < duration:
            vidcap.set(cv2.CAP_PROP_POS_MSEC, position)
            print("Frame", position)
            success, frame = vidcap.read()
            if not success:
                break

            # OpenCV decodes frames in BGR channel order while PIL expects RGB;
            # without the conversion the colour-sensitive hashes see swapped
            # red and blue channels.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_hashes = compute_hashes(Image.fromarray(rgb_frame))
            frame_hashes["order"] = position // FRAME_INTERVAL
            hashes.append(frame_hashes)

            position += step_ms

        return hashes
    finally:
        # Always free the decoder and file handle, even if hashing fails.
        vidcap.release()

In [None]:
# Hash both videos in this cell so it also works on a fresh kernel: the
# comparison loop below needs `orig`, whose assignment used to be commented out.
# NOTE(review): both paths are absolute and machine-specific.
orig = video_hashes("/mnt/c/Users/Breno/Downloads/ghqe/nok/b345aa54-44d8-4100-83ad-0ede47d9c563.m4v")
print("Video 1")
crop = video_hashes("/mnt/c/Users/Breno/Downloads/ghqe/nok/00000000.m4v")
print("Video 2")

# Compare the videos sample by sample; zip stops at the shorter list, so extra
# samples of the longer video are not compared.
# NOTE(review): a line is printed only when the two hashes compare equal, so the
# reported difference is presumably always zero - confirm this filter is intended.
for original, hosh in zip(orig, crop):
    print(hosh["img"], ":")
    for label, key in (
        ("Average:", "avg"),
        ("Crop Resistant:", "crop"),
        ("Wavelet:", "whash"),
        ("Perceptual:", "phash"),
        ("Difference:", "dhash"),
        ("Color:", "color"),
    ):
        if hosh[key] == original[key]:
            print(label, original[key] - hosh[key], "=>", hosh[key] == original[key])
    print()

img => a685a0e8-587a-4c65-a453-073ead553155
avg => e342ff9d8e272318
crop => d9dcc0d6c7e6e6e8,e766e4cf4e26071b,c44b68a6f696ed78,dae46cc193a5a187,273ffe3f0ee6f003,b32b6965c4ce864b,0cd6303b3aceceb1,fc0f8fe3d3c30e9a,0b38566359355d7b,676373f3933377a7
whash => e360ff9d8e272218
phash => 9ff80027fae286d1
dhash => 0cd6303b3aceceb1
color => 09202008000
