Skip to content

Commit

Permalink
Merge 77a04e4 into 9b7bcf7
Browse files (browse the repository at this point in the history)
  • Loading branch information
Avasam committed Sep 3, 2022
2 parents: 9b7bcf7 + 77a04e4; commit 1bf40be
Show file tree
Hide file tree
Showing 25 changed files with 652 additions and 591 deletions.
7 changes: 7 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[*]
indent_style = tab
indent_size = 2
[*.yml]
indent_style = space
[*.py]
indent_size = 4
11 changes: 7 additions & 4 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,22 @@ jobs:
- name: Install imagemagick
run: |
sudo apt-get install -y imagemagick
# conda does not support environment markers
- name: Install testing dependencies (Python 3.6+)
if: ${{ matrix.python-version >= 3.6 }}
run: |
conda install -c anaconda -c conda-forge --file requirements-linting-old.txt
- name: Install testing dependencies
run: |
conda install -c anaconda --file conda-requirements.txt flake8 packaging pytest coveralls coverage
conda install -c anaconda -c conda-forge --file requirements-conda.txt --file requirements-linting-old.txt six packaging pytest coveralls coverage
- name: Conda info
run: |
conda info
conda list
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
flake8 . --show-source
- name: Test install from setup.py
run: pip install .
- run: coverage run -m pytest .
Expand Down
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
include README.rst
include *.txt
include LICENSE
include *.txt
include LICENSE
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ clean-doc:
rm -rf docs/build

lint: ## check style with flake8
flake8 imagehash/__init__.py tests
flake8 .

lint-fix: ## fix style with autopep8 and isort; ignores to not autofix tabs to spaces, but still warn when mixed
autopep8 . --in-place --aggressive --aggressive --aggressive --recursive --ignore=W191,E101,E111,E122
isort .

test: ## run tests quickly with the default Python
pytest
Expand Down
13 changes: 7 additions & 6 deletions examples/crop_resistance.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
import imagehash
from PIL import Image

import imagehash

SAVE_IMAGES = False

# Load image
full_image = Image.open("../tests/data/peppers.png")
full_image = Image.open('../tests/data/peppers.png')
width, height = full_image.size
# Hash it
full_hash = imagehash.crop_resistant_hash(full_image)

# Crop it
for x in range(5, 50, 5):
start = x/100
end = 1-start
start = x / 100
end = 1 - start
crop_img = full_image.crop((start * width, start * height, end * width, end * height))
crop_hash = imagehash.crop_resistant_hash(crop_img)
if SAVE_IMAGES:
crop_img.save("crop_{}.png".format(str(x).zfill(2)))
crop_img.save('crop_{}.png'.format(str(x).zfill(2)))
crop_diff = full_hash.hash_diff(crop_hash)
print(
"Cropped {}% from each side. Hash has {} matching segments with {} total hamming distance".format(
'Cropped {}% from each side. Hash has {} matching segments with {} total hamming distance'.format(
x, crop_diff[0], crop_diff[1]
)
)
8 changes: 4 additions & 4 deletions examples/crop_resistant_segmentation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy
from PIL import Image, ImageFilter

import imagehash
from PIL import Image, ImageFilter

IMAGE_FILE = "../tests/data/peppers.png"
IMAGE_FILE = '../tests/data/peppers.png'
IMG_SIZE = 300
SEGMENT_THRESHOLD = 128
MIN_SEGMENT_SIZE = 500
Expand All @@ -24,14 +24,14 @@
full_image = Image.open(IMAGE_FILE)
width, height = full_image.size
# Image pre-processing
image = full_image.convert("L").resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS)
image = full_image.convert('L').resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS)
# Add filters
image = image.filter(ImageFilter.GaussianBlur()).filter(ImageFilter.MedianFilter())
pixels = numpy.array(image).astype(numpy.float32)
# Split segments
segments = imagehash._find_all_segments(pixels, SEGMENT_THRESHOLD, MIN_SEGMENT_SIZE)
# Change back to RGB
image = image.convert("RGB")
image = image.convert('RGB')
# Colour in segments
for num, segment in enumerate(segments):
for x, y in segment:
Expand Down
68 changes: 35 additions & 33 deletions examples/hashimages.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,56 @@
#!/usr/bin/env python
from __future__ import (absolute_import, division, print_function)
from PIL import Image
from __future__ import absolute_import, division, print_function

import sys

import numpy as np
from PIL import Image

import imagehash

hashfuncs = [
('ahash', imagehash.average_hash),
('phash', imagehash.phash),
('dhash', imagehash.dhash),
('whash-haar', imagehash.whash),
('whash-db4', lambda img: imagehash.whash(img, mode='db4')),
('colorhash', imagehash.colorhash),
('ahash', imagehash.average_hash),
('phash', imagehash.phash),
('dhash', imagehash.dhash),
('whash-haar', imagehash.whash),
('whash-db4', lambda img: imagehash.whash(img, mode='db4')),
('colorhash', imagehash.colorhash),
]


def alpharemover(image):
if image.mode != 'RGBA':
return image
canvas = Image.new('RGBA', image.size, (255,255,255,255))
canvas.paste(image, mask=image)
return canvas.convert('RGB')
if image.mode != 'RGBA':
return image
canvas = Image.new('RGBA', image.size, (255, 255, 255, 255))
canvas.paste(image, mask=image)
return canvas.convert('RGB')


def image_loader(hashfunc, hash_size=8):
def function(path):
image = alpharemover(Image.open(path))
return hashfunc(image)
return function
def function(path):
image = alpharemover(Image.open(path))
return hashfunc(image)
return function


def with_ztransform_preprocess(hashfunc, hash_size=8):
def function(path):
image = alpharemover(Image.open(path))
image = image.convert("L").resize((hash_size, hash_size), Image.ANTIALIAS)
data = image.getdata()
quantiles = np.arange(100)
quantiles_values = np.percentile(data, quantiles)
zdata = (np.interp(data, quantiles_values, quantiles) / 100 * 255).astype(np.uint8)
image.putdata(zdata)
return hashfunc(image)
return function
def function(path):
image = alpharemover(Image.open(path))
image = image.convert('L').resize((hash_size, hash_size), Image.ANTIALIAS)
data = image.getdata()
quantiles = np.arange(100)
quantiles_values = np.percentile(data, quantiles)
zdata = (np.interp(data, quantiles_values, quantiles) / 100 * 255).astype(np.uint8)
image.putdata(zdata)
return hashfunc(image)
return function


hashfuncopeners = [(name, image_loader(func)) for name, func in hashfuncs]
hashfuncopeners += [(name + '-z', with_ztransform_preprocess(func)) for name, func in hashfuncs if name != 'colorhash']

files = sys.argv[1:]
for path in files:
hashes = [str(hashfuncopener(path)) for name, hashfuncopener in hashfuncopeners]
print(path, ' '.join(hashes))
#print(path, colorhash(path))



hashes = [str(hashfuncopener(path)) for name, hashfuncopener in hashfuncopeners]
print(path, ' '.join(hashes))
# print(path, colorhash(path))
113 changes: 58 additions & 55 deletions find_similar_images.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,53 @@
#!/usr/bin/env python
from __future__ import (absolute_import, division, print_function)
from __future__ import absolute_import, division, print_function

from PIL import Image
import six

import imagehash

"""
Demo of hashing
"""
def find_similar_images(userpaths, hashfunc = imagehash.average_hash):
def is_image(filename):
f = filename.lower()
return f.endswith(".png") or f.endswith(".jpg") or \
f.endswith(".jpeg") or f.endswith(".bmp") or \
f.endswith(".gif") or '.jpg' in f or f.endswith(".svg")

image_filenames = []
for userpath in userpaths:
image_filenames += [os.path.join(userpath, path) for path in os.listdir(userpath) if is_image(path)]
images = {}
for img in sorted(image_filenames):
try:
hash = hashfunc(Image.open(img))
except Exception as e:
print('Problem:', e, 'with', img)
continue
if hash in images:
print(img, ' already exists as', ' '.join(images[hash]))
if 'dupPictures' in img:
print('rm -v', img)
images[hash] = images.get(hash, []) + [img]

#for k, img_list in six.iteritems(images):
# if len(img_list) > 1:
# print(" ".join(img_list))


if __name__ == '__main__':
import sys, os
def usage():
sys.stderr.write("""SYNOPSIS: %s [ahash|phash|dhash|...] [<directory>]
def find_similar_images(userpaths, hashfunc=imagehash.average_hash):
def is_image(filename):
f = filename.lower()
return f.endswith('.png') or f.endswith('.jpg') or \
f.endswith('.jpeg') or f.endswith('.bmp') or \
f.endswith('.gif') or '.jpg' in f or f.endswith('.svg')

image_filenames = []
for userpath in userpaths:
image_filenames += [os.path.join(userpath, path) for path in os.listdir(userpath) if is_image(path)]
images = {}
for img in sorted(image_filenames):
try:
hash = hashfunc(Image.open(img))
except Exception as e:
print('Problem:', e, 'with', img)
continue
if hash in images:
print(img, ' already exists as', ' '.join(images[hash]))
if 'dupPictures' in img:
print('rm -v', img)
images[hash] = images.get(hash, []) + [img]

# for k, img_list in six.iteritems(images):
# if len(img_list) > 1:
# print(" ".join(img_list))


if __name__ == '__main__': # noqa: C901
import os
import sys

def usage():
sys.stderr.write("""SYNOPSIS: %s [ahash|phash|dhash|...] [<directory>]
Identifies similar images in the directory.
Method:
Method:
ahash: Average hash
phash: Perceptual hash
dhash: Difference hash
Expand All @@ -54,26 +58,25 @@ def usage():
(C) Johannes Buchner, 2013-2017
""" % sys.argv[0])
sys.exit(1)

hashmethod = sys.argv[1] if len(sys.argv) > 1 else usage()
if hashmethod == 'ahash':
hashfunc = imagehash.average_hash
elif hashmethod == 'phash':
hashfunc = imagehash.phash
elif hashmethod == 'dhash':
hashfunc = imagehash.dhash
elif hashmethod == 'whash-haar':
hashfunc = imagehash.whash
elif hashmethod == 'whash-db4':
hashfunc = lambda img: imagehash.whash(img, mode='db4')
elif hashmethod == 'colorhash':
hashfunc = imagehash.colorhash
elif hashmethod == 'crop-resistant':
hashfunc = imagehash.crop_resistant_hash
else:
usage()
userpaths = sys.argv[2:] if len(sys.argv) > 2 else "."
find_similar_images(userpaths=userpaths, hashfunc=hashfunc)

sys.exit(1)

hashmethod = sys.argv[1] if len(sys.argv) > 1 else usage()
if hashmethod == 'ahash':
hashfunc = imagehash.average_hash
elif hashmethod == 'phash':
hashfunc = imagehash.phash
elif hashmethod == 'dhash':
hashfunc = imagehash.dhash
elif hashmethod == 'whash-haar':
hashfunc = imagehash.whash
elif hashmethod == 'whash-db4':
def hashfunc(img):
return imagehash.whash(img, mode='db4')
elif hashmethod == 'colorhash':
hashfunc = imagehash.colorhash
elif hashmethod == 'crop-resistant':
hashfunc = imagehash.crop_resistant_hash
else:
usage()
userpaths = sys.argv[2:] if len(sys.argv) > 2 else '.'
find_similar_images(userpaths=userpaths, hashfunc=hashfunc)

0 comments on commit 1bf40be

Please sign in to comment.