In [2]:
from PIL import Image
import imagehash
import os
import itertools
import numpy as np

image_folder = "extracted_images"

# Step 1: Compute hashes for each author
# Maps each author sub-directory name to the list of perceptual hashes of the
# image files found directly inside that sub-directory.
author_hashes = {}

for author in os.listdir(image_folder):
    author_path = os.path.join(image_folder, author)
    if not os.path.isdir(author_path):
        continue  # only sub-directories are treated as authors

    hashes = []
    for f in os.listdir(author_path):
        # Match real extensions (including the dot) so that a name which
        # merely ends in the letters "png"/"jpg"/"jpeg" is not picked up.
        if f.lower().endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(author_path, f)
            # The context manager closes the underlying file handle as soon
            # as the hash is computed, instead of leaking one per image.
            with Image.open(img_path) as img:
                hashes.append(imagehash.phash(img))
    # Authors without any matching image are left out of the mapping.
    if hashes:
        author_hashes[author] = hashes

# Step 2: Compute similarity between authors
def author_similarity(hashes1, hashes2, threshold=5):
    """
    Score how alike two authors' image collections are.

    Every hash in ``hashes1`` is compared with every hash in ``hashes2``;
    a pair counts as similar when ``h1 - h2`` is at most ``threshold``.

    Returns the number of similar pairs divided by the total number of
    pairs, or 0 when either collection is empty.
    """
    pair_total = len(hashes1) * len(hashes2)
    if pair_total <= 0:
        # No pairs to compare, so avoid dividing by zero.
        return 0

    similar_pairs = sum(
        1
        for left in hashes1
        for right in hashes2
        if left - right <= threshold
    )
    return similar_pairs / pair_total

# Step 3: Build similarity matrix
# Row/column k of the matrix corresponds to authors[k].
authors = list(author_hashes)
n_authors = len(authors)
similarity_matrix = np.zeros((n_authors, n_authors))

# Only the upper triangle (including the diagonal) is computed; each score is
# mirrored into the lower triangle because the matrix is filled symmetrically.
for row in range(n_authors):
    row_hashes = author_hashes[authors[row]]
    for col in range(row, n_authors):
        score = author_similarity(row_hashes, author_hashes[authors[col]])
        similarity_matrix[row, col] = score
        similarity_matrix[col, row] = score

# Step 4: Display results
# One call, newline-separated: heading, then the author order that labels the
# matrix rows/columns, then the matrix itself.
print(
    "Author similarity matrix (fraction of similar images):",
    authors,
    similarity_matrix,
    sep="\n",
)


Author similarity matrix (fraction of similar images):
['Amit Saxena', 'Amita Jain', 'Animesh Chaturvedi', 'Ankita Jain', 'Arun Chauhan', 'Aruna Malapati', 'Aruna Tiwari', 'Barsha Mitra', 'Bhanukiran Perabathini', 'Bharghava Rajaram', 'Deepak K T', 'Devendra K Tayal', 'Dilip Singh Sisodia', 'dipanjan roy', 'Dipti Mishra', 'Dr. Ashish Jain', 'Dr. Shikha Mehta', 'Dr.Manpreet Kaur', 'Dr.Rohit Beniwal', 'Dr.Ruchi Mittal', 'esha baidya kayal', 'Geeta Rani', 'Himanee Bansal', 'Himanshu Mittal', 'J. Balasubramaniam', 'Jagdish Bansal', 'Jayasri D', 'Jian Wang', 'K.V. Sambasivarao', 'Kastuv Nag', 'Khaldoon Dhou', 'Krishna Asawa', 'Mala Saraswat', 'Manju_JaypeeTech', 'Manoranjan Mohanty', 'Minni Jain', 'Mukesh Prasad', 'Navneet Pratap Singh', 'Nikhil Tripathi', 'Nishchal K. Verma', 'Om Prakash Patel', 'OmPrakash Kaiwartya', 'Pabitra Mitra', 'Payal Khurana Batra', 'Pinaki Chakraborty', 'Prakash Chandra Sharma', 'Prof. B Subudhi', 'Rama Murthy Garimella', 'Ramakrishan Maheshwari', 'Ramalinga Swamy