In [1]:
!pip install tabulate




In [2]:
import hashlib
import sqlite3
from PIL import Image
import os
import datetime
import pandas as pd
from tabulate import tabulate

# Function to convert image to cryptographic hash
def rgb_to_cryptographic_hash(image):
    """Return the SHA-256 hex digest of an image's RGB pixel bytes.

    The image is converted to 'RGB' mode and serialised with ``tobytes()``;
    only those bytes are hashed (not the original file contents).

    Args:
        image: Object providing ``convert(mode)`` whose result provides
            ``tobytes()`` (the notebook passes images opened with PIL).

    Returns:
        str: Hexadecimal SHA-256 digest of the RGB byte string.
    """
    hasher = hashlib.sha256()
    hasher.update(image.convert('RGB').tobytes())
    return hasher.hexdigest()

def initialize_database(db_path):
    """Open the SQLite database at ``db_path`` and reset the ``image_data`` table.

    Any existing ``image_data`` table is dropped (its rows are discarded) and
    an empty table with the columns id, filename, hash_value, location, size,
    created_date and extension is created in its place.

    Args:
        db_path: Path of the SQLite database file passed to ``sqlite3.connect``.

    Returns:
        sqlite3.Connection: The open connection; the caller is responsible
        for closing it.
    """
    drop_statement = 'DROP TABLE IF EXISTS image_data'
    create_statement = '''
        CREATE TABLE IF NOT EXISTS image_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            filename TEXT,
            hash_value TEXT,
            location TEXT,
            size INTEGER,
            created_date TEXT,
            extension TEXT
        )
    '''

    connection = sqlite3.connect(db_path)
    db_cursor = connection.cursor()
    # Drop first so the table is always rebuilt with the schema above.
    for statement in (drop_statement, create_statement):
        db_cursor.execute(statement)
    connection.commit()
    return connection

# Insert image data into the database
def store_image_data(cursor, filename, hash_value, location, size, created_date, extension):
    """Insert one image record into the ``image_data`` table.

    The statement is parameterised; nothing is committed here, so the caller
    must commit on the owning connection.

    Args:
        cursor: Database cursor used to execute the INSERT.
        filename: Value for the ``filename`` column.
        hash_value: Value for the ``hash_value`` column.
        location: Value for the ``location`` column.
        size: Value for the ``size`` column.
        created_date: Value for the ``created_date`` column.
        extension: Value for the ``extension`` column.
    """
    insert_sql = '''
        INSERT INTO image_data (filename, hash_value, location, size, created_date, extension)
        VALUES (?, ?, ?, ?, ?, ?)
    '''
    record = (filename, hash_value, location, size, created_date, extension)
    cursor.execute(insert_sql, record)

# Load images, compute cryptographic hashes, and store details in the database
def compute_and_store_hashes(folder_path, db_path):
    """Hash every image file in ``folder_path`` and record it in the database.

    The ``image_data`` table in ``db_path`` is reset via
    ``initialize_database`` and then filled with one row per file that could
    be opened as an image: filename, SHA-256 of its RGB pixel data, full
    path, size in bytes, timestamp and file extension.

    Files that fail to open or hash are reported on stdout and skipped;
    directory entries that are not regular files are ignored.

    Args:
        folder_path: Directory whose entries are scanned (not recursive).
        db_path: Path of the SQLite database file to (re)create.

    Raises:
        OSError: If ``folder_path`` cannot be listed. This is raised before
            the database is touched, so existing data is left intact.
    """
    # List the folder BEFORE resetting the database so that a wrong folder
    # path does not wipe a previously populated table. Sorting makes the
    # assigned row ids independent of the platform's directory order.
    filenames = sorted(os.listdir(folder_path))

    conn = initialize_database(db_path)
    try:
        cursor = conn.cursor()

        for filename in filenames:
            img_path = os.path.join(folder_path, filename)

            # Sub-directories and other non-file entries are not images.
            if not os.path.isfile(img_path):
                continue

            try:
                # The context manager releases the file handle even when
                # decoding fails part-way through.
                with Image.open(img_path) as image:
                    hash_value = rgb_to_cryptographic_hash(image)

                size = os.path.getsize(img_path)
                extension = os.path.splitext(filename)[1]
                # getctime is the creation time on Windows; on Unix it is the
                # time of the last metadata change.
                created_date = datetime.datetime.fromtimestamp(
                    os.path.getctime(img_path)
                ).strftime('%Y-%m-%d %H:%M:%S')

                store_image_data(cursor, filename, hash_value, img_path, size, created_date, extension)

            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Failed to process {filename}: {e}")

        conn.commit()
    finally:
        # Always release the database handle, even on unexpected errors.
        conn.close()

# Retrieve and display image data in a single-line format using pandas
def display_image_data(db_path):
    """Print every row of the ``image_data`` table as one aligned text line.

    A header line is printed first, followed by one line per row with the
    columns ID, Filename, Hash Value, Extension, Location, Size (bytes) and
    Created separated by `` | ``. Filename is truncated to 15 characters and
    Hash Value / Location to 30 characters so each record fits on one line.
    Rows are formatted manually, so pandas display options are not involved.

    Args:
        db_path: Path of the SQLite database file to read.
    """
    query = "SELECT id, filename, hash_value, extension, location, size, created_date FROM image_data"

    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    # The Extension column is 9 wide (the length of its header label) so that
    # the header separators line up with the separators in the data rows.
    print(f"{'ID':<5} | {'Filename':<15} | {'Hash Value':<30} | {'Extension':<9} | {'Location':<30} | {'Size (bytes)':<12} | {'Created'}")

    # Tuples follow the SELECT column order above.
    for image_id, filename, hash_value, extension, location, size, created in df.itertuples(index=False, name=None):
        print(f"{image_id:<5} | {filename[:15]:<15} | {hash_value[:30]:<30} | {extension:<9} | {location[:30]:<30} | {size:<12} | {created}")


# Example usage
# NOTE(review): absolute, machine-specific Windows path; adjust before running elsewhere.
folder_path = r"C:\APPU SELVA\Pongal 2024"
db_path = "image_data.db"  # Name of the SQLite database file
# Resets the image_data table in db_path, then inserts one row per readable image.
compute_and_store_hashes(folder_path, db_path)
# Prints the stored rows, one per line, to stdout.
display_image_data(db_path)


ID    | Filename        | Hash Value                     | Extension | Location                       | Size (bytes) | Created
1     | 1.JPG           | baaaa85bd08a4f41ce186aa7ba876f | .JPG     | C:\APPU SELVA\Pongal 2024\1.JP | 12646064     | 2024-01-14 20:27:21
2     | 10.JPG          | 7a3064ef3a3896e713df6b15f6dd76 | .JPG     | C:\APPU SELVA\Pongal 2024\10.J | 13271954     | 2024-01-14 20:52:18
3     | 11.JPG          | 21d8ef77e345bee34c9a600a9ca2a7 | .JPG     | C:\APPU SELVA\Pongal 2024\11.J | 13321929     | 2024-01-14 20:52:13
4     | 12.JPG          | 46a76564058f9dfaa20f1c5b5defa8 | .JPG     | C:\APPU SELVA\Pongal 2024\12.J | 13419597     | 2024-01-14 20:52:06
5     | 13.JPG          | 6245bde678279010a49c2603f238f4 | .JPG     | C:\APPU SELVA\Pongal 2024\13.J | 13206084     | 2024-01-14 20:51:56
6     | 14.JPG          | f2cab9e2a10139dbcead258a07befc | .JPG     | C:\APPU SELVA\Pongal 2024\14.J | 13328231     | 2024-01-14 20:51:45
7     | 15.JPG          | 1e0c9baf9bac435b9e5

In [3]:
import hashlib
import os
from PIL import Image
import imagehash

# Function to convert image to cryptographic hash
def rgb_to_cryptographic_hash(image):
    """Compute the SHA-256 hex digest of an image's RGB byte representation.

    Args:
        image: Object providing ``convert(mode)`` whose result provides
            ``tobytes()`` (the notebook passes images opened with PIL).

    Returns:
        str: Hexadecimal SHA-256 digest of ``image.convert('RGB').tobytes()``.
    """
    pixel_data = image.convert('RGB').tobytes()
    return hashlib.sha256(pixel_data).hexdigest()

def compute_perceptual_hash(image):
    """Return the perceptual hash of ``image`` using average hashing.

    Args:
        image: Image object accepted by ``imagehash.average_hash``.

    Returns:
        The hash object produced by ``imagehash.average_hash(image)``.
    """
    return imagehash.average_hash(image)

def compute_and_store_hashes(folder_path):
    """Compute the SHA-256 RGB hash of every image file in ``folder_path``.

    Despite the name, nothing is persisted: the hashes are only collected in
    a dictionary that is returned to the caller.

    NOTE: this redefines the two-argument
    ``compute_and_store_hashes(folder_path, db_path)`` declared earlier in the
    notebook; once this cell has run, the database-backed version is no
    longer reachable under this name.

    Files that fail to open or hash are reported on stdout and skipped;
    directory entries that are not regular files are ignored.

    Args:
        folder_path: Directory whose entries are scanned (not recursive).

    Returns:
        dict: Maps each successfully processed filename to its hex digest,
        in sorted filename order.
    """
    image_hashes = {}

    # Sorted so the resulting dictionary order does not depend on the
    # platform's directory listing order.
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)

        # Sub-directories and other non-file entries are not images.
        if not os.path.isfile(img_path):
            continue

        try:
            # The context manager releases the file handle even when
            # decoding fails part-way through.
            with Image.open(img_path) as image:
                image_hashes[filename] = rgb_to_cryptographic_hash(image)
        except Exception as e:
            # Best effort: one unreadable file must not abort the scan.
            print(f"Failed to process {filename}: {e}")

    return image_hashes

def group_similar_images(folder_path, image_hashes):
    """Group image filenames by the string form of their perceptual hash.

    Only images whose perceptual hashes are exactly equal share a group;
    hashes that differ by even one bit produce separate groups.

    Args:
        folder_path: Directory containing the image files.
        image_hashes: Mapping whose keys are the filenames to group (the
            values are not used).

    Returns:
        dict: Maps ``str(perceptual_hash)`` to the list of filenames with
        that hash, in iteration order of ``image_hashes``.
    """
    groups = {}
    for filename in image_hashes:
        image_path = os.path.join(folder_path, filename)
        # Close each file as soon as its hash is computed so handles do not
        # accumulate while iterating over a large folder.
        with Image.open(image_path) as image:
            perceptual_hash = compute_perceptual_hash(image)

        groups.setdefault(str(perceptual_hash), []).append(filename)
    return groups

# Example usage
# NOTE(review): absolute, machine-specific Windows path; adjust before running elsewhere.
folder_path = r"C:\APPU SELVA\Pongal 2024"  # Define the folder containing images

# Step 1: Compute image hashes (cryptographic); returns a {filename: sha256 hex digest} dict
image_hashes = compute_and_store_hashes(folder_path)

# Step 2: Group images whose perceptual-hash strings are identical
similar_image_groups = group_similar_images(folder_path, image_hashes)

# Print each perceptual-hash key followed by the filenames that share it
for group, images in similar_image_groups.items():
    print(f"Group: {group}")
    print(f"Images: {images}")


Group: defefae0c2c1c0cc
Images: ['1.JPG']
Group: 860c58f830733074
Images: ['10.JPG', '11.JPG']
Group: ccf0f8fc8c8680c0
Images: ['12.JPG']
Group: 004000026f7e1eff
Images: ['13.JPG']
Group: 004040026f7e1eff
Images: ['14.JPG']
Group: 0040c0023f5e3eff
Images: ['15.JPG']
Group: 0040c0023f1e3eff
Images: ['16.JPG']
Group: 004080023f1e3eff
Images: ['17.JPG']
Group: 008080007e1e3eff
Images: ['18.JPG']
Group: 008080804e7e3c7f
Images: ['19.JPG']
Group: e2e6ccccc999b818
Images: ['2.JPG']
Group: 00006060607efefe
Images: ['20.JPG']
Group: 000060606c7c7c7c
Images: ['21.JPG', '22.JPG']
Group: 0301010202067fff
Images: ['23.JPG']
Group: 00003c7e7c703dff
Images: ['24.JPG']
Group: 0000001f7efedfff
Images: ['25.JPG']
Group: 0000003cdfdfdfff
Images: ['26.JPG']
Group: 0000002c3cdfdfff
Images: ['27.JPG']
Group: 0000002c3c5e7fff
Images: ['28.JPG']
Group: 968216021c381f7f
Images: ['29.jpg']
Group: 0b07e74703809c99
Images: ['3.JPG']
Group: 10100111581fff03
Images: ['30.jpg', '31.jpg']
Group: 1c1818001909ffff
Ima

In [4]:
from itertools import combinations

def calculate_hamming_distance(hash1, hash2):
    """Return the difference ``hash1 - hash2``.

    The notebook passes imagehash objects, for which subtraction yields the
    Hamming distance between the two hashes.

    Args:
        hash1: Left operand of the subtraction.
        hash2: Right operand of the subtraction.

    Returns:
        Whatever ``hash1 - hash2`` evaluates to.
    """
    distance = hash1 - hash2
    return distance

# Calculate distances within each group
def compute_similarity_within_groups(groups, folder=None):
    """Print the pairwise perceptual-hash distance for the files of each group.

    For every group a ``Group <key>:`` line is printed, followed by one
    ``Distance between ...`` line per unordered pair of filenames in it.
    Groups with fewer than two files print only the group line.

    Args:
        groups: Mapping of group key to a list of filenames.
        folder: Directory containing the image files. When omitted, the
            notebook-level ``folder_path`` variable is used.
    """
    for group, filenames in groups.items():
        print(f"Group {group}:")

        # Each image is opened and hashed at most once per group instead of
        # once for every pair it takes part in.
        hash_cache = {}
        for file1, file2 in combinations(filenames, 2):
            for name in (file1, file2):
                if name not in hash_cache:
                    # Resolved lazily so the global is only needed when an
                    # image actually has to be loaded.
                    base_dir = folder_path if folder is None else folder
                    with Image.open(os.path.join(base_dir, name)) as image:
                        hash_cache[name] = compute_perceptual_hash(image)

            distance = calculate_hamming_distance(hash_cache[file1], hash_cache[file2])
            print(f"Distance between {file1} and {file2}: {distance}")

# Print pairwise hash distances for the groups built by group_similar_images.
# Files share a group only when their hash strings are identical, so the
# distances reported in the output below are all 0.
compute_similarity_within_groups(similar_image_groups)


Group defefae0c2c1c0cc:
Group 860c58f830733074:
Distance between 10.JPG and 11.JPG: 0
Group ccf0f8fc8c8680c0:
Group 004000026f7e1eff:
Group 004040026f7e1eff:
Group 0040c0023f5e3eff:
Group 0040c0023f1e3eff:
Group 004080023f1e3eff:
Group 008080007e1e3eff:
Group 008080804e7e3c7f:
Group e2e6ccccc999b818:
Group 00006060607efefe:
Group 000060606c7c7c7c:
Distance between 21.JPG and 22.JPG: 0
Group 0301010202067fff:
Group 00003c7e7c703dff:
Group 0000001f7efedfff:
Group 0000003cdfdfdfff:
Group 0000002c3cdfdfff:
Group 0000002c3c5e7fff:
Group 968216021c381f7f:
Group 0b07e74703809c99:
Group 10100111581fff03:
Distance between 30.jpg and 31.jpg: 0
Group 1c1818001909ffff:
Group 1818180019091fff:
Group 1818100019097fff:
Group 000000209c1c1fff:
Group 0000002c1c1c3fff:
Group 200d5e9b83d3dbc9:
Group 1c1e1f3e3c3c3800:
Group 3f1f3f3f3e380000:
Group 1f1f3f3f38300000:
Group 000e3f7ffe7c3c00:
Group a080d8e4f0f08000:
