In [1]:
import hashlib
from PIL import Image
import imagehash
import os
import datetime
import pandas as pd
import numpy as np  # Add this import to resolve the error
import matplotlib.pyplot as plt  # You also need to import matplotlib for plotting
import pprint
from IPython.display import display
import seaborn as sns


In [2]:
def load_images_from_directory(directory_path):
    images = []
    failed_files = []  # To track files that couldn't be loaded
    
    # List all files in the directory
    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        
        # Check if the file is an image (based on file extension)
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
            try:
                # Open the image using Pillow
                img = Image.open(file_path)
                img.verify()  # Verify the image file integrity
                images.append(file_path)  # Save the file path instead of the image object
                print(f"Loaded: {filename}")
            except Exception as e:
                failed_files.append((filename, str(e)))
                print(f"Error loading {filename}: {e}")
    
    # Summary
    print("\nSummary:")
    print(f"Total images loaded: {len(images)}")
    print(f"Failed to load: {len(failed_files)}")
    if failed_files:
        print("\nFailed Files:")
        for file, error in failed_files:
            print(f"{file}: {error}")
    
    return images


In [3]:
# Function to compute SHA-256 hash for an image
def compute_cryptographic_hash(image_path):
    hash_sha256 = hashlib.sha256()  # Initialize SHA-256 hash object
    
    try:
        # Open the image file in binary mode
        with open(image_path, 'rb') as img_file:
            # Read the image in chunks to avoid memory issues with large files
            while chunk := img_file.read(8192):
                hash_sha256.update(chunk)  # Update the hash with each chunk of data
        
        return hash_sha256.hexdigest()  # Return the hexadecimal representation of the hash
    
    except Exception as e:
        print(f"Error computing hash for {image_path}: {e}")
        return None


In [4]:
# Directory path where images are stored
directory_path = r"C:\APPU SELVA\temp"

# Load the images from the directory
image_paths = load_images_from_directory(directory_path)


Loaded: DSC_0172.JPG
Loaded: DSC_0769.JPG
Loaded: DSC_0777.JPG

Summary:
Total images loaded: 3
Failed to load: 0


In [5]:
# Compute hash for each image and print
cryptographic_hashes = {}
for image_path in image_paths:
    img_hash = compute_cryptographic_hash(image_path)
    if img_hash:
        cryptographic_hashes[image_path] = img_hash
        print(f"Hash for {image_path}: {img_hash}")

# Example: Display all computed hashes
if cryptographic_hashes:
    print("Image hashes could be computed:")
else:
    print("No images found or hashes could not be computed.")


Hash for C:\APPU SELVA\temp\DSC_0172.JPG: 9e422052fd039a43db88ce1a96759febfa60fee2111a7081b3fcb187c035092d
Hash for C:\APPU SELVA\temp\DSC_0769.JPG: 7623262c32c591f0aa586d17e0205457d2032347414dddf20035b3c216e99e35
Hash for C:\APPU SELVA\temp\DSC_0777.JPG: 7c5c213df376f81da6002acca512972b671f2d7e11e20aae125f470a556fc25b
Image hashes could be computed:


In [6]:
# Function to compute perceptual hash for an image
def compute_perceptual_hash(image):
    try:
        # Convert image to RGB (in case it's not in RGB format)
        image = image.convert('RGB')
        
        # Compute the perceptual hash using average hash method
        perceptual_hash = imagehash.average_hash(image)
        
        return perceptual_hash
    
    except Exception as e:
        print(f"Error computing perceptual hash: {e}")
        return None


In [7]:
# Print perceptual hashes for all images
perceptual_hashes = {}
for image_path in image_paths:
    try:
        # Open the image using Pillow
        with Image.open(image_path) as img:
            # Compute perceptual hash for the image
            perceptual_hash = compute_perceptual_hash(img)
            if perceptual_hash:
                perceptual_hashes[image_path] = perceptual_hash
                print(f"Perceptual hash for {image_path}: {perceptual_hash}")
    except Exception as e:
        print(f"Error opening image {image_path}: {e}")


Perceptual hash for C:\APPU SELVA\temp\DSC_0172.JPG: bb9383818181c1ff
Perceptual hash for C:\APPU SELVA\temp\DSC_0769.JPG: 0723002727272727
Perceptual hash for C:\APPU SELVA\temp\DSC_0777.JPG: 2707818617072727


In [8]:
# Function to create three instances of an image and compute perceptual hashes
def create_image_instances_and_hashes(image, compute_perceptual_hash):
    # Instance 1: Rotate the image by 15 degrees clockwise
    rotation_15degrees_Clockwise = image.rotate(15, resample=Image.NEAREST)
    # Compute perceptual hash for the rotated image
    hash_rotation_15degrees_Clockwise = compute_perceptual_hash(rotation_15degrees_Clockwise)
    
    # Instance 2: Rotate the image by -15 degrees (counter-clockwise)
    rotation_15degrees_CounterClockwise = image.rotate(-15, resample=Image.NEAREST)
    # Compute perceptual hash for the rotated image
    hash_rotation_15degrees_CounterClockwise = compute_perceptual_hash(rotation_15degrees_CounterClockwise)
    
    # Instance 3: Apply a shear transformation with minimal difference
    width, height = image.size
    shear_matrix = (1, 0.05, 0, 0.05, 1, 0)  # Minimal shearing transformation matrix
    Shear_Low_Difference = image.transform((width, height), Image.AFFINE, shear_matrix, resample=Image.NEAREST)
    # Compute perceptual hash for the sheared image
    hash_Shear_Low_Difference = compute_perceptual_hash(Shear_Low_Difference)
    
    return {
        "rotation_15degrees_Clockwise": hash_rotation_15degrees_Clockwise,
        "rotation_15degrees_CounterClockwise": hash_rotation_15degrees_CounterClockwise,
        "Shear_Low_Difference": hash_Shear_Low_Difference
    }


In [9]:
# Create an empty list to store the data for the table
hash_table_data = []

# Loop through each image and compute the perceptual hashes for its instances
for image_path in image_paths:
    with Image.open(image_path) as img:
        # Create the instances and compute perceptual hashes
        image_hashes = create_image_instances_and_hashes(img, compute_perceptual_hash)
        
        # Add the data to the table
        hash_table_data.append({
            "FileName": image_path,
            "Perceptual HashValue_rotation_15degrees_Clockwise": image_hashes["rotation_15degrees_Clockwise"],
            "Perceptual HashValue_rotation_15degrees_CounterClockwise": image_hashes["rotation_15degrees_CounterClockwise"],
            "Perceptual HashValue_Shear_Low_Difference": image_hashes["Shear_Low_Difference"]
        })

# Convert the data into a pandas DataFrame for easy viewing
df = pd.DataFrame(hash_table_data)

# Display the table
df.head(10)


Unnamed: 0,FileName,Perceptual HashValue_rotation_15degrees_Clockwise,Perceptual HashValue_rotation_15degrees_CounterClockwise,Perceptual HashValue_Shear_Low_Difference
0,C:\APPU SELVA\temp\DSC_0172.JPG,3e3383858181c77c,d8ddc3838581e13e,bb9383818181cffc
1,C:\APPU SELVA\temp\DSC_0769.JPG,0e70270737172720,302f0335272f4f0e,272104272f2f2f06
2,C:\APPU SELVA\temp\DSC_0777.JPG,0f2e3307b7971710,10978fb1372f2f0e,272f80a7272f2f04


In [10]:
pip install sqlalchemy


Note: you may need to restart the kernel to use updated packages.


In [11]:
# image_metadata_db.py
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
from sqlalchemy.orm import declarative_base, sessionmaker

In [12]:
# Initialize the base class for the ORM
Base = declarative_base()

# Define the database schema
class ImageMetadata(Base):
    __tablename__ = 'image_metadata'

    ID = Column(Integer, primary_key=True, nullable=False)
    filename = Column(String, nullable=False)
    cryptographic_hash = Column(String, nullable=False)
    perceptual_hash = Column(String, nullable=False)
    file_location = Column(String, nullable=False)
    file_size = Column(Float, nullable=False)
    image_width = Column(Integer, nullable=False)
    image_height = Column(Integer, nullable=False)
    file_creation_date = Column(DateTime, nullable=False)
    file_extension = Column(String, nullable=False)
    perceptual_hash_rotation_15degrees_cw = Column(String, nullable=False)
    perceptual_hash_rotation_15degrees_ccw = Column(String, nullable=False)
    perceptual_hash_shear_low_difference = Column(String, nullable=False)

# Initialize the database
def initialize_db(db_name):
    engine = create_engine(f'sqlite:///{db_name}')  # SQLite database
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    return Session()


def check_db_creation(db_name):
    # Check if the database file exists
    db_path = os.path.abspath(db_name)
    if os.path.exists(db_path):
        print(f"Database '{db_name}' exists at: {db_path}")
    else:
        print(f"Database '{db_name}' does not exist. It may not have been created yet.")
    return db_path


# Function to add image metadata
def insert_image_metadata(session, image_metadata):
    session.add(image_metadata)
    session.commit()


In [81]:
# Initialize the database session
db_name = "image_metadata.db"
session = initialize_db(db_name)
# Check if the database is created and get its path
db_path = check_db_creation(db_name)

def clear_existing_data(session):
    session.query(ImageMetadata).delete()
    session.commit()

# Clear data before inserting new records
clear_existing_data(session)

# Function to process images and populate the database
def process_images_and_populate_db(session, image_paths, cryptographic_hashes, perceptual_hashes, compute_perceptual_hash):
    
    if not image_paths:
        print("No images found in the specified directory.")
        return

    idx = 1
    for image_path in image_paths:
        try:
            with Image.open(image_path) as img:
                # Retrieve cryptographic hash 
                cryptographic_hash = cryptographic_hashes.get(image_path, compute_cryptographic_hash(image_path))


                # Retrieve perceptual hash 
                perceptual_hash = str(perceptual_hashes.get(image_path, 'NA'))

                print(f"Image Path: {image_path}, Cryptographic Hash: {cryptographic_hash}, Perceptual Hash: {perceptual_hash}")
                
                # Generate transformed perceptual hash values (mock data for now)
                image_hashes = create_image_instances_and_hashes(img, compute_perceptual_hash)
                ph_rot_15deg_cw = str(image_hashes.get("rotation_15degrees_Clockwise", 'NA'))
                ph_rot_15deg_ccw = str(image_hashes.get("rotation_15degrees_CounterClockwise", 'NA'))
                ph_shear_ld = str(image_hashes.get("Shear_Low_Difference", 'NA'))

                # Retrieve file metadata
                file_location = os.path.abspath(image_path)
                file_size = os.path.getsize(image_path)
                width, height = img.size
                creation_date = datetime.datetime.fromtimestamp(os.path.getctime(image_path))
                file_extension = os.path.splitext(image_path)[1].lower()

                # Create a metadata object
                image_metadata = ImageMetadata(
                    filename=os.path.basename(image_path),
                    cryptographic_hash=cryptographic_hash,
                    perceptual_hash=perceptual_hash,
                    file_location=file_location,
                    file_size=file_size,
                    image_width=width,
                    image_height=height,
                    file_creation_date=creation_date,
                    file_extension=file_extension,
                    perceptual_hash_rotation_15degrees_cw=ph_rot_15deg_cw,
                    perceptual_hash_rotation_15degrees_ccw=ph_rot_15deg_ccw,
                    perceptual_hash_shear_low_difference=ph_shear_ld
                )

                # Insert into the database using the imported function
                insert_image_metadata(session, image_metadata)
                print(f"Inserted metadata for {image_path} into database.")
                idx += 1

        except Exception as e:
            print(f"Error processing image {image_path}: {e}")

process_images_and_populate_db(session, image_paths, cryptographic_hashes, perceptual_hashes, compute_perceptual_hash)

# Connect to the SQLite database
db_file = "image_metadata.db"
engine = create_engine(f"sqlite:///{db_file}")

# Reflect the database schema
metadata = MetaData()
metadata.reflect(bind=engine)

# Print available tables
print("Tables in the database:", metadata.tables.keys())

# Access a specific table
table_name = "image_metadata"  # Replace with your table name
if table_name in metadata.tables:
    table = metadata.tables[table_name]
else:
    raise ValueError(f"Table '{table_name}' does not exist in the database.")

# Query the table and print results
with engine.connect() as connection:
    result = connection.execute(table.select())
    for row in result:
        print(row)


Database 'image_metadata.db' exists at: C:\Users\selva\image_metadata.db
Image Path: C:\APPU SELVA\temp\DSC_0172.JPG, Cryptographic Hash: 9e422052fd039a43db88ce1a96759febfa60fee2111a7081b3fcb187c035092d, Perceptual Hash: bb9383818181c1ff
Inserted metadata for C:\APPU SELVA\temp\DSC_0172.JPG into database.
Image Path: C:\APPU SELVA\temp\DSC_0769.JPG, Cryptographic Hash: 7623262c32c591f0aa586d17e0205457d2032347414dddf20035b3c216e99e35, Perceptual Hash: 0723002727272727
Inserted metadata for C:\APPU SELVA\temp\DSC_0769.JPG into database.
Image Path: C:\APPU SELVA\temp\DSC_0777.JPG, Cryptographic Hash: 7c5c213df376f81da6002acca512972b671f2d7e11e20aae125f470a556fc25b, Perceptual Hash: 2707818617072727
Inserted metadata for C:\APPU SELVA\temp\DSC_0777.JPG into database.
Tables in the database: dict_keys(['image_metadata'])
(1, 'DSC_0172.JPG', '9e422052fd039a43db88ce1a96759febfa60fee2111a7081b3fcb187c035092d', 'bb9383818181c1ff', 'C:\\APPU SELVA\\temp\\DSC_0172.JPG', 7457167.0, 6000, 4000, da

In [97]:
import sqlite3 
def export_db_to_excel(db_name, excel_file_name):
    # Connect to the SQLite database
    conn = sqlite3.connect(db_name)
    
    # Query all data from the table
    query = "SELECT * FROM image_metadata"
    df = pd.read_sql_query(query, conn)
    
    # Export to Excel
    excel_file_path = f"{excel_file_name}.xlsx"
    df.to_excel(excel_file_path, index=False)
    conn.close()
    
    print(f"Database exported successfully to {excel_file_path}")
    return excel_file_path

# Export the database to an Excel file
db_name = 'image_metadata.db'
excel_file_name = 'image_metadata_export'
excel_file_path = export_db_to_excel(db_name, excel_file_name)


Database exported successfully to image_metadata_export.xlsx


In [None]:
# # Dictionary to store perceptual hash data
# perceptual_hash_data = {}

# # Loop through images and populate the dictionary
# for image_path in image_paths:
#     try:
#         # Retrieve the cryptographic hash
#         cryptographic_hash = cryptographic_hashes.get(image_path, 'NA')
        
#         # Retrieve the perceptual hash
#         perceptual_hash = str(perceptual_hashes.get(image_path, 'NA'))
        
#         # Retrieve transformed perceptual hashes
#         with Image.open(image_path) as img:
#             image_hashes = create_image_instances_and_hashes(img, compute_perceptual_hash)
#             ph_rot_15deg_cw = str(image_hashes.get("rotation_15degrees_Clockwise", 'N/A'))
#             ph_rot_15deg_ccw = str(image_hashes.get("rotation_15degrees_CounterClockwise", 'N/A'))
#             ph_shear_ld = str(image_hashes.get("Shear_Low_Difference", 'N/A'))
        
#         # Store the data in the dictionary
#         perceptual_hash_data[cryptographic_hash] = {
#             "Perceptual_Hash": perceptual_hash,
#             "Perceptual_Hash_rotation_15degrees_Clockwise": ph_rot_15deg_cw,
#             "Perceptual_Hash_rotation_15degrees_CounterClockwise": ph_rot_15deg_ccw,
#             "Perceptual_Hash_Shear_Low_Difference": ph_shear_ld
#         }
        
#         print(f"Stored perceptual hash data for {image_path}.")
#     except Exception as e:
#         print(f"Error processing image {image_path}: {e}")

# # Display the resulting dictionary
# print("Perceptual Hash Data Dictionary:")
# print(perceptual_hash_data)


In [None]:
# Dictionary to store perceptual hash data
perceptual_hash_data = {}

# Loop through images and populate the dictionary
for image_path in image_paths:
    try:
        # Use the filename as the primary key (you can extract it from the image_path)
        filename = image_path.split("\\")[-1]  # Extracts the filename from the path
        
        # Retrieve the  perceptual hash
        
        perceptual_hash = str(perceptual_hashes.get(image_path, 'NA'))
        
        # Retrieve transformed perceptual hashes
        with Image.open(image_path) as img:
            image_hashes = create_image_instances_and_hashes(img, compute_perceptual_hash)
            ph_rot_15deg_cw = str(image_hashes.get("rotation_15degrees_Clockwise", 'N/A'))
            ph_rot_15deg_ccw = str(image_hashes.get("rotation_15degrees_CounterClockwise", 'N/A'))
            ph_shear_ld = str(image_hashes.get("Shear_Low_Difference", 'N/A'))
        
        # Store the data in the dictionary, using the filename as the key
        perceptual_hash_data[filename] = {
            "Perceptual_Hash": perceptual_hash,
            "Perceptual_Hash_rotation_15degrees_Clockwise": ph_rot_15deg_cw,
            "Perceptual_Hash_rotation_15degrees_CounterClockwise": ph_rot_15deg_ccw,
            "Perceptual_Hash_Shear_Low_Difference": ph_shear_ld
        }
        
        print(f"Stored perceptual hash data for {filename}.")
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")

# Display the resulting dictionary
print("Perceptual Hash Data Dictionary:")
print(perceptual_hash_data)


In [None]:
# Function to calculate Hamming distance
def calculate_hamming_distance(hash1, hash2):
    return hash1 - hash2  # Replace with actual logic for Hamming distance

# Function to compute pairwise Hamming distance for all images
def compute_pairwise_hamming_distances(perceptual_hash_data):
    pairwise_distances = {}

    # Get a list of filenames (keys) in the dictionary
    filenames = list(perceptual_hash_data.keys())
    
    # Iterate through each pair of images
    for i in range(len(filenames)):
        for j in range(i + 1, len(filenames)):
            image_A = filenames[i]
            image_B = filenames[j]
            
            # Extract perceptual hash values for image A and B
            ph_A = perceptual_hash_data[image_A]
            ph_B = perceptual_hash_data[image_B]
            
            # List of perceptual hashes for image A and image B
            ph_A_values = [
                imagehash.hex_to_hash(ph_A["Perceptual_Hash"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_rotation_15degrees_Clockwise"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_rotation_15degrees_CounterClockwise"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_Shear_Low_Difference"])
            ]
            
            ph_B_values = [
                imagehash.hex_to_hash(ph_B["Perceptual_Hash"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_rotation_15degrees_Clockwise"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_rotation_15degrees_CounterClockwise"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_Shear_Low_Difference"])
            ]
            
            # Compute pairwise Hamming distances between A and B's perceptual hashes
            min_distance = float('inf')  # Start with a large value
            
            # Compute all pairwise distances between A and B's perceptual hashes
            for ph_A_value in ph_A_values:
                for ph_B_value in ph_B_values:
                    distance = calculate_hamming_distance(ph_A_value, ph_B_value)
                    min_distance = min(min_distance, distance)
            
            # Store the minimum distance for the current pair of images
            pairwise_distances[(image_A, image_B)] = min_distance

    return pairwise_distances

# Compute pairwise Hamming distances
pairwise_hamming_distances = compute_pairwise_hamming_distances(perceptual_hash_data)

# Display the pairwise Hamming distances
print("Pairwise Hamming Distances:")
for (image_A, image_B), min_distance in pairwise_hamming_distances.items():
    print(f"Minimum Hamming Distance between {image_A} and {image_B}: {min_distance}")


In [None]:
# Function to calculate Hamming distance (same as before)
def calculate_hamming_distance(hash1, hash2):
    return hash1 - hash2  # Replace with actual logic for Hamming distance

# Function to compute pairwise Hamming distance for all images (full matrix)
def compute_pairwise_hamming_distances(perceptual_hash_data):
    pairwise_distances = {}

    # Get a list of filenames (keys) in the dictionary
    filenames = list(perceptual_hash_data.keys())
    
    # Initialize a distance matrix
    distance_matrix = np.zeros((len(filenames), len(filenames)))
    
    # Iterate through each pair of images and compute distances
    for i in range(len(filenames)):
        for j in range(i + 1, len(filenames)):
            image_A = filenames[i]
            image_B = filenames[j]
            
            # Extract perceptual hash values for image A and B
            ph_A = perceptual_hash_data[image_A]
            ph_B = perceptual_hash_data[image_B]
            
            # List of perceptual hashes for image A and image B
            ph_A_values = [
                imagehash.hex_to_hash(ph_A["Perceptual_Hash"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_rotation_15degrees_Clockwise"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_rotation_15degrees_CounterClockwise"]),
                imagehash.hex_to_hash(ph_A["Perceptual_Hash_Shear_Low_Difference"])
            ]
            
            ph_B_values = [
                imagehash.hex_to_hash(ph_B["Perceptual_Hash"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_rotation_15degrees_Clockwise"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_rotation_15degrees_CounterClockwise"]),
                imagehash.hex_to_hash(ph_B["Perceptual_Hash_Shear_Low_Difference"])
            ]
            
            # Compute pairwise Hamming distances between A and B's perceptual hashes
            min_distance = float('inf')  # Start with a large value
            
            # Compute all pairwise distances between A and B's perceptual hashes
            for ph_A_value in ph_A_values:
                for ph_B_value in ph_B_values:
                    distance = calculate_hamming_distance(ph_A_value, ph_B_value)
                    min_distance = min(min_distance, distance)
            
            # Store the minimum distance for the current pair of images in the distance matrix
            distance_matrix[i, j] = min_distance
            distance_matrix[j, i] = min_distance  # Matrix is symmetric

    return filenames, distance_matrix

# Compute pairwise Hamming distances
filenames, pairwise_hamming_distances = compute_pairwise_hamming_distances(perceptual_hash_data)

# Mask the lower triangular part of the matrix (set NaN for lower triangle)
mask_lower = np.tril(np.ones_like(pairwise_hamming_distances, dtype=bool), k=0)  # Mask lower triangle
pairwise_hamming_distances[mask_lower] = np.nan  # Set lower triangle to NaN (no annotation)

# Plot the distance matrix as a heatmap with binary color map
plt.figure(figsize=(10, 8))

# Set the color map to 'binary' and annotate only for the lower triangle
sns.heatmap(pairwise_hamming_distances, 
            xticklabels=filenames, 
            yticklabels=filenames, 
            cmap="binary", 
            annot=True, 
            annot_kws={'size': 10}, 
            cbar=True, 
            mask=mask_lower, 
            square=True)

# Adding labels and title
plt.title("Pairwise Minimum Hamming Distances between Images")
plt.xlabel("Image Filename")
plt.ylabel("Image Filename")

# Show the plot
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
