<img src="https://github.com/PKhosravi-CityTech/LightCnnRad/raw/main/Images/BioMindLogo.png" alt="BioMind AI Lab Logo" width="150" height="150" align="left" style="margin-bottom: 40px;"> **Repository Developed by Pegah Khosravi, Principal Investigator of the BioMind AI Lab**

Welcome to this repository! This repository is a result of collaborative efforts from our dedicated team at the lab. We are committed to advancing the field of biomedical AI and pushing the boundaries of medical data analysis. Your interest and contributions to our work are greatly appreciated. For more information about our lab and ongoing projects, please visit the [BioMind AI Lab website](https://sites.google.com/view/biomind-ai-lab). Thank you for your interest and support!

This code finds duplicate images in a folder and removes them.

In [None]:
import os
import hashlib
from PIL import Image

def compute_hash(image_path):
    """Return an MD5 hex digest identifying the grayscale content of an image.

    The image is opened with PIL, converted to 8-bit grayscale (mode 'L'),
    and its dimensions plus raw pixel bytes are hashed. Two files get the
    same digest only when they have the same width, height, and grayscale
    pixel values.

    Args:
        image_path: Path of the image file to hash.

    Returns:
        The hexadecimal MD5 digest as a string.
    """
    with Image.open(image_path) as img:
        gray = img.convert('L')  # Ensure image is in grayscale mode
        digest = hashlib.md5()
        # Mix the dimensions into the digest: the raw byte stream alone does
        # not encode the image shape, so e.g. a 2x8 and a 4x4 image holding
        # the same bytes would otherwise be treated as duplicates.
        digest.update(f"{gray.width}x{gray.height}:".encode("ascii"))
        digest.update(gray.tobytes())
        return digest.hexdigest()

def find_and_remove_duplicates(folder_path):
    """Delete duplicate images in ``folder_path`` and print file counts.

    Every file directly inside ``folder_path`` whose name ends in .jpg,
    .jpeg, or .png (case-insensitive) is hashed with ``compute_hash``. The
    first file seen for each hash is kept; every later file with the same
    hash is deleted with ``os.remove``. Files are visited in sorted name
    order so the surviving copy is deterministic.

    Progress, failures, and the image counts before and after removal are
    printed to standard output.

    Args:
        folder_path: Directory to scan. Subdirectories are not traversed.

    Returns:
        None.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    def list_images():
        # One shared filter so the before/after counts are comparable.
        return sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(image_extensions)
        )

    hash_dict = {}
    duplicates = []

    all_files_before = list_images()
    num_files_before = len(all_files_before)

    for file in all_files_before:
        file_path = os.path.join(folder_path, file)

        try:
            img_hash = compute_hash(file_path)
        except Exception as e:
            # Best effort: an unreadable or corrupt image is reported and
            # skipped; it is never treated as a duplicate.
            print(f"Could not process {file}: {e}")
            continue

        if img_hash in hash_dict:
            duplicates.append(file_path)
        else:
            hash_dict[img_hash] = file_path

    # Remove duplicate images
    for dup in duplicates:
        try:
            os.remove(dup)
        except OSError as e:
            print(f"Could not remove {dup}: {e}")
        else:
            print(f"Removed duplicate image: {dup}")

    # Re-list the folder so the reported count reflects what is on disk.
    num_files_after = len(list_images())

    # Report the number of files before and after removal
    print(f"Number of files before removal: {num_files_before}")
    print(f"Number of files after removal: {num_files_after}")

    if not duplicates:
        print("No duplicate images found.")

# Example usage.
# Point `folder_path` at the directory you want to deduplicate before running:
# the call below permanently deletes the duplicate image files it finds there.
folder_path = 'Datasets/Alzheimer MRI/Validation/Very_Mild_Demented'
find_and_remove_duplicates(folder_path=folder_path)