In [23]:
# Step 1: Importing Necessary Libraries
import os
import shutil
import hashlib
import matplotlib.pyplot as plt
import cv2

# Step 2: Creating Function for generating Hash value of each Image
def generate_image_hash(image_path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of a file's raw bytes.

    The file is streamed in fixed-size chunks so memory use stays bounded
    regardless of file size. The digest covers the bytes stored on disk,
    not the decoded pixels, so the same picture saved with a different
    encoding or metadata produces a different digest.

    Args:
        image_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would silently hash nothing.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    digest = hashlib.sha256()
    with open(image_path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# # Step 3: Getting Current Directory (Optional: just to show where you are)
# current_dir = os.getcwd()
# print("Current Working Directory:", current_dir)

# Step 4: Point the notebook at the folder that holds the images.
folder_path = r'C:\Users\Global\Fabric project\WFDD\grey cloth\normal'
# The working directory is switched as well; the paths built in the steps
# below are all joined with folder_path explicitly.
os.chdir(folder_path)
print("Changed working directory to:", os.getcwd())

# Step 5: Collect the image files in that folder and count them.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make every run visit the files in the same order. That keeps the choice of
# "first file" in each duplicate group reproducible.
all_files = sorted(os.listdir(folder_path))
# Extensions treated as images; adjust as needed.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
# Keep regular files only: a directory whose name ends in an image extension
# would otherwise fail when it is opened for hashing.
images = [
    f for f in all_files
    if f.lower().endswith(image_extensions)
    and os.path.isfile(os.path.join(folder_path, f))
]
print("Total images in folder:", len(images))

# Step 6: Hash every image and group the file names that share a digest.
hash_dict = {}  # SHA-256 hex digest -> list of file names with that digest

for file_name in images:
    digest = generate_image_hash(os.path.join(folder_path, file_name))
    # setdefault creates the list the first time a digest is seen.
    hash_dict.setdefault(digest, []).append(file_name)

# A digest that maps to two or more names marks a group of identical files.
duplicates = {}
for digest, names in hash_dict.items():
    if len(names) > 1:
        duplicates[digest] = names

if not duplicates:
    print("\nNo duplicate images found.")
else:
    print("\nFound duplicate images:")
    for digest, names in duplicates.items():
        print(f"Hash: {digest}\nFiles: {names}\n")

# Step 7: Visualizing (display one sample duplicate image from each duplicate group)
if duplicates:
    print("Displaying one sample image from each duplicate group:")
    # One subplot row per duplicate group, 4 inches of height each.
    plt.figure(figsize=(12, len(duplicates) * 4))
    for idx, (h, files) in enumerate(duplicates.items()):
        # The first file of the group stands in for the whole group.
        sample_image_path = os.path.join(folder_path, files[0])
        img = cv2.imread(sample_image_path)
        # cv2.imread reports failure by returning None instead of raising;
        # passing None on to cvtColor would abort the whole cell.
        if img is None:
            print(f"Could not read {files[0]}")
            continue
        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.subplot(len(duplicates), 1, idx + 1)
        plt.imshow(img_rgb)
        plt.title(f"Duplicate Group (Hash: {h}) with {len(files)} images")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Step 8: Build a de-duplicated copy of the folder.
# The copy lives in a sibling folder named 'normal1'; the source folder is
# left untouched.
parent_dir = os.path.dirname(folder_path)  # directory that contains folder_path
destination = os.path.join(parent_dir, 'normal1')
os.makedirs(destination, exist_ok=True)
print("New folder for filtered images created at:", destination)

# Only the first file recorded for each hash is copied, so every group of
# identical files contributes exactly one image.
for file_list in hash_dict.values():
    keeper = file_list[0]
    shutil.copy(os.path.join(folder_path, keeper), os.path.join(destination, keeper))

print("Filtered images have been copied to the new folder.")

# Step 9: Count the image files now present in the destination.
# This counts everything with an image extension in that folder, including
# files that were already there before this run (the folder is reused if it
# exists).
new_images = []
for entry in os.listdir(destination):
    if entry.lower().endswith(image_extensions):
        new_images.append(entry)
print("Total unique images in new folder (normal1):", len(new_images))


Changed working directory to: C:\Users\Global\Fabric project\WFDD\grey cloth\normal
Total images in folder: 309

No duplicate images found.
New folder for filtered images created at: C:\Users\Global\Fabric project\WFDD\grey cloth\normal1
Filtered images have been copied to the new folder.
Total unique images in new folder (normal1): 309


In [21]:
import os
import shutil
import hashlib
import matplotlib.pyplot as plt
import cv2

# --- Helper Function: Generate Hash for an Image ---
def generate_image_hash(image_path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of a file's raw bytes.

    The file is streamed in fixed-size chunks so memory use stays bounded
    regardless of file size. The digest covers the bytes stored on disk,
    not the decoded pixels, so the same picture saved with a different
    encoding or metadata produces a different digest.

    Args:
        image_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would silently hash nothing.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    digest = hashlib.sha256()
    with open(image_path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# --- Settings ---
# Root folder; each immediate subfolder holds the images of one defect type.
defect_folder = r'C:\Users\Global\Fabric project\WFDD\grey cloth\defect'
# File-name endings treated as images (matched case-insensitively later on).
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

# Filtered images are written to a sibling folder named 'defect1', which will
# receive one subfolder per defect type.
parent_dir = os.path.dirname(defect_folder)
destination_parent = os.path.join(parent_dir, 'defect1')
os.makedirs(destination_parent, exist_ok=True)
print("New parent folder for filtered defect images created at:", destination_parent)

# --- Process Each Defect Subfolder ---
# Every directory directly under defect_folder counts as a defect type;
# plain files at that level are ignored.
defect_types = []
for entry in os.listdir(defect_folder):
    if os.path.isdir(os.path.join(defect_folder, entry)):
        defect_types.append(entry)
if len(defect_types) == 0:
    raise ValueError("No defect subfolders found in the defect folder.")

# For each defect type: hash its images, report and preview duplicate groups,
# then copy one image per unique hash into the matching destination subfolder.
for defect_type in defect_types:
    print(f"\nProcessing defect type: {defect_type}")
    subfolder_path = os.path.join(defect_folder, defect_type)

    # os.listdir returns entries in arbitrary order; sorting makes the file
    # kept from each duplicate group the same on every run.
    all_files = sorted(os.listdir(subfolder_path))
    # Keep regular files only: a directory whose name ends in an image
    # extension would otherwise fail when it is opened for hashing.
    images = [
        f for f in all_files
        if f.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(subfolder_path, f))
    ]
    print(f"Total images in '{defect_type}' folder: {len(images)}")

    # Dictionary to map hash -> list of file names
    hash_dict = {}
    for image_file in images:
        image_path = os.path.join(subfolder_path, image_file)
        image_hash = generate_image_hash(image_path)
        hash_dict.setdefault(image_hash, []).append(image_file)

    # Identify duplicate groups (hash groups with more than one image)
    duplicates = {h: files for h, files in hash_dict.items() if len(files) > 1}
    if duplicates:
        print("Found duplicate images:")
        for h, files in duplicates.items():
            print(f"Hash: {h}\nFiles: {files}\n")
    else:
        print("No duplicate images found.")

    # --- Visualization: Display one sample image from each duplicate group (if any) ---
    if duplicates:
        print("Displaying one sample image from each duplicate group:")
        # One subplot row per duplicate group, 4 inches of height each.
        plt.figure(figsize=(12, len(duplicates) * 4))
        for idx, (h, files) in enumerate(duplicates.items()):
            sample_image_path = os.path.join(subfolder_path, files[0])
            img = cv2.imread(sample_image_path)
            # cv2.imread returns None on failure; say so instead of leaving
            # an unexplained gap in the figure.
            if img is None:
                print(f"Could not read {files[0]}")
                continue
            # OpenCV loads pixels as BGR; matplotlib expects RGB.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            plt.subplot(len(duplicates), 1, idx + 1)
            plt.imshow(img_rgb)
            plt.title(f"Duplicate Group (Hash: {h}) with {len(files)} images")
            plt.axis('off')
        plt.tight_layout()
        plt.show()

    # --- Copy Unique Images: One image per unique hash ---
    # Create corresponding subfolder in destination
    destination = os.path.join(destination_parent, defect_type)
    os.makedirs(destination, exist_ok=True)
    for h, file_list in hash_dict.items():
        # Pick the first file from each group (which removes duplicates)
        src_file = os.path.join(subfolder_path, file_list[0])
        dst_file = os.path.join(destination, file_list[0])
        shutil.copy(src_file, dst_file)

    # Count the image files now in the destination subfolder. Files left
    # there by an earlier run are included, since the folder is reused.
    new_images = [f for f in os.listdir(destination) if f.lower().endswith(image_extensions)]
    print(f"Total unique images in new folder '{defect_type}':", len(new_images))


New parent folder for filtered defect images created at: C:\Users\Global\Fabric project\WFDD\grey cloth\defect1

Processing defect type: contaminated
Total images in 'contaminated' folder: 20
No duplicate images found.
Total unique images in new folder 'contaminated': 20

Processing defect type: flecked
Total images in 'flecked' folder: 14
No duplicate images found.
Total unique images in new folder 'flecked': 14

Processing defect type: line
Total images in 'line' folder: 9
No duplicate images found.
Total unique images in new folder 'line': 9

Processing defect type: string
Total images in 'string' folder: 16
No duplicate images found.
Total unique images in new folder 'string': 16


In [25]:
import os
import cv2

# Folder whose images will be measured
folder_path = r'C:\Users\Global\Fabric project\WFDD\grey cloth\normal' # Change as necessary

# File-name endings treated as images (compared in lower case)
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

# Print the pixel dimensions of every image file in the folder
print("Image Sizes in the Folder:")
for filename in os.listdir(folder_path):
    # Skip anything that does not look like an image file.
    if not filename.lower().endswith(image_extensions):
        continue
    image = cv2.imread(os.path.join(folder_path, filename))
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        print(f"Could not read {filename}")
        continue
    # The array shape starts with (height, width).
    height = image.shape[0]
    width = image.shape[1]
    print(f"{filename}: Width = {width} px, Height = {height} px")


Image Sizes in the Folder:
img1.png: Width = 512 px, Height = 512 px
img10.png: Width = 512 px, Height = 512 px
img100.png: Width = 512 px, Height = 512 px
img101.png: Width = 512 px, Height = 512 px
img102.png: Width = 512 px, Height = 512 px
img103.png: Width = 512 px, Height = 512 px
img104.png: Width = 512 px, Height = 512 px
img105.png: Width = 512 px, Height = 512 px
img106.png: Width = 512 px, Height = 512 px
img107.png: Width = 512 px, Height = 512 px
img108.png: Width = 512 px, Height = 512 px
img109.png: Width = 512 px, Height = 512 px
img11.png: Width = 512 px, Height = 512 px
img110.png: Width = 512 px, Height = 512 px
img111.png: Width = 512 px, Height = 512 px
img112.png: Width = 512 px, Height = 512 px
img113.png: Width = 512 px, Height = 512 px
img114.png: Width = 512 px, Height = 512 px
img115.png: Width = 512 px, Height = 512 px
img116.png: Width = 512 px, Height = 512 px
img117.png: Width = 512 px, Height = 512 px
img118.png: Width = 512 px, Height = 512 px
img119.pn

In [27]:
import os
import cv2

# Parent folder whose subfolders (one per defect type) hold the images.
# Note: this path is the original 'defect' folder, not the filtered 'defect1' copy.
folder_parent = r'C:\Users\Global\Fabric project\WFDD\grey cloth\defect'  # Change as necessary

# File-name endings treated as images (compared in lower case)
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

print("Image Sizes in the Folder:")

# Walk each defect-type subfolder and print the dimensions of its images
for subfolder in os.listdir(folder_parent):
    subfolder_path = os.path.join(folder_parent, subfolder)
    # Plain files sitting directly in the parent folder are ignored.
    if not os.path.isdir(subfolder_path):
        continue
    print(f"\nDefect Type: {subfolder}")
    for filename in os.listdir(subfolder_path):
        # Skip anything that does not look like an image file.
        if not filename.lower().endswith(image_extensions):
            continue
        image = cv2.imread(os.path.join(subfolder_path, filename))
        # cv2.imread returns None when the file cannot be decoded.
        if image is None:
            print(f"Could not read {filename}")
            continue
        # The array shape starts with (height, width).
        height = image.shape[0]
        width = image.shape[1]
        print(f"{filename}: Width = {width} px, Height = {height} px")


Image Sizes in the Folder:

Defect Type: contaminated
001.png: Width = 512 px, Height = 512 px
002.png: Width = 512 px, Height = 512 px
003.png: Width = 512 px, Height = 512 px
004.png: Width = 512 px, Height = 512 px
005.png: Width = 512 px, Height = 512 px
006.png: Width = 512 px, Height = 512 px
007.png: Width = 512 px, Height = 512 px
008.png: Width = 512 px, Height = 512 px
009.png: Width = 512 px, Height = 512 px
010.png: Width = 512 px, Height = 512 px
011.png: Width = 512 px, Height = 512 px
012.png: Width = 512 px, Height = 512 px
013.png: Width = 512 px, Height = 512 px
014.png: Width = 512 px, Height = 512 px
015.png: Width = 512 px, Height = 512 px
016.png: Width = 512 px, Height = 512 px
017.png: Width = 512 px, Height = 512 px
018.png: Width = 512 px, Height = 512 px
019.png: Width = 512 px, Height = 512 px
020.png: Width = 512 px, Height = 512 px

Defect Type: flecked
001.png: Width = 512 px, Height = 512 px
002.png: Width = 512 px, Height = 512 px
003.png: Width = 512 p