In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Directory the input .jpg images are read from
DataPath = 'Data'

In [None]:
# Directory the cropped rectangles are written to; created up front if it is missing
OutputPath = 'Output'
os.makedirs(OutputPath, exist_ok=True)

In [None]:
# Load every .jpg from the input directory.
# File names are sorted so that the index `i` used in the output file names is
# reproducible (os.listdir returns entries in arbitrary order), and files that
# OpenCV cannot decode (cv2.imread returns None instead of raising) are skipped.
images = []
for f in sorted(os.listdir(DataPath)):
    if not f.endswith('.jpg'):
        continue
    loaded = cv2.imread(os.path.join(DataPath, f))
    if loaded is None:
        print(f"Unable to load image: {os.path.join(DataPath, f)}")
        continue
    images.append(loaded)

In [None]:
# Maximum nesting depth (number of ancestors in the contour hierarchy) a contour may have to be considered
depth_to_print = 3
# Fraction of a contour's perimeter used as the approxPolyDP epsilon
curvature_threshold = 0.015
# Contours whose polygon approximation has more vertices than this are skipped as "curved"
Curve_comparison = 4

# Function to check if a quadrilateral is a rectangle by verifying the angles
def is_rectangle(approx, tolerance=5.0):
    """Return True when `approx` has four vertices and every corner is close to 90 degrees.

    Args:
        approx: Sequence of vertices where ``approx[k][0]`` is the (x, y) point of
            vertex ``k`` (the layout the caller passes in from cv2.approxPolyDP).
        tolerance: Largest allowed deviation from 90 degrees, in degrees. The
            default of 5.0 keeps the original 85..95 degree window.

    Returns:
        bool: False if the polygon does not have exactly four vertices, if any two
        consecutive vertices coincide (the corner angle is undefined), or if any
        corner angle falls outside ``90 +/- tolerance``.
    """
    if len(approx) != 4:
        return False

    def angle(pt1, pt2, pt3):
        # Angle at pt2 between the edges pt2->pt1 and pt2->pt3, in degrees.
        # Returns None when either edge has zero length.
        vec1 = np.asarray(pt1, dtype=float) - np.asarray(pt2, dtype=float)
        vec2 = np.asarray(pt3, dtype=float) - np.asarray(pt2, dtype=float)
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            return None
        cos_angle = np.dot(vec1, vec2) / denominator
        # Clip guards arccos against values a hair outside [-1, 1] from rounding
        return np.degrees(np.arccos(np.clip(cos_angle, -1.0, 1.0)))

    lower, upper = 90.0 - tolerance, 90.0 + tolerance
    for i in range(4):
        corner = angle(approx[i][0], approx[(i + 1) % 4][0], approx[(i + 2) % 4][0])
        # A degenerate corner cannot be a right angle; bail out without dividing by zero
        if corner is None or not (lower <= corner <= upper):
            return False
    return True

def _contour_depth(hierarchy, index):
    """Count how many ancestors contour `index` has in a RETR_TREE hierarchy."""
    depth = 0
    parent = hierarchy[0][index][3]
    # Entry [3] of a hierarchy row is the parent index; -1 marks a top-level contour
    while parent != -1:
        depth += 1
        parent = hierarchy[0][parent][3]
    return depth


# Find the contours of each image, keep the rectangular ones that are nested at
# most `depth_to_print` levels deep, and save a crop of each to OutputPath.
for i, img in enumerate(images):
    # cv2.imread yields None for unreadable files; cvtColor would raise on it
    if img is None:
        print(f"Skipping image {i}: could not be read")
        continue

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE: img_contour is the same array as img (no copy), so every contour drawn
    # below is also visible in `images` and in all later crops from this image.
    img_contour = img

    for j, contour in enumerate(contours):
        # If contour is deeper than allowed depth, ignore it
        if _contour_depth(hierarchy, j) > depth_to_print:
            continue

        # Approximate the contour with a polygon; epsilon scales with the perimeter
        epsilon = curvature_threshold * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Ignore highly curved contours (if the approximation has too many points)
        if len(approx) > Curve_comparison:
            continue

        if not is_rectangle(approx):
            continue

        # Draw the rectangle's outline on the (shared) image
        cv2.drawContours(img_contour, [approx], -1, (0, 0, 255), 8)

        copytest = img_contour.copy()

        # Draw the bounding box around the rectangle on the copy only
        x, y, w, h = cv2.boundingRect(approx)
        cv2.rectangle(copytest, (x, y), (x + w, y + h), (0, 255, 0), 3)

        # Crop the annotated copy down to the bounding box and save it as
        # <image index>_<contour index>.jpg
        crop_img = copytest[y:y + h, x:x + w]
        cv2.imwrite(os.path.join(OutputPath, f'{i}_{j}.jpg'), crop_img)


In [None]:
# Show every loaded image on its own figure, converted from BGR to RGB for matplotlib
for img in images:
    fig = plt.figure(figsize=(10, 5))
    axes = fig.add_subplot(1, 2, 1)
    axes.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    axes.axis('off')

In [None]:
import cv2

# Load one sample crop from the output directory (relative path instead of a
# machine-specific absolute one)
sample_image_path = os.path.join(OutputPath, '13_812.jpg')
img = cv2.imread(sample_image_path)

if img is None:
    # cv2.imread returns None for a missing/unreadable file rather than raising
    print(f"Unable to load image: {sample_image_path}")
else:
    # Get the dimensions of the image (works for both grayscale and colour arrays)
    height, width = img.shape[:2]

    # Calculate the area (in terms of pixels)
    area = height * width

    print(f"The area of the image is: {area} pixels")


In [None]:
import os
import cv2

# File extensions (lower-case) treated as images by the area-based clean-up
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.gif')


def _delete_images_where(directory, should_delete):
    """Delete each image under `directory` whose pixel area satisfies `should_delete`.

    Walks `directory` recursively, loads every file with an image extension via
    cv2.imread, and removes it when ``should_delete(height * width)`` is true.
    Failures on a single file are reported and do not stop the walk.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(_IMAGE_EXTENSIONS):
                continue
            file_path = os.path.join(root, file)
            try:
                img = cv2.imread(file_path)
                # cv2.imread signals failure by returning None
                if img is None:
                    print(f"Unable to load image: {file_path}")
                    continue
                height, width = img.shape[:2]
                area = height * width
                if should_delete(area):
                    print(f"Deleting {file_path} (area: {area} pixels)")
                    os.remove(file_path)
            except Exception as e:
                # Best-effort clean-up: report and move on to the next file
                print(f"Error processing {file_path}: {e}")


def delete_small_images(directory, area_threshold_lower, area_threshold_upper):
    """Delete images whose area lies OUTSIDE the given bounds.

    An image is removed when its pixel area is strictly below
    `area_threshold_lower` or strictly above `area_threshold_upper`.
    """
    _delete_images_where(
        directory,
        lambda area: area < area_threshold_lower or area > area_threshold_upper,
    )


def delete_images(directory, area_threshold_lower, area_threshold_upper):
    """Delete images whose area lies strictly BETWEEN the given bounds."""
    _delete_images_where(
        directory,
        lambda area: area_threshold_lower < area < area_threshold_upper,
    )

# Prune the crops in the output directory by pixel area, in two passes.
directory = 'Output'

# Pass 1: drop crops that are smaller than the lower bound or larger than the upper bound
area_threshold_lower, area_threshold_upper = 30, 730000
delete_small_images(directory, area_threshold_lower, area_threshold_upper)

# Pass 2: drop crops whose area falls inside this band
# (presumably the numbered images - TODO confirm)
area_threshold_lower, area_threshold_upper = 200000, 260600
delete_images(directory, area_threshold_lower, area_threshold_upper)


In [None]:
import cv2
import pytesseract
import re

# Function to check if an image contains a number using OCR
def contains_number(image_path, delete=True):
    """Report whether OCR finds at least one digit in the image at `image_path`.

    Args:
        image_path: Path of the image file to inspect.
        delete: When True (the default, matching the original behaviour) the file
            is removed from disk as soon as a digit is found.

    Returns:
        bool: True if the recognised text contains a digit, otherwise False.
        Also False when the file cannot be loaded.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None for unreadable files; cvtColor would raise on it
    if img is None:
        print(f"Unable to load image: {image_path}")
        return False

    # OCR is run on a grayscale version of the image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray)

    if re.search(r'\d', text) is None:
        return False

    if delete:
        print(f"Number found in image: {image_path} File Deleted")
        os.remove(image_path)
    return True

# Run the OCR check on every image in the output directory; contains_number
# removes the files in which it finds a digit
image_path = 'Output'
image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.gif')

for file in os.listdir(image_path):
    if not file.lower().endswith(image_suffixes):
        continue
    contains_number(os.path.join(image_path, file))


In [None]:
import os
import re
from collections import defaultdict

# Function to extract both parts of the filename (before and after the underscore)
def extract_numbers(filename):
    """Parse the two leading integers of a name such as ``'13_812.jpg'``.

    Returns:
        tuple: ``(first, second)`` as ints, or ``(None, None)`` when the name does
        not start with ``<digits>`` + ``_`` or ``.`` + ``<digits>``.
    """
    match = re.match(r'(\d+)[_.](\d+)', filename)
    if match:
        return int(match.group(1)), int(match.group(2))
    return None, None


# Function to group images based on the first number and the difference in the second number
def group_images(directory, max_difference=12, max_group_size=4):
    """Group the .jpg files in `directory` by their numeric file names.

    Files are ordered by (first number, second number). A file joins the current
    group only if it has the same first number as the previous file, its second
    number is at most `max_difference` larger than the previous file's, and the
    group holds fewer than `max_group_size` files; otherwise it starts a new group.
    Files whose names extract_numbers cannot parse are ignored.

    Returns:
        defaultdict: consecutive integer group ids (from 0) mapped to non-empty
        lists of file names.
    """
    grouped_images = defaultdict(list)

    # Collect (first number, second number, filename), parsing each name once
    image_data = []
    for name in os.listdir(directory):
        if not name.endswith('.jpg'):
            continue
        first_num, second_num = extract_numbers(name)
        if first_num is None:
            # Unparseable names would make the sort compare None with int
            continue
        image_data.append((first_num, second_num, name))
    image_data.sort()

    current_group = []
    previous_first = previous_second = None
    group_id = 0

    for first_num, second_num, name in image_data:
        extends_group = (
            bool(current_group)
            and first_num == previous_first
            and second_num - previous_second <= max_difference
            and len(current_group) < max_group_size
        )
        # Close the running group before starting a new one; never store an empty group
        if not extends_group and current_group:
            grouped_images[group_id] = current_group
            group_id += 1
            current_group = []

        current_group.append(name)
        previous_first, previous_second = first_num, second_num

    # Add the last group if it's not empty
    if current_group:
        grouped_images[group_id] = current_group

    return grouped_images

# Directory where the images are stored
directory = "Output"

# Group the images by file name
grouped_images = group_images(directory)

# Print each group's file names. The loop variable is deliberately not called
# `images`, which would overwrite the notebook-level list of loaded images.
for group_id, group_files in grouped_images.items():
    print(f"Group {group_id}: {', '.join(group_files)}")

In [None]:
# Turn the {group_id: file names} mapping into a plain list of groups
grouped_images_list = [*grouped_images.values()]

for group in grouped_images_list:
    print(group)

In [None]:
# Split the groups by size: one-file groups vs. groups of two or more files.
# Empty groups fall into neither list.
Single_Image = [group for group in grouped_images_list if len(group) == 1]
Multi_Image = [group for group in grouped_images_list if len(group) > 1]

In [None]:
# List the groups that contain exactly one file
print("Single Image")
for single_group in Single_Image:
    print(single_group)

In [None]:
# Buckets of single-image file names, filled by the loop further down this cell
Grouping_Image = dict()

def LeftMost(num):
    """Return the leading decimal digit of `num` (values below 10 are returned unchanged)."""
    leading = num
    while leading >= 10:
        # Drop the last decimal digit
        leading = leading // 10
    return leading

# Bucket the single-image groups by (first number, leading digit of the second number)
for single_group in Single_Image:
    file_name = single_group[0]
    first_num, second_num = extract_numbers(file_name)
    bucket_key = (first_num, LeftMost(second_num))

    # Create the bucket on first use, then add the file name to it
    Grouping_Image.setdefault(bucket_key, []).append(file_name)

# Only the buckets are needed from here on: replace the dict with a list of lists
Grouping_Image = [*Grouping_Image.values()]


In [None]:
# Show each bucket of regrouped single images
for bucket in Grouping_Image:
    print(bucket)

In [None]:
# Append the multi-image groups after the regrouped singles (in place).
# NOTE: re-running this cell appends Multi_Image again.
Grouping_Image += Multi_Image

In [None]:
# convert to a dataframe
import pandas as pd

# One row per group, one column per position within the group.
# NOTE(review): groups shorter than the longest one presumably leave missing
# cells in their row (a later cell skips missing cells) - confirm.
df = pd.DataFrame(Grouping_Image)

print(df)

In [None]:
# Create one folder per group under Grouped_Output and copy the group's files into it.
# The files are copied byte-for-byte; decoding and re-writing them with OpenCV
# would re-compress every JPEG and fail on a file that cannot be read.
import shutil

Grouped_Output = 'Grouped_Output'

for i in range(len(df)):
    group_dir = os.path.join(Grouped_Output, f'Group_{i}')
    os.makedirs(group_dir, exist_ok=True)

    # dropna() skips the missing cells of rows shorter than the widest group
    for file_name in df.iloc[i].dropna():
        source_path = os.path.join('Output', file_name)
        if not os.path.isfile(source_path):
            print(f"Unable to load image: {source_path}")
            continue
        shutil.copy2(source_path, os.path.join(group_dir, file_name))

In [111]:
# Walk every directory and subdirectory and pad each folder with 90-degree clockwise rotations of an existing image, so that each folder ends up with 4 images in total
import os
from PIL import Image
import glob

def rotate_images_in_directory(directory, target_count=4):
    """Pad `directory` with rotated copies of its first .jpg until it holds `target_count`.

    Does nothing when the directory has no .jpg files or already has at least
    `target_count` of them. Otherwise the alphabetically first .jpg is rotated
    clockwise by 90, 180, 270, ... degrees and saved as ``rotated_1.jpg``,
    ``rotated_2.jpg``, ... so that each filler image has a different orientation.

    Args:
        directory: Folder to inspect (not searched recursively).
        target_count: Number of .jpg files the folder should end up with.
    """
    # Sorted so the image chosen as the rotation source is the same on every run
    image_files = sorted(glob.glob(os.path.join(directory, '*.jpg')))
    image_count = len(image_files)

    if not 0 < image_count < target_count:
        return

    print(f"Rotating images in {directory}...")
    # Open the source once and close the file handle when done
    with Image.open(image_files[0]) as original_image:
        for i in range(target_count - image_count):
            # Negative angles rotate clockwise; expand=True resizes the canvas to fit
            rotated_image = original_image.rotate(-90 * (i + 1), expand=True)

            new_image_name = f'rotated_{i + 1}.jpg'
            rotated_image.save(os.path.join(directory, new_image_name))

def traverse_and_process_images(root_directory):
    """Run rotate_images_in_directory on `root_directory` and on every directory below it."""
    for walk_entry in os.walk(root_directory):
        # os.walk yields (dirpath, dirnames, filenames); only the path is used
        rotate_images_in_directory(walk_entry[0])

# Root folder to process; it and every directory beneath it are visited
root_directory = 'Grouped_Output'
traverse_and_process_images(root_directory)


Rotating images in Grouped_Output/Group_111...
Rotating images in Grouped_Output/Group_43...
Rotating images in Grouped_Output/Group_49...
Rotating images in Grouped_Output/Group_80...
Rotating images in Grouped_Output/Group_117...
Rotating images in Grouped_Output/Group_112...
Rotating images in Grouped_Output/Group_118...
Rotating images in Grouped_Output/Group_24...
Rotating images in Grouped_Output/Group_1...
Rotating images in Grouped_Output/Group_135...
Rotating images in Grouped_Output/Group_92...
Rotating images in Grouped_Output/Group_107...
Rotating images in Grouped_Output/Group_34...
Rotating images in Grouped_Output/Group_62...
Rotating images in Grouped_Output/Group_23...
Rotating images in Grouped_Output/Group_146...
Rotating images in Grouped_Output/Group_93...
Rotating images in Grouped_Output/Group_14...
Rotating images in Grouped_Output/Group_82...
Rotating images in Grouped_Output/Group_143...
Rotating images in Grouped_Output/Group_130...
Rotating images in Grouped

Rotating images in Grouped_Output/Group_77...
Rotating images in Grouped_Output/Group_52...
Rotating images in Grouped_Output/Group_55...
Rotating images in Grouped_Output/Group_110...
Rotating images in Grouped_Output/Group_119...
Rotating images in Grouped_Output/Group_18...
Rotating images in Grouped_Output/Group_108...
Rotating images in Grouped_Output/Group_140...
Rotating images in Grouped_Output/Group_71...
Rotating images in Grouped_Output/Group_121...
Rotating images in Grouped_Output/Group_116...
Rotating images in Grouped_Output/Group_9...
Rotating images in Grouped_Output/Group_13...
Rotating images in Grouped_Output/Group_56...
Rotating images in Grouped_Output/Group_6...
Rotating images in Grouped_Output/Group_79...
Rotating images in Grouped_Output/Group_5...
Rotating images in Grouped_Output/Group_35...
Rotating images in Grouped_Output/Group_51...
Rotating images in Grouped_Output/Group_78...
Rotating images in Grouped_Output/Group_7...
Rotating images in Grouped_Outpu