In [None]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def download_google_images(folder_path, query, num_images):
    """Download up to ``num_images`` search-result images into ``folder_path``.

    Despite its name, this function scrapes the Unsplash search page for
    ``query`` (not Google Images). Images are written as ``<n>.jpg``, where
    numbering continues after the highest purely numeric ``.jpg`` name that
    already exists in ``folder_path``.

    Args:
        folder_path: Destination directory; created if it does not exist.
        query: Search term; it is lower-cased and appended to the search URL.
        num_images: Maximum number of image tags to try from the result page.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        requests.RequestException: If the search page request itself fails
            (for example a connection error or timeout). Failures of
            individual image requests are reported and skipped instead.
    """
    request_timeout = 30  # seconds; without a timeout a stalled server hangs the cell

    os.makedirs(folder_path, exist_ok=True)

    # Only purely numeric stems take part in the numbering, so unrelated files
    # such as "cover.jpg" no longer make int() raise ValueError.
    existing_numbers = [
        int(stem)
        for stem, extension in map(os.path.splitext, os.listdir(folder_path))
        if extension == '.jpg' and stem.isdecimal()
    ]
    highest_number = max(existing_numbers, default=0)

    # Fetch the Unsplash search result page
    url = f"https://unsplash.com/s/photos/{query.lower()}"
    response = requests.get(url, timeout=request_timeout)

    if response.status_code != 200:
        print(f"Failed to connect to Unsplash (HTTP {response.status_code})")
        return

    soup = BeautifulSoup(response.text, "html.parser")
    img_tags = soup.select("img[src^='https://images.unsplash.com/photo']")

    # A negative num_images would otherwise slice "all but the last N" tags.
    selected_tags = img_tags[:max(num_images, 0)]
    expected_last_number = highest_number + len(selected_tags)

    saved_count = 0
    for img_tag in selected_tags:
        img_url = img_tag.get("src")

        # Skip tags without a usable absolute URL
        if not img_url or not img_url.startswith("http"):
            continue
        img_url = urljoin(url, img_url)  # Ensure absolute URL

        try:
            img_response = requests.get(img_url, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f"Skipping {img_url}: {exc}")
            continue

        # Do not store an error page under a .jpg name
        if img_response.status_code != 200:
            print(f"Skipping {img_url}: HTTP {img_response.status_code}")
            continue

        # Number by images actually saved so a skipped image leaves no gap
        saved_count += 1
        image_number = highest_number + saved_count
        img_path = os.path.join(folder_path, f"{image_number}.jpg")

        with open(img_path, "wb") as img_file:
            img_file.write(img_response.content)

        print(f"Downloaded {image_number}/{expected_last_number} images")

    print(f"Downloaded {saved_count} images in numeric order to {folder_path}")

# Example usage: request up to 100 "House" images, saved with sequential
# numeric names inside the color_input/House folder
download_google_images(folder_path="color_input/House", query="House", num_images=100)


Downloaded 46/145 images
Downloaded 47/145 images
Downloaded 48/145 images
Downloaded 49/145 images
Downloaded 50/145 images
Downloaded 51/145 images
Downloaded 52/145 images
Downloaded 53/145 images
Downloaded 54/145 images
Downloaded 55/145 images
Downloaded 56/145 images
Downloaded 57/145 images
Downloaded 58/145 images
Downloaded 59/145 images
Downloaded 60/145 images
Downloaded 61/145 images
Downloaded 62/145 images
Downloaded 63/145 images
Downloaded 64/145 images
Downloaded 65/145 images
Downloaded 66/145 images
Downloaded 67/145 images
Downloaded 68/145 images
Downloaded 69/145 images
Downloaded 70/145 images
Downloaded 71/145 images
Downloaded 72/145 images
Downloaded 73/145 images
Downloaded 74/145 images
Downloaded 75/145 images
Downloaded 76/145 images
Downloaded 77/145 images
Downloaded 78/145 images
Downloaded 79/145 images
Downloaded 80/145 images
Downloaded 81/145 images
Downloaded 82/145 images
Downloaded 83/145 images
Downloaded 84/145 images
Downloaded 85/145 images


In [41]:
import os

def rename_images(folder_path):
    """Renumber the numerically named files in ``folder_path`` to 1..N.

    A file takes part when the text before its first ``.`` is a decimal
    number and the entry is a regular file. Such files are ordered by that
    number and renamed to ``1<ext>``, ``2<ext>``, ... where ``<ext>`` is the
    file's last extension. Every other entry (sub-directories, hidden files,
    non-numeric names) is reported and left untouched.

    Renaming happens in two passes through temporary names so that a new
    name can never overwrite a file that has not been processed yet
    (for example ``0.jpg -> 1.jpg`` while ``1.jpg`` still exists).

    Args:
        folder_path: Directory whose files are renumbered.

    Raises:
        FileExistsError: If a target name is already taken by an entry that
            is not being renamed. Nothing is renamed in that case.
    """
    numbered_files = []
    untouched = set()
    for name in os.listdir(folder_path):
        stem = name.split('.')[0]
        if stem.isdecimal() and os.path.isfile(os.path.join(folder_path, name)):
            numbered_files.append(name)
        else:
            untouched.add(name)
            print(f"Skipped: {name}")

    # The name is a tie-breaker so equal numbers ("1.jpg" / "1.png") sort deterministically.
    numbered_files.sort(key=lambda name: (int(name.split('.')[0]), name))

    plan = []
    for i, old_name in enumerate(numbered_files):
        extension = os.path.splitext(old_name)[1]
        new_name = f"{i + 1}{extension}"
        if new_name in untouched:
            raise FileExistsError(
                f"Cannot rename {old_name} to {new_name}: target already exists"
            )
        plan.append((old_name, f".rename_tmp_{i}{extension}", new_name))

    # Pass 1: move every file out of the way under a temporary name.
    for old_name, temp_name, _ in plan:
        os.rename(os.path.join(folder_path, old_name),
                  os.path.join(folder_path, temp_name))

    # Pass 2: give every file its final sequential name.
    for old_name, temp_name, new_name in plan:
        os.rename(os.path.join(folder_path, temp_name),
                  os.path.join(folder_path, new_name))
        print(f"Renamed: {old_name} -> {new_name}")

# Folder whose images should be renumbered. The path is built with
# os.path.join so it contains no backslash escape sequence ('\F' is an
# invalid escape) and resolves correctly on every operating system.
folder_path = os.path.join('color_input', 'Food')
rename_images(folder_path)


Renamed: 1.jpg -> 1.jpg
Renamed: 2.jpg -> 2.jpg
Renamed: 3.jpg -> 3.jpg
Renamed: 4.jpg -> 4.jpg
Renamed: 5.jpg -> 5.jpg
Renamed: 7.jpg -> 6.jpg
Renamed: 8.jpg -> 7.jpg
Renamed: 9.jpg -> 8.jpg
Renamed: 10.jpg -> 9.jpg
Renamed: 12.jpg -> 10.jpg
Renamed: 13.jpg -> 11.jpg
Renamed: 14.jpg -> 12.jpg
Renamed: 15.jpg -> 13.jpg
Renamed: 17.jpg -> 14.jpg
Renamed: 18.jpg -> 15.jpg
Renamed: 19.jpg -> 16.jpg
Renamed: 21.jpg -> 17.jpg
Renamed: 22.jpg -> 18.jpg
Renamed: 23.jpg -> 19.jpg
Renamed: 24.jpg -> 20.jpg
Renamed: 25.jpg -> 21.jpg
Renamed: 26.jpg -> 22.jpg
Renamed: 27.jpg -> 23.jpg
Renamed: 28.jpg -> 24.jpg
Renamed: 29.jpg -> 25.jpg
Renamed: 30.jpg -> 26.jpg
Renamed: 31.jpg -> 27.jpg
Renamed: 32.jpg -> 28.jpg
Renamed: 33.jpg -> 29.jpg
Renamed: 34.jpg -> 30.jpg
Renamed: 35.jpg -> 31.jpg
Renamed: 36.jpg -> 32.jpg
Renamed: 37.jpg -> 33.jpg
Renamed: 38.jpg -> 34.jpg
Renamed: 39.jpg -> 35.jpg
Renamed: 40.jpg -> 36.jpg
Renamed: 41.jpg -> 37.jpg
Renamed: 42.jpg -> 38.jpg
Renamed: 43.jpg -> 39.jpg
Ren

In [21]:
import os
import cv2

def convert_to_black_and_white_folder(input_folder, output_folder):
    """Write a grayscale copy of every numbered image, one sub-folder at a time.

    For each immediate sub-directory of ``input_folder`` a directory of the
    same name is created under ``output_folder``. Files whose name starts
    with a decimal number (the text before the first ``.``) are processed in
    numeric order, converted to grayscale with OpenCV and saved as
    ``1.jpg``, ``2.jpg``, ... according to their position in that order.
    Entries that are not numbered files (hidden folders, ``Thumbs.db``, ...)
    are reported and skipped instead of aborting the run.

    Args:
        input_folder: Directory containing one sub-directory per category.
        output_folder: Directory receiving the converted images; created if
            it does not exist.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Get a list of all subfolders in the input folder
    subfolders = [entry.name for entry in os.scandir(input_folder) if entry.is_dir()]

    for subfolder in subfolders:
        input_subfolder = os.path.join(input_folder, subfolder)
        output_subfolder = os.path.join(output_folder, subfolder)
        os.makedirs(output_subfolder, exist_ok=True)

        # Keep only regular files with a numeric prefix; anything else would
        # make the int() used for sorting raise ValueError.
        files = []
        for name in os.listdir(input_subfolder):
            entry_path = os.path.join(input_subfolder, name)
            if name.split('.')[0].isdecimal() and os.path.isfile(entry_path):
                files.append(name)
            else:
                print(f"Skipping non-numbered entry: {entry_path}")
        files.sort(key=lambda name: (int(name.split('.')[0]), name))

        for i, file in enumerate(files):
            input_path = os.path.join(input_subfolder, file)

            # cv2.imread returns None when the file cannot be decoded
            img = cv2.imread(input_path)
            if img is None:
                print(f"Failed to load image: {input_path}")
                continue  # Skip to the next file

            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            output_path = os.path.join(output_subfolder, f"{i + 1}.jpg")

            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(output_path, gray_img):
                print(f"Failed to write image: {output_path}")
                continue

            print(f"Converted {file} in {subfolder} to black and white as {i + 1}.jpg")

# Example usage:
# Replace 'color_input' and 'bw_output' with your input and output folder paths
convert_to_black_and_white_folder('color_input', 'bw_output')


Converted 1.jpg in Bike to black and white as 1.jpg
Converted 2.jpg in Bike to black and white as 2.jpg
Converted 3.jpg in Bike to black and white as 3.jpg
Converted 4.jpg in Bike to black and white as 4.jpg
Converted 5.jpg in Bike to black and white as 5.jpg
Converted 6.jpg in Bike to black and white as 6.jpg
Converted 7.jpg in Bike to black and white as 7.jpg
Converted 8.jpg in Bike to black and white as 8.jpg
Converted 9.jpg in Bike to black and white as 9.jpg
Converted 10.jpg in Bike to black and white as 10.jpg
Converted 11.jpg in Bike to black and white as 11.jpg
Converted 12.jpg in Bike to black and white as 12.jpg
Converted 13.jpg in Bike to black and white as 13.jpg
Converted 14.jpg in Bike to black and white as 14.jpg
Converted 15.jpg in Bike to black and white as 15.jpg
Converted 16.jpg in Bike to black and white as 16.jpg
Converted 17.jpg in Bike to black and white as 17.jpg
Converted 18.jpg in Bike to black and white as 18.jpg
Converted 19.jpg in Bike to black and white as

In [22]:
import os

def get_dataset_size(dataset_path):
    """Return the combined size of all files below ``dataset_path`` in megabytes.

    The directory tree is walked recursively and the byte sizes of all files
    are summed, then divided by 1024 * 1024.

    Args:
        dataset_path: Root directory to measure.

    Returns:
        The total size in megabytes as a float, or None (after printing an
        error message) when ``dataset_path`` does not exist.
    """
    if os.path.exists(dataset_path):
        byte_count = sum(
            os.path.getsize(os.path.join(current_dir, name))
            for current_dir, _subdirs, names in os.walk(dataset_path)
            for name in names
        )
        bytes_per_megabyte = 1024 * 1024
        return byte_count / bytes_per_megabyte

    print(f"Error: The specified path '{dataset_path}' does not exist.")
    return None

# Root folder of the converted dataset; later cells reuse dataset_path.
dataset_path = 'bw_output'
dataset_size = get_dataset_size(dataset_path)

# get_dataset_size returns None when the path does not exist.
if dataset_size is None:
    print("Unable to determine dataset size.")
else:
    print(f"Total Size: {dataset_size:.2f} MB")


Total Size: 191.48 MB


In [23]:
def count_classes(dataset_path):
    """Return the number of immediate sub-directories of ``dataset_path``.

    Each sub-directory is treated as one class; plain files are ignored.
    """
    class_total = 0
    for entry in os.listdir(dataset_path):
        if os.path.isdir(os.path.join(dataset_path, entry)):
            class_total += 1
    return class_total

# Report how many class folders the dataset contains.
class_count = count_classes(dataset_path)
print(f"Number of Classes: {class_count}")


Number of Classes: 10


In [24]:
def count_samples_per_class(dataset_path):
    """Map each immediate sub-directory of ``dataset_path`` to its entry count.

    Returns:
        A dict whose keys are the sub-directory names and whose values are
        the number of entries listed directly inside each of them.
    """
    counts = {}
    for entry in os.listdir(dataset_path):
        class_dir = os.path.join(dataset_path, entry)
        if os.path.isdir(class_dir):
            counts[entry] = len(os.listdir(class_dir))
    return counts

# Print one line per class folder with the number of entries it holds.
samples_per_class = count_samples_per_class(dataset_path)
for class_name in samples_per_class:
    count = samples_per_class[class_name]
    print(f"Class: {class_name}, Number of Samples: {count}")


Class: Bike, Number of Samples: 90
Class: Bird, Number of Samples: 77
Class: Car, Number of Samples: 90
Class: Cat, Number of Samples: 94
Class: Dog, Number of Samples: 94
Class: Food, Number of Samples: 86
Class: House, Number of Samples: 90
Class: Lion, Number of Samples: 105
Class: Monkey, Number of Samples: 84
Class: Tiger, Number of Samples: 94


In [28]:
import os

def count_images_recursive(folder_path):
    """Print how many image files exist anywhere below ``folder_path``.

    A file counts as an image when its name ends, case-insensitively, with
    ``.png``, ``.jpg`` or ``.jpeg`` and does not start with a dot. The
    function prints its result and returns None; a missing folder is
    reported with a message instead.
    """
    if not os.path.exists(folder_path):
        print(f"The folder '{folder_path}' does not exist.")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg')
    total_image_count = 0

    # Visit every directory in the tree and tally matching file names
    for _current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if name.startswith('.'):
                continue  # hidden files are not counted
            if name.lower().endswith(image_suffixes):
                total_image_count += 1

    print(f"Total number of images in '{folder_path}': {total_image_count}")

# Example usage: count the images stored anywhere below 'bw_output'
# (pass a different folder path to inspect another dataset)
count_images_recursive(folder_path='bw_output')


Total number of images in 'bw_output': 904
