In [5]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin  # Import urljoin for proper URL handling

def download_google_images(query, num_images):
    # Replace spaces with '+' in the search query
    query = query.replace(" ", "+")

    # Create a directory to save the images
    if not os.path.exists(query):
        os.makedirs(query)

    # Perform a Google Image search
    url = f"https://www.google.com/search?q={query}&tbm=isch"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        img_tags = soup.find_all("img")

        # Download the first 'num_images' images
        for i, img_tag in enumerate(img_tags[:num_images]):
            img_url = img_tag.get("src")

            # Check if the URL starts with "http" to filter out non-image URLs
            if img_url and img_url.startswith("http"):
                img_url = urljoin(url, img_url)  # Ensure absolute URL
                img_data = requests.get(img_url).content

                with open(f"{query}/{query}{i + 1}.jpg", "wb") as img_file:
                    img_file.write(img_data)

                print(f"Downloaded {i + 1}/{num_images} images")

        print(f"Downloaded {num_images} images for '{query}'")
    else:
        print("Failed to connect to Google Images")

# Example usage: Download 5 lion images
download_google_images("lion", 5)


Downloaded 2/5 images
Downloaded 3/5 images
Downloaded 4/5 images
Downloaded 5/5 images
Downloaded 5 images for 'lion'


In [14]:
import os
import cv2

def convert_to_black_and_white_folder(input_folder, output_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Get a list of all subfolders in the input folder
    subfolders = [f.name for f in os.scandir(input_folder) if f.is_dir()]

    # Loop through each subfolder in the input folder
    for subfolder in subfolders:
        input_subfolder = os.path.join(input_folder, subfolder)
        output_subfolder = os.path.join(output_folder, subfolder)

        # Create a subfolder in bw_output for the current category
        if not os.path.exists(output_subfolder):
            os.makedirs(output_subfolder)

        # Get a list of all files in the current subfolder
        files = os.listdir(input_subfolder)

        # Loop through each file in the current subfolder
        for i, file in enumerate(files):
            # Construct the full path for the input image
            input_path = os.path.join(input_subfolder, file)

            # Read the image
            img = cv2.imread(input_path)

            # Check if the image is loaded successfully
            if img is None:
                print(f"Failed to load image: {input_path}")
                continue  # Skip to the next iteration

            # Convert the image to grayscale
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Construct the full path for the output image
            output_path = os.path.join(output_subfolder, f"{i + 1}.jpg")

            # Save the black and white image
            cv2.imwrite(output_path, gray_img)

            print(f"Converted {file} in {subfolder} to black and white as {i + 1}.jpg")

# Example usage:
# Replace 'color_input' and 'bw_output' with your input and output folder paths
convert_to_black_and_white_folder('color_input', 'bw_output')


Converted 1.jpg in cat to black and white as 1.jpg
Converted 2.jpg in cat to black and white as 2.jpg
Converted 1.jpg in dog to black and white as 1.jpg
Converted 2.jpg in dog to black and white as 2.jpg
Converted 1.jpg in lion to black and white as 1.jpg
Converted 2.jpg in lion to black and white as 2.jpg
Converted 3.jpg in lion to black and white as 3.jpg
