In [1]:
import os

def rename_images_in_folder(folder_path, prefix='ketchup'):
    """Rename every image in ``folder_path`` to ``<prefix><n><ext>``.

    Entries whose extension (case-insensitive) is one of the common image
    extensions are numbered from 1 in natural filename order, so ``image_9``
    comes before ``image_10``. The original extension is kept, lower-cased.
    Entries with other extensions are left untouched.

    The rename is done in two passes through unique temporary names, so a
    file that already carries a target name (for example ``ketchup1.jpg``
    left over from an earlier run) is never overwritten by another file.

    Args:
        folder_path: Directory whose images are renamed in place.
        prefix: Text placed before the running number in each new name.

    Raises:
        OSError: If the directory cannot be listed or a rename fails. Files
            already moved to a temporary name keep that name in this case.
    """
    import re
    import uuid

    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}

    def natural_key(name):
        # re.split with a capturing group alternates text / digit runs, so
        # text is always compared with text and numbers with numbers.
        parts = re.split(r'(\d+)', name.lower())
        return [int(p) if p.isdigit() else p for p in parts], name

    # os.listdir() returns entries in arbitrary order; sort so the numbering
    # is reproducible between runs and machines.
    images = sorted(
        (f for f in os.listdir(folder_path)
         if os.path.splitext(f)[1].lower() in image_extensions),
        key=natural_key,
    )

    # Pass 1: move every image out of the way under a unique temporary name.
    token = uuid.uuid4().hex
    staged = []
    for idx, image in enumerate(images, start=1):
        ext = os.path.splitext(image)[1].lower()  # Keep original extension
        new_name = f'{prefix}{idx}{ext}'
        tmp_name = f'__renaming_{token}_{idx}{ext}'
        os.rename(os.path.join(folder_path, image),
                  os.path.join(folder_path, tmp_name))
        staged.append((image, tmp_name, new_name))

    # Pass 2: every image is now staged, so no target name is still occupied
    # by a file that has yet to be renamed.
    for image, tmp_name, new_name in staged:
        os.rename(os.path.join(folder_path, tmp_name),
                  os.path.join(folder_path, new_name))
        print(f'Renamed: {image} -> {new_name}')

# Example usage: rename the images inside the 'ketchup' directory, resolved
# relative to the current working directory. The files are renamed in place.
folder_path = 'ketchup'
rename_images_in_folder(folder_path)


Renamed: image_17.jpeg -> ketchup1.jpeg
Renamed: image_19.jpeg -> ketchup2.jpeg
Renamed: image_21.jpeg -> ketchup3.jpeg
Renamed: image_22.jpeg -> ketchup4.jpeg
Renamed: image_24.jpeg -> ketchup5.jpeg
Renamed: image_26.jpeg -> ketchup6.jpeg
Renamed: image_28.jpeg -> ketchup7.jpeg
Renamed: image_30.jpeg -> ketchup8.jpeg
Renamed: image_32.jpeg -> ketchup9.jpeg
Renamed: image_33.jpeg -> ketchup10.jpeg
Renamed: image_35.jpeg -> ketchup11.jpeg
Renamed: image_36.jpeg -> ketchup12.jpeg
Renamed: image_37.jpeg -> ketchup13.jpeg
Renamed: image_38.jpeg -> ketchup14.jpeg
Renamed: image_40.jpeg -> ketchup15.jpeg
Renamed: image_46.jpeg -> ketchup16.jpeg
Renamed: image_48.jpeg -> ketchup17.jpeg
Renamed: image_50.jpeg -> ketchup18.jpeg
Renamed: image_52.jpeg -> ketchup19.jpeg
Renamed: image_53.jpeg -> ketchup20.jpeg
Renamed: image_55.jpeg -> ketchup21.jpeg
Renamed: image_56.jpeg -> ketchup22.jpeg
Renamed: image_63.jpg -> ketchup23.jpg
Renamed: image_65.jpg -> ketchup24.jpg
Renamed: image_67.jpg -> ketc

In [9]:
# %pip installs into the environment of the running kernel; `! pip` may hit a
# different interpreter on PATH.
%pip install beautifulsoup4 requests


Defaulting to user installation because normal site-packages is not writeable


In [12]:
# %pip installs into the environment of the running kernel. Versions are
# pinned to the ones this notebook's recorded install output resolved.
%pip install selenium==4.24.0 webdriver-manager==4.0.2


Defaulting to user installation because normal site-packages is not writeable
Collecting selenium
  Downloading selenium-4.24.0-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.26.2-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting websocket-client~=1.8 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading seleni

In [13]:
import os
import time
import urllib.request
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Beverage names to scrape; each one is used as the image-search query.
# NOTE(review): the cola brand is spelled "Thums Up" - the query "Thumbs up"
# may return hand-gesture images instead of the drink. Confirm before changing,
# since the output folder name is derived from this string.
beverages = ["Slice", "Thumbs up", "Limca", "Fanta", "Mountain dew", "Sprite"]

# Base directory (relative to the current working directory) for the images.
base_dir = "beverage_images1"
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(base_dir, exist_ok=True)

# Function to download an image
def download_image(url, folder_name, count, timeout=30):
    """Download ``url`` to ``<base_dir>/<folder_name>/<folder_name>_<count>.jpg``.

    The data is first written to a ``.part`` file and moved into place only
    after the transfer completes, so a failed download never leaves a
    truncated image behind. Failures are reported with a printed message
    rather than raised, so one bad URL does not abort a whole scrape.

    Note: the file is always given a ``.jpg`` extension, whatever the actual
    format of the downloaded data is.

    Args:
        url: Address of the image to fetch.
        folder_name: Sub-folder of ``base_dir`` and prefix of the file name.
        count: Running number appended to the file name.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        True if the image was saved, False if the download failed.
    """
    import shutil

    image_name = f"{folder_name}_{count}.jpg"
    target_dir = os.path.join(base_dir, folder_name)
    image_path = os.path.join(target_dir, image_name)
    partial_path = image_path + ".part"
    try:
        # Do not rely on the caller having created the folder already.
        os.makedirs(target_dir, exist_ok=True)
        with urllib.request.urlopen(url, timeout=timeout) as response, \
                open(partial_path, "wb") as out_file:
            shutil.copyfileobj(response, out_file)
        os.replace(partial_path, image_path)
        print(f"Downloaded: {image_name}")
        return True
    except Exception as e:
        # Best-effort cleanup of the incomplete file; the download error
        # below is the one worth reporting.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Failed to download {image_name}: {e}")
        return False

# Function to scrape images for a particular beverage
def scrape_images(beverage_name, driver, max_images=110, scroll_count=5, scroll_pause=2):
    """Download Google Images results for ``beverage_name``.

    Opens the image-search page in ``driver``, scrolls to the bottom
    ``scroll_count`` times so more results load, then passes the ``src`` of
    each matching ``<img>`` element that starts with ``http`` to
    ``download_image``. Files go to ``<base_dir>/<name>`` where ``<name>`` is
    the beverage name lower-cased with spaces replaced by underscores.

    Args:
        beverage_name: Search term; also determines the output folder name.
        driver: Selenium WebDriver used to load and query the page.
        max_images: Maximum number of download attempts for this beverage.
        scroll_count: How many times to scroll to the bottom of the page.
        scroll_pause: Seconds to wait after each scroll for images to load.
    """
    from urllib.parse import quote_plus

    # Encode the query so spaces and characters such as '&' or '#' cannot
    # break or truncate the search URL.
    search_url = f"https://www.google.com/search?q={quote_plus(beverage_name)}&tbm=isch"

    # Navigate to the Google Image search page
    driver.get(search_url)

    # Scroll down to load more images
    for _ in range(scroll_count):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # Sleep to allow images to load

    # NOTE(review): "img.rg_i" depends on Google's current results markup;
    # verify the selector if nothing is found.
    img_elements = driver.find_elements(By.CSS_SELECTOR, "img.rg_i")
    if not img_elements:
        print(f"No image elements found for: {beverage_name}")

    # Create a folder for each beverage
    folder_name = beverage_name.replace(" ", "_").lower()
    folder_path = os.path.join(base_dir, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    count = 1
    for img in img_elements:
        if count > max_images:  # Limit the number of images per beverage
            break
        try:
            img_url = img.get_attribute("src")
            # Only http(s) sources are downloaded; other values (for
            # example inline data: URIs) are skipped.
            if img_url and img_url.startswith("http"):
                download_image(img_url, folder_name, count)
                count += 1
        except Exception as e:
            print(f"Error processing image: {e}")

# Setup Selenium WebDriver; ChromeDriverManager().install() supplies the
# chromedriver path handed to the Service.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Scraping images for each beverage
    for beverage in beverages:
        print(f"Scraping images for: {beverage}")
        scrape_images(beverage, driver)
finally:
    # Always close the browser, even when scraping fails or the cell is
    # interrupted, so no Chrome/chromedriver process is left running.
    driver.quit()


Scraping images for: Slice
Scraping images for: Thumbs up
Scraping images for: Limca
Scraping images for: Fanta
Scraping images for: Mountain dew


KeyboardInterrupt: 