In [1]:
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from PIL import Image
from io import BytesIO
import numpy as np

def fetch_image_urls(query, max_links_to_fetch, wd, sleep_between_interactions=1):
    """Collect full-size image URLs from a Google Images search.

    Loads the image-search results page for ``query`` in ``wd``, clicks each
    thumbnail in turn and harvests the ``src`` of the enlarged image elements
    that are present after the click.

    Args:
        query: Search phrase; it is URL-encoded before being placed in the
            query string, so spaces and characters such as ``&`` are safe.
        max_links_to_fetch: Upper bound on the number of URLs returned.
        wd: Selenium WebDriver (or compatible object) providing ``get``,
            ``find_elements`` and ``execute_script``.
        sleep_between_interactions: Seconds to pause after each click and
            between result-page passes, giving the page time to update.

    Returns:
        A list of distinct URLs starting with ``http``, containing at most
        ``max_links_to_fetch`` entries. The list is shorter when the page
        stops yielding new thumbnails before the limit is reached.
    """
    # Function-scope import: standard library only, keeps the block
    # self-contained without touching the notebook's import cell.
    from urllib.parse import quote_plus

    search_url = f"https://www.google.com/search?tbm=isch&q={quote_plus(str(query))}"

    wd.get(search_url)

    image_urls = set()
    results_start = 0

    # Number of consecutive passes without any new thumbnail after which we
    # give up; without this guard the loop would spin forever once the
    # result page is exhausted.
    max_stalled_passes = 3
    stalled_passes = 0

    # NOTE(review): the CSS class names below are hard-coded; presumably they
    # are Google's thumbnail / enlarged-image / "show more results" classes.
    # Verify against the current page markup.
    while len(image_urls) < max_links_to_fetch:
        thumbnail_results = wd.find_elements(By.CSS_SELECTOR, "img.Q4LuWd")
        number_results = len(thumbnail_results)

        # Only visit thumbnails that were not seen in a previous pass.
        for img in thumbnail_results[results_start:number_results]:
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                # Best effort: a thumbnail that cannot be clicked is skipped.
                continue

            for image in wd.find_elements(By.CSS_SELECTOR, "img.n3VNCb"):
                src = image.get_attribute('src')
                # Keep real http(s) links only (skips e.g. inline data: URIs).
                if src and src.startswith('http'):
                    image_urls.add(src)
                    if len(image_urls) >= max_links_to_fetch:
                        break

            if len(image_urls) >= max_links_to_fetch:
                break
        else:
            # Reached only when the limit was NOT hit during this pass.
            if number_results <= results_start:
                stalled_passes += 1
                if stalled_passes >= max_stalled_passes:
                    break
            else:
                stalled_passes = 0

            # find_elements returns an empty list when the button is absent,
            # whereas find_element would raise NoSuchElementException.
            if wd.find_elements(By.CSS_SELECTOR, ".mye4qd"):
                wd.execute_script("document.querySelector('.mye4qd').click();")
            time.sleep(sleep_between_interactions)

        results_start = number_results

    return list(image_urls)

def download_image(url, timeout=10):
    """Download ``url`` and open the response body as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The object returned by ``Image.open`` for the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        Other exceptions from ``requests.get`` (e.g. on timeout) or from
        ``Image.open`` (e.g. when the payload is not an image) propagate.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return Image.open(BytesIO(response.content))

def score_image(image):
    """Return a simple quality score for ``image``.

    The image is converted to 8-bit grayscale (mode ``"L"``) and the score is
    the sum of two statistics over all pixels:

    * the mean intensity (used as "brightness"), and
    * the intensity variance (used as the "sharpness" term).

    Args:
        image: Object exposing ``convert("L")`` whose result NumPy can turn
            into an array (e.g. a PIL image).

    Returns:
        Mean plus variance of the grayscale pixel values.
    """
    pixels = np.array(image.convert("L"))
    mean_intensity = pixels.mean()
    intensity_variance = pixels.var()
    return mean_intensity + intensity_variance

def main():
    """Search Google Images, then download, score and save each result.

    Starts a Chrome WebDriver, collects up to ``max_links_to_fetch`` image
    URLs for ``query`` via ``fetch_image_urls``, and for every URL downloads
    the image, prints its score and writes it to ``image_<n>.jpg`` in the
    current working directory. A failure on one image is reported and does
    not stop the remaining ones.
    """
    # Function-scope import so this block works without editing the import
    # cell; selenium is already a dependency of this file.
    from selenium.webdriver.chrome.service import Service

    DRIVER_PATH = 'path/to/chromedriver'  # Change this to the path of your chromedriver
    query = "puppies"
    max_links_to_fetch = 5

    # Current Selenium 4 releases no longer accept ``executable_path`` on
    # ``webdriver.Chrome``; the driver location is passed through a Service.
    with webdriver.Chrome(service=Service(executable_path=DRIVER_PATH)) as wd:
        image_urls = fetch_image_urls(query, max_links_to_fetch, wd)

    # The browser is closed at this point; downloads use plain HTTP requests.
    for idx, url in enumerate(image_urls):
        try:
            image = download_image(url)
            score = score_image(image)
            print(f"Image {idx + 1}: URL = {url}, Score = {score}")

            # JPEG cannot store alpha or palette modes (RGBA, P, ...), so
            # normalise to RGB before writing the .jpg file.
            image.convert("RGB").save(f"image_{idx + 1}.jpg")
        except Exception as e:
            # Best effort: report the failure and continue with the next URL.
            print(f"Failed to process image from {url}: {e}")

# Entry point: run the scraping pipeline only when this code is executed
# directly (``__name__`` is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'selenium'

In [2]:
!pip install selenium pillow


Collecting selenium
  Using cached selenium-4.21.0-py3-none-any.whl.metadata (6.9 kB)
Collecting pillow
  Using cached pillow-10.3.0-cp311-cp311-win_amd64.whl.metadata (9.4 kB)
Collecting trio~=0.17 (from selenium)
  Using cached trio-0.25.1-py3-none-any.whl.metadata (8.7 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Using cached trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Using cached outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting cffi>=1.14 (from trio~=0.17->selenium)
  Using cached cffi-1.16.0-cp311-cp311-win_amd64.whl.metadata (1.5 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Using cached wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]<3,>=1.26->selenium)
  Using cached PySocks-1