# Script to Download Google Images Automatically

## From Scratch

In [1]:
# First, install the third-party packages this notebook imports
# (sentence-transformers, selenium and requests):
# pip install -U sentence-transformers selenium requests

In [2]:
# Import required modules and methods

import os
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from sentence_transformers import SentenceTransformer, util

In [3]:
# Load the pre-trained sentence-embedding model once at module level so that
# later cells can reuse the same instance instead of re-loading it per call.
MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
model = SentenceTransformer(MODEL_NAME)

def maximum_semantic_similarity(source_word, words_to_compare):
    """Find the candidate that is semantically closest to ``source_word``.

    Both inputs are embedded with the module-level ``model`` and compared by
    cosine similarity.

    Args:
        source_word: The reference text.
        words_to_compare: Sequence of candidate texts.

    Returns:
        A ``(most_similar_word, similarity_score)`` tuple. The score is the
        cosine similarity rounded to two decimals. ``(None, 0)`` is returned
        when ``words_to_compare`` is empty or when no candidate has a
        similarity greater than zero.
    """
    # Nothing to compare against: skip the (expensive) encoder call entirely.
    if not words_to_compare:
        return None, 0

    # Compute embeddings for the source text and for every candidate.
    source_embedding = model.encode(source_word, convert_to_tensor=True)
    candidate_embeddings = model.encode(words_to_compare, convert_to_tensor=True)

    # Row 0 holds the similarity of the source text to each candidate.
    scores = util.cos_sim(source_embedding, candidate_embeddings)[0].tolist()

    # Select on the raw scores. Rounding the running maximum before comparing
    # could reject a later candidate whose raw score was strictly higher
    # (e.g. 0.856 stored as 0.86 would beat a later 0.858).
    best_index = max(range(len(scores)), key=scores.__getitem__)
    best_score = scores[best_index]

    # Only candidates with a strictly positive similarity qualify.
    if best_score <= 0:
        return None, 0

    # Round only the value handed back to the caller.
    return words_to_compare[best_index], round(best_score, 2)

In [4]:
# Search settings: edit these before running the scraping cell.
keyword = "your_keyword"  # please enter your keyword to search in google
number_of_page = 4  # number of result pages to walk through
download_image_per_page = 100  # number of result-grid cells to visit on each page

# Create the directory that receives the downloaded images (named after the
# keyword). exist_ok=True makes re-running this cell safe and avoids the
# check-then-create race of a separate os.path.isdir() test.
folder_name = keyword
os.makedirs(folder_name, exist_ok=True)

In [5]:
# Scrape Google Images for `keyword`: visit the result grid cell by cell, save
# the full-size preview of every image cell, and move to the next "page" by
# clicking the related-search link that is semantically closest to `keyword`.

# XPath of the element that contains the result grid cells.
GRID_ROOT_XPATH = '//*[@id="islrg"]/div[1]'

# Cells 1-50 are addressed as direct children of the grid root. Later cells are
# addressed inside container divs; each tuple is
# (last cell number in the batch, container div index, offset to subtract).
GRID_BATCHES = ((104, 51, 50), (208, 52, 104), (312, 53, 208), (370, 54, 312))

# Every 25th cell is treated as a block of related-search links, not an image.
RELATED_SEARCH_INTERVAL = 25

# XPaths of the full-size preview image, tried in order for a direct download.
DOWNLOAD_XPATHS = (
    '//*[@id="Sva75c"]/div[2]/div[2]/div[2]/div[2]/c-wiz/div/div/div/div/div[3]/div[1]/a/img[1]',
    '//*[@id="Sva75c"]/div[2]/div[2]/div[2]/div[2]/c-wiz/div/div/div/div/div[2]/div/a/img',
)

# XPath of the preview image used for the last-resort screenshot.
SCREENSHOT_XPATH = '//*[@id="Sva75c"]/div[2]/div[2]/div[2]/div[2]/c-wiz/div/div/div/div/div[3]/div[1]/a/img'


def _grid_cell_xpath(n, suffix):
    """Return the XPath for grid cell ``n`` (1-based) followed by ``suffix``.

    Returns ``None`` when ``n`` lies beyond the last cell listed in
    ``GRID_BATCHES``, so the caller can stop instead of reusing a stale element.
    """
    if n <= 50:
        return f"{GRID_ROOT_XPATH}/div[{n}]/{suffix}"
    for last_cell, container, offset in GRID_BATCHES:
        if n <= last_cell:
            return f"{GRID_ROOT_XPATH}/div[{container}]/div[{n - offset}]/{suffix}"
    return None


def _download_image(src, destination):
    """Fetch ``src`` over HTTP and write the body to ``destination``.

    Nothing is written when the server answers with a status other than 200.
    Raises ``requests.RequestException`` for network errors, timeouts, and
    URLs that requests cannot handle.
    """
    headers = requests.utils.default_headers()
    headers.update({"User-Agent": "My User Agent 1.0"})
    # A timeout keeps one unresponsive host from hanging the whole run.
    response = requests.get(src, headers=headers, timeout=30)
    if response.status_code == 200:
        with open(destination, "wb") as file:
            file.write(response.content)


def _save_preview(driver, file_stem):
    """Save the currently opened preview image as ``file_stem`` + extension.

    Each XPath in ``DOWNLOAD_XPATHS`` is tried for a direct download (saved as
    ``.jpg``). If every attempt raises, a screenshot of the preview element is
    saved as ``.png`` instead. If that fails too, the image is skipped with a
    printed notice rather than aborting the run.
    """
    for xpath in DOWNLOAD_XPATHS:
        try:
            src = driver.find_element(By.XPATH, xpath).get_attribute("src")
            _download_image(src, file_stem + ".jpg")
            return
        except (WebDriverException, requests.RequestException, OSError):
            # Element missing or download failed: fall through to the next option.
            continue

    try:
        image = driver.find_element(By.XPATH, SCREENSHOT_XPATH)
        screenshot = image.screenshot_as_png
    except WebDriverException as error:
        print(f"Could not save {file_stem}: {type(error).__name__}")
        return
    with open(file_stem + ".png", "wb") as file:
        file.write(screenshot)


driver = webdriver.Chrome()
driver.get("https://images.google.com/")
driver.implicitly_wait(5)

# Type the keyword into the search box and submit it.
input_box = driver.find_element(By.XPATH, '//*[@id="APjFqb"]')
input_box.send_keys(keyword)
input_box.send_keys(Keys.ENTER)

for i in range(number_of_page):

    # Maps the text of each related-search link on this page to its element.
    related_keywords = {}

    for n in range(1, download_image_per_page + 1):
        is_related_cell = n % RELATED_SEARCH_INTERVAL == 0
        cell_xpath = _grid_cell_xpath(
            n, "div/a" if is_related_cell else "a[1]/div[1]/img"
        )
        if cell_xpath is None:
            print(f"Grid cell {n} is beyond the supported range; stopping this page.")
            break

        if is_related_cell:
            for anchor in driver.find_elements(By.XPATH, cell_xpath):
                text = anchor.get_attribute("text")
                # Skip links without text; they cannot be compared to the keyword.
                if text:
                    related_keywords[text] = anchor
            continue

        # Click the thumbnail to open its full-size preview.
        try:
            image_element = driver.find_element(By.XPATH, cell_xpath)
            ActionChains(driver).move_to_element(image_element).click(
                image_element
            ).perform()
        except WebDriverException as error:
            print(f"Skipping cell {n} on page {i + 1}: {type(error).__name__}")
            continue

        # Give the preview time to load before reading its source.
        time.sleep(3)

        # File names continue counting across pages so nothing is overwritten.
        _save_preview(
            driver, os.path.join(keyword, str(n + i * download_image_per_page))
        )

    # Without any related-search link there is no next page to move to.
    if not related_keywords:
        print("No related searches found; stopping.")
        break

    most_similar_keyword = maximum_semantic_similarity(
        keyword, list(related_keywords.keys())
    )[0]
    if most_similar_keyword is None:
        print("No related search is similar to the keyword; stopping.")
        break

    # Follow the related search that is closest in meaning to the keyword.
    next_page_link_element = related_keywords[most_similar_keyword]
    ActionChains(driver).move_to_element(next_page_link_element).click(
        next_page_link_element
    ).perform()

## Using API

In [None]:
# pip install pygoogle-image
# from pygoogle_image import image as pi

# pi.download("firearms", limit=2000)