# Implementation of Image Web Scraping using Selenium Python
# Step1. Import all required libraries

In [1]:
#!pip install webdriver_manager
#!pip install pillow


import os
import selenium
from selenium import webdriver
import time
from PIL import Image
import io
import requests
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import ElementClickInterceptedException


# Step2. Install Chrome Driver

In [2]:
driver = webdriver.Chrome(ChromeDriverManager().install())

[WDM] - Current google-chrome version is 89.0.4389






[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - There is no [win32] chromedriver for browser 89.0.4389 in cache
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Trying to download new driver from https://chromedriver.storage.googleapis.com/89.0.4389.23/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\HP\.wdm\drivers\chromedriver\win32\89.0.4389.23]


# Step3. Specify Search url

In [3]:
search_url = "https://www.google.com/search?q={q}&tbm=isch&tbs=sur%3Afc&hl=en&ved=0CAIQpwVqFwoTCKCa1c6s4-oCFQAAAAAdAAAAABAC&biw=1251&bih=568"
driver.get(search_url.format(q='Car'))


# Step4. Scroll to the end of the page

In [4]:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(5)#sleep_between_interactions  


# Step5. Locate the images to be scraped from the page

In [5]:
imgResults = driver.find_elements_by_xpath("//img[contains(@class,'Q4LuWd')]")
totalResults=len(imgResults)
totalResults

100

# Step6. Extract corresponding link of each Image

In [6]:
# Click on each Image to extract its corresponding link to download
img_urls = set()
for i in  range(len(imgResults)):
    img=imgResults[i]
    try:
        img.click()
        time.sleep(0.9)
        actual_images = driver.find_elements_by_css_selector('img.n3VNCb')
        for actual_image in actual_images:
            if actual_image.get_attribute('src') and 'https' in actual_image.get_attribute('src'):
                img_urls.add(actual_image.get_attribute('src'))
    except ElementClickInterceptedException or ElementNotInteractableException as err:
        print(err)

Message: element click intercepted: Element is not clickable at point (261, 657)
  (Session info: chrome=89.0.4389.82)



# Step7. Download & save each image in Destination directory

In [7]:
os.chdir('E:/practice/Python/Web_Scrapping/Dataset1')
baseDir=os.getcwd()

for i, url in enumerate(img_urls):
    file_name = f"{i:150}.jpg"    
    try:
        image_content = requests.get(url).content

    except Exception as e:
        print(f"ERROR - COULD NOT DOWNLOAD {url} - {e}")

    try:
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file).convert('RGB')
        
        file_path = os.path.join(baseDir, file_name)
        
        with open(file_path, 'wb') as f:
            image.save(f, "JPEG", quality=85)
        print(f"SAVED - {url} - AT: {file_path}")
    except Exception as e:
        print(f"ERROR - COULD NOT SAVE {url} - {e}")

SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQtUasZg_xXec2B5NkNy6qdmdZh1ssdxyBWDQ&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                     0.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRavyM0JR1sM2j8rXHL-_taqo9_hS-wBOxa7Q&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                     1.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR3Mo2Q-UBmHeLTpQFwYKWlsuI6tKRbnTFAiA&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                     2.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSaFAy

SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQfY-GQ5ARqPTmB3Ia8OmekL81LfHhcBZD37Q&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    27.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTOlkAh6prTwC9MEKtZJbinC6Ce_4zI5UHOkw&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    28.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQgp1QN-QeG7ipMLuj6m_C8vO6chQ4m6eciew&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    29.jpg
ERROR - COULD NOT SAVE https://upload.wikimedia.org/wikipedia/commo

SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQFt-ilMvCXERqMfE83qDj4y1APRJhjcebLsw&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    54.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS06L-PQmjQ1Q0CKqFHcDtKzF9dAM5GT3FNdA&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    55.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTwN6udLpGpLU9RR4usrAfevQgfkxatMRsnAg&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    56.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcStCJz

SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSgjXR8MH1fx3DWVau4f4cV9tlKPcnjDR2m0g&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    81.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQzsPj5xhh1TLBZ0ktS66BWe2saYiO8eKYgBw&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    82.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSe48Ga_hMR0Ezmo5H2ooD88l4KMyutBPeENQ&usqp=CAU - AT: E:\practice\Python\Web_Scrapping\Dataset1\                                                                                                                                                    83.jpg
SAVED - https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTBx4p