In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time

In [2]:
from rich.progress import track

In [3]:
import requests
import io
import os 
import base64
from urllib.parse import urlparse

In [4]:
class GoogleImageScraper():
    """Collect image URLs from a Google Images search and save them to disk.

    The search phrase is always ``'plastic ' + search_key + ' ocean'``.
    Typical use is ``find_image_urls()`` followed by ``save_images(urls)``.

    Note: ``min_resolution`` and ``max_resolution`` are stored on the instance
    but no method of this class reads them, so no resolution filtering happens.
    """

    # Seconds to wait for a single image download before giving up on it.
    REQUEST_TIMEOUT = 30

    def __init__(self, folder_name = "images", search_key="cat", total_images=1, min_resolution=(0, 0), max_resolution=(1920, 1080), max_missed=10):
        """Validate the arguments, create the output folder and build the search URL.

        Args:
            folder_name: Directory the images are written to; created if missing.
            search_key: Term placed between 'plastic' and 'ocean' in the query.
            total_images: Number of result thumbnails to visit (must be an int).
            min_resolution: Stored only; not used by this class.
            max_resolution: Stored only; not used by this class.
            max_missed: Consecutive failures tolerated before scraping stops.

        Raises:
            TypeError: If ``total_images`` is not an int. Previously this printed
                a message and returned, leaving an instance without attributes
                that failed later with an unrelated AttributeError.
        """
        # Local import: the notebook's import cell only pulls in urlparse.
        from urllib.parse import quote_plus

        # Exact type check kept on purpose so that bool is still rejected.
        if type(total_images) is not int:
            raise TypeError("[Error] Number of images must be integer value.")
        if not os.path.exists(folder_name):
            print("[INFO] Image path not found. Creating a new folder.")
            os.makedirs(folder_name, exist_ok=True)

        self.search_key = search_key
        self.total_images = total_images
        self.folder_name = folder_name
        # Percent-encode the phrase so spaces, '/', '&' etc. in the search key
        # cannot break or alter the query string.
        query = quote_plus('plastic ' + search_key + ' ocean')
        self.url = "https://www.google.com/search?q=%s&source=lnms&tbm=isch&sa=X&ved=2ahUKEwie44_AnqLpAhUhBWMBHUFGD90Q_AUoAXoECBUQAw&biw=1920&bih=947"%(query)
        self.min_resolution = min_resolution
        self.max_resolution = max_resolution
        self.max_missed = max_missed

    def find_image_urls(self):
        """Search with ``self.url`` and return the list of full-size image URLs.

        Visits result thumbnails 1..total_images, clicking each one and reading
        the ``src`` of the enlarged preview. Stops early once more than
        ``max_missed`` consecutive thumbnails fail. Returns an empty list if the
        browser cannot be started.
        """
        image_urls = []
        missed_count = 0

        # Starting the browser is handled separately: without a driver there is
        # nothing to scrape and nothing to quit.
        try:
            driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
        except Exception:
            print("[INFO] Unable to open Google and pass the terms page")
            return image_urls

        try:
            try:
                driver.get(self.url)
                driver.maximize_window()
                time.sleep(1)
                # Click the reject button on the terms page to be able to scrape the images.
                # The class list is copied from the page markup and may go stale.
                driver.find_element(By.XPATH, "//button[@class='VfPpkd-LgbsSe VfPpkd-LgbsSe-OWXEXe-k8QpJ VfPpkd-LgbsSe-OWXEXe-dgl2Hf nCP5yc AjY5Oe DuMIQc LQeN7 Nc7WLe']").click()
                time.sleep(5)
            except Exception:
                # Best effort: keep going with whatever page is loaded.
                print("[INFO] Unable to open Google and pass the terms page")

            for idx in range(1, self.total_images + 1):
                try:
                    # Load the larger version of the image
                    thumbnail = driver.find_element(By.XPATH, '//*[@id="islrg"]/div[1]/div[%s]/a[1]/div[1]/img'%(str(idx)))
                    thumbnail.click()
                    # Wait for the larger version of the image to load
                    time.sleep(10)

                    # Get the URL of the larger version of the image
                    larger_img_url = driver.find_element(By.XPATH, '//img[@class="n3VNCb pT0Scc KAlRDb"]').get_attribute('src')
                    time.sleep(10)
                    if not larger_img_url:
                        # A missing src cannot be downloaded; count it as a miss.
                        raise ValueError("preview image has no src attribute")
                    print(idx,':', larger_img_url)
                    image_urls.append(larger_img_url)
                    missed_count = 0
                except Exception:
                    missed_count = missed_count + 1
                    if (missed_count > self.max_missed):
                        print("[INFO] Maximum missed photos reached, exiting...")
                        break
        finally:
            # Always release the browser, even on an unexpected error or Ctrl-C.
            driver.quit()

        print("[INFO] %s Image URLS collected successfully"%(len(image_urls)))
        return image_urls

    @staticmethod
    def _safe_file_stem(text):
        """Return ``text`` with characters unsafe in a file name replaced by '_'."""
        cleaned = "".join(ch if (ch.isalnum() or ch in "-_ ") else "_" for ch in str(text))
        return cleaned or "image"

    def save_images(self,image_urls):
        """Write every entry of ``image_urls`` to ``<folder_name>/<search_key>_<idx>.jpg``.

        Each entry is either an http(s) URL, which is downloaded, or a
        base64 ``data:`` URI, which is decoded locally. A failure on one image
        is reported and does not stop the remaining ones. Files always get the
        ``.jpg`` suffix regardless of the actual image format.
        """
        # A key such as 'crates/containers' would otherwise point into a
        # sub-directory that does not exist and make every save fail.
        file_stem = self._safe_file_stem(self.search_key)

        for idx, image in enumerate(image_urls):
            try:
                image_file_name = '%s_%s.jpg'%(file_stem, idx)
                image_file_path = os.path.join(self.folder_name, image_file_name)
                if image.startswith("data:"):
                    # Inline image: "data:<mime>;base64,<payload>" for any mime type.
                    header, _, payload = image.partition(',')
                    if not header.endswith(";base64"):
                        raise ValueError("unsupported data URI encoding")
                    image_bytes = base64.b64decode(payload)
                else:
                    response = requests.get(image, timeout=self.REQUEST_TIMEOUT)
                    # Do not store an HTTP error page under an image file name.
                    response.raise_for_status()
                    time.sleep(1)
                    image_bytes = response.content
                # Open the file only once the bytes are in hand so a failed
                # fetch never leaves an empty file behind.
                with open(image_file_path, "wb") as f:
                    f.write(image_bytes)
            except Exception:
                print("[INFO] Unable to save image number %s, URL: %s"%(idx, image))
        

In [5]:
if __name__ == "__main__":

    # ---- Run configuration ----
    folder_name = "images"              # Folder handed to the scraper for output
    total_images = 300                  # Desired number of images
    min_resolution = (0, 0)             # Minimum desired image resolution
    max_resolution = (9999, 9999)       # Maximum desired image resolution
    max_missed = 10                     # Max number of failed images before exit

    # Search terms to process. Only 'buoy' is active; the remaining candidate
    # categories are kept here, disabled, for later runs:
    #   'bottles',
    #   'floats and buoys',
    #   'nets and pieces of nets including fishing nets/lines',
    #   'tangled nets/cord/rope and string',
    #   'bags',
    #   'crates/containers/baskets',
    #   'plastic cover/packaging',
    #   'gloves',
    #   'synthetic rope',
    #   'fish boxes',
    #   'sheets/industrial packaging'
    search_keys = ['buoy']

    # One scraper per search term: collect the URLs first, then download them.
    for key in search_keys:
        image_scraper = GoogleImageScraper(
            folder_name=folder_name,
            search_key=key,
            total_images=total_images,
            min_resolution=min_resolution,
            max_resolution=max_resolution,
            max_missed=max_missed,
        )
        print("%s images processing ...."%(key))
        image_urls = image_scraper.find_image_urls()
        image_scraper.save_images(image_urls)

buoy images processing ....
1 : https://corillamarine.com/wp-content/uploads//2014/09/2400_1.jpg
2 : https://corillamarine.com/wp-content/uploads//2019/11/P1040162.jpg
3 : https://corillamarine.com/wp-content/uploads//2014/09/2400_3.jpg
4 : https://corillamarine.com/wp-content/uploads//2014/09/1750_3.jpg
5 : https://maritime-executive.com/media/images/article/Photos/Technology/spotter-international-seakeepers-society.02e147.jpg
6 : https://thumbs.dreamstime.com/b/orange-large-round-plastic-rescue-buoy-floats-blue-salt-sea-safety-orange-large-round-plastic-rescue-buoy-floats-118101366.jpg
7 : https://www.floatex.com/wp-content/uploads/2018/05/PE-800-2.jpg
8 : https://corillamarine.com/wp-content/uploads//2014/09/1250_3.jpg
9 : https://assets.website-files.com/619fc945f047aae12d956b5a/61b2327b6593fa835fcde1e5_5faaded2dd354ab800b7853a_P6yq1WD7-E74F9gkEVbJbcFyNq_MG3UUDpi25eUCLk0_Oq7Obn6853BTMCqMVlmeHj9aZhjG3V1QBFs4MdLXVFwJYMuDFQAHJIQKpq60GIHBDzOf7CowFVtS8Fb0WmWRzfwU575A.png
10 : https://im

85 : https://www.shutterstock.com/image-vector/striped-red-plastic-buoys-lighter-260nw-688472224.jpg
86 : https://image.made-in-china.com/155f0j00UlkhAuzggZRo/Plastic-PE-Floating-Buoy-for-Ocean-Fish-Farming.jpg
87 : https://jfcmarine.com/wp-content/uploads/2019/05/Beach-Marker-Buoys-hero.jpg
88 : https://m.pneumaticrubberfenders.com/photo/pc35694555-floating_marine_marker_buoys_ocean_channel_water_quality_assurance_systems.jpg
89 : https://static.vecteezy.com/system/resources/thumbnails/020/920/497/original/blue-water-waves-and-ocean-with-buoy-and-ropes-mexico-free-video.jpg
90 : https://www.arctia.fi/media/kuvat/viitat-ja-poijut/cache/vihrea-poiju-vaaka-1200x655.jpg
91 : https://thumbs.dreamstime.com/z/orange-large-round-plastic-air-inflated-life-saving-beacon-buoy-floats-blue-salt-sea-safety-118101365.jpg
92 : https://cdn.shopify.com/s/files/1/0053/5026/0803/products/nimi-projects-buoy-yellow-orange-recycled-plastic_grande.jpg?v=1636415502
93 : https://www.sunhelmmarine.com/wp-conten

160 : https://buoy.eco/wp-content/uploads/2023/02/buoy-black-bottle-recycle_explosion-300x411.jpg
161 : https://ic.boatid.com/boating-marine/pages/marine-buoys/marine-buoys_collage_0.jpg
163 : https://img.nauticexpo.com/images_ne/photo-m2/65695-16320376.jpg
164 : https://i0.wp.com/www.pembrokeshirecoastalforum.org.uk/wp-content/uploads/2017/06/MC-Buoy-3.jpg?fit=3456%2C2353&ssl=1
166 : https://www.xylem.com/siteassets/brand/ysi/product-images/db1750/ysi-db1750-drawing-465px.jpg?width=465&height=465&mode=boxpad&bgcolor=fff
168 : https://cdn.shopify.com/s/files/1/0053/5026/0803/products/nimi-projects-buoy-red-recycled-plastic_grande.jpg?v=1636415793
169 : https://images.squarespace-cdn.com/content/v1/596316063a0411d39d8be117/1670264121237-3Z6M7JEVL95LUFDT007M/IMG_7025.jpeg
170 : https://media.gettyimages.com/id/1248595382/photo/a-photograph-shows-a-police-vessel-as-it-tows-buoys-on-which-are-secured-a-150-kilogramme.jpg?s=612x612&w=gi&k=20&c=TQvKnwZskTBLnc1QUaVg--pF-RTe2UG_m3UG-ILSwuI=
17

244 : https://ichef.bbci.co.uk/news/976/cpsprodpb/FA3A/production/_126285046_de27-2.jpg
245 : https://thegadgetflow.com/wp-content/uploads/2020/10/Buoy-Bottle-Recycled-Water-Bottle-02-1200x1125.jpg
246 : https://image.made-in-china.com/2f0j00kDsfrAWEZbcM/Ocean-Buoys-Used-for-The-Said-Area-.jpg
247 : https://images.pond5.com/floating-plastic-ropes-open-ocean-footage-098546865_iconm.jpeg
248 : https://www.shutterstock.com/image-photo/lonely-buoy-ocean-260nw-598953752.jpg
249 : https://cdn.pixabay.com/photo/2013/10/01/01/51/boje-188938_960_720.jpg
251 : https://static.vecteezy.com/system/resources/previews/018/742/895/non_2x/dolphin-playing-with-plastic-buoy-photo.JPG
[INFO] Maximum missed photos reached, exiting...
[INFO] 225 Image URLS collected successfully
