In [4]:
import json
import os
import socket
import urllib.request

from serpapi import GoogleSearch

def serpapi_get_google_images(queries=None, path="", verbose=True, max_images=300,
                              api_key=None, timeout=15):
    '''
    Collect Google Images result URLs through SerpApi and download them.

    Every query is paged through until `max_images` distinct image URLs have
    been gathered for it (or the API reports an error / returns an empty page).
    All gathered URLs are then downloaded as `img_<n>.jpg`, numbered from 1 in
    collection order across all queries.

    Parameters
    ----------
    queries : iterable of str, or str, optional
        Search terms. A single string is treated as one query. Defaults to no
        queries, in which case nothing is fetched or downloaded.
    path : str
        Destination folder. It is created if missing. An empty string means
        the current working directory.
    verbose : bool
        When true, print progress, API error messages, and finally the
        collected URL list (as JSON) and its length.
    max_images : int
        Maximum number of image URLs collected for each query.
    api_key : str, optional
        SerpApi key. When omitted, the `SERPAPI_API_KEY` environment variable
        is used, so the secret never has to live in the notebook.
    timeout : float
        Socket timeout in seconds applied while downloading.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If there is at least one query but no API key is available.
    '''
    if queries is None:
        queries = []
    elif isinstance(queries, str):
        # Iterating a bare string would issue one search per character.
        queries = [queries]
    else:
        queries = list(queries)

    if api_key is None:
        api_key = os.environ.get("SERPAPI_API_KEY")
    if queries and not api_key:
        raise ValueError(
            "No SerpApi key available: pass api_key=... or set the "
            "SERPAPI_API_KEY environment variable."
        )

    image_results = _collect_image_urls(queries, api_key, max_images, verbose)
    _download_images(image_results, path, timeout, verbose)

    if verbose:
        print(json.dumps(image_results, indent=2))
        print(len(image_results))


def _collect_image_urls(queries, api_key, max_images, verbose):
    '''Return the distinct "original" image URLs found for the queries, in order.'''
    image_urls = []
    seen_urls = set()  # O(1) duplicate checks; the list keeps the order

    for query in queries:
        params = {
            "q": query,
            "tbm": "isch",
            "hl": "en",
            "gl": "us",
            "api_key": api_key,
            "num": "100",
            "ijn": 0
        }

        # As in the original code, pagination works by bumping params["ijn"]
        # between calls; this assumes the search object reads this same dict
        # on every get_dict() call -- TODO confirm against the serpapi client.
        search = GoogleSearch(params)

        collected = 0  # counted per query so one short query cannot inflate the next
        while collected < max_images:
            results = search.get_dict()  # JSON -> Python dictionary

            # An "error" entry is how the API reports that it has no (more) results.
            if "error" in results:
                if verbose:
                    print(results["error"])
                break

            page = results.get("images_results") or []
            if not page:
                # Neither an error nor any images: nothing further to page through.
                break

            for image in page:
                url = image.get("original")
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                image_urls.append(url)
                collected += 1
                if collected >= max_images:
                    break  # stop mid-page once the per-query cap is reached

            # move on to the next page
            params["ijn"] += 1

    return image_urls


def _download_images(image_urls, path, timeout, verbose):
    '''Download each URL to `<path>/img_<n>.jpg`, skipping the ones that fail.'''
    if not image_urls:
        return

    if path:
        os.makedirs(path, exist_ok=True)

    # Loop-invariant setup: a browser-like User-Agent, installed once.
    opener = urllib.request.build_opener()
    opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63")]
    urllib.request.install_opener(opener)

    # urlretrieve has no timeout argument, so the process-wide default is set
    # for the duration of the downloads and restored afterwards.
    previous_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        for index, url in enumerate(image_urls, start=1):
            if verbose:
                print(f"Downloading image number {index}...")

            destination = os.path.join(path, f"img_{index}.jpg")
            try:
                urllib.request.urlretrieve(url, destination)
            except Exception:
                # Best effort: one bad URL must not abort the batch. Catching
                # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
                if verbose:
                    print(f"Failed to download. Skipping image {index}.")
    finally:
        socket.setdefaulttimeout(previous_timeout)

In [6]:
# Search terms; images are scraped separately for each entry.
queries = [
    "Happy Human face",
    "Sad Human face",
]

# Scrape and download the images for every search term into the dataset folder.
serpapi_get_google_images(
    queries,
    "/Users/PriyamSheta/Documents/Documents/Dataset",
    verbose=True,
    max_images=150,
)

Downloading image number 1...
Downloading image number 2...
Downloading image number 3...
Downloading image number 4...
Downloading image number 5...
Downloading image number 6...
Downloading image number 7...
Downloading image number 8...
Downloading image number 9...
Downloading image number 10...
Downloading image number 11...
Downloading image number 12...
Downloading image number 13...
Downloading image number 14...
Downloading image number 15...
Downloading image number 16...
Downloading image number 17...
Downloading image number 18...
Downloading image number 19...
Downloading image number 20...
Downloading image number 21...
Downloading image number 22...
Downloading image number 23...
Downloading image number 24...
Downloading image number 25...
Downloading image number 26...
Downloading image number 27...
Downloading image number 28...
Downloading image number 29...
Downloading image number 30...
Downloading image number 31...
Downloading image number 32...
Downloading image

Downloading image number 252...
Downloading image number 253...
Downloading image number 254...
Downloading image number 255...
Downloading image number 256...
Downloading image number 257...
Downloading image number 258...
Downloading image number 259...
Downloading image number 260...
Downloading image number 261...
Downloading image number 262...
Downloading image number 263...
Downloading image number 264...
Downloading image number 265...
Downloading image number 266...
Downloading image number 267...
Downloading image number 268...
Downloading image number 269...
Downloading image number 270...
Downloading image number 271...
Downloading image number 272...
Downloading image number 273...
Downloading image number 274...
Downloading image number 275...
Downloading image number 276...
Downloading image number 277...
Downloading image number 278...
Downloading image number 279...
Downloading image number 280...
Downloading image number 281...
Downloading image number 282...
Download