In [1]:
import os
import numpy as np
import requests
import argparse
import json
import time
import logging
import csv

from multiprocessing import Pool, Process, Value, Lock

from requests.exceptions import ConnectionError, ReadTimeout, TooManyRedirects, MissingSchema, InvalidURL

In [2]:
# Run configuration for the scraper (plain variables standing in for command-line args).
data_root = './data'  # parent directory; images are stored under <data_root>/imagenet_images
user_class_list = False  # NOTE(review): never read in the visible cells; looks like a misspelling of use_class_list below -- confirm before removing
class_list = []  # wnids to scrape when use_class_list is True; each is looked up as a key of the class-info JSON
scrape_only_flickr = True # when True, URLs without 'flickr' in them are skipped and class eligibility is judged on the flickr URL count
number_of_classes = 1100  # how many classes are drawn at random when use_class_list is False
images_per_class = 10  # per-class target: workers skip further URLs once this many images were saved
use_class_list = False  # True: scrape exactly class_list; False: sample number_of_classes classes
debug = False  # True: write DEBUG logs to a file and append stats rows to stats.csv
multiprocessing_workers = 8  # number of processes in the download pool

In [3]:
# Debug runs send every log record (DEBUG and above) to a log file in the
# working directory; otherwise logging keeps its default configuration.
if debug:
    logging.basicConfig(
        level=logging.DEBUG,
        filename='imagenet_scarper.log',
    )

In [4]:
def IMAGENET_API_WNID_TO_URLS(wnid):
    """Return the ImageNet text-API address that lists the image URLs of synset `wnid`."""
    return f'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={wnid}'


# The class-info JSON is expected in the directory the notebook is run from.
current_folder = os.path.abspath('.')
class_info_json_filename = 'imagenet_class_info.json'
class_info_json_filepath = os.path.join(current_folder, class_info_json_filename)

# Empty placeholder; the parsed JSON replaces it once the file is loaded.
class_info_dict = {}

In [5]:
# Load the per-class metadata (entries are keyed by wnid and carry the class
# name and URL counts used below). JSON text is UTF-8 by specification, so the
# encoding is pinned instead of depending on the platform default.
with open(class_info_json_filepath, encoding='utf-8') as class_info_json_f:
    class_info_dict = json.load(class_info_json_f)

In [6]:
# Decide which synsets (wnids) will be scraped.
classes_to_scrape = []

if use_class_list:
    # Explicit list: every wnid must exist in the class-info data.
    for item in class_list:
        if item not in class_info_dict:
            message = f'Class {item} not found in ImageNete'
            logging.error(message)
            # Raise instead of calling exit(): inside a Jupyter kernel exit()
            # only requests a shutdown and the cell keeps running, and in a
            # script it would terminate with a success status.
            raise ValueError(message)
    classes_to_scrape.extend(class_list)

else:
    # Random sample: a class is eligible only if it lists comfortably more
    # URLs than images_per_class (presumably a margin for dead links -- the
    # 0.9 / 0.8 factors are kept from the original heuristic).
    if scrape_only_flickr:
        count_key, usable_fraction = 'flickr_img_url_count', 0.9
    else:
        count_key, usable_fraction = 'img_url_count', 0.8

    potential_class_pool = [
        key
        for key, val in class_info_dict.items()
        if int(val[count_key]) * usable_fraction > images_per_class
    ]

    if len(potential_class_pool) < number_of_classes:
        logging.error(f"With {images_per_class} images per class there are {len(potential_class_pool)} to choose from.")
        logging.error(f"Decrease number of classes or decrease images per class.")
        raise ValueError(
            f"Only {len(potential_class_pool)} classes have enough image URLs, "
            f"but {number_of_classes} were requested."
        )

    # NOTE: the draw is unseeded, so each run picks a different set of classes.
    picked_classes_idxes = np.random.choice(len(potential_class_pool), number_of_classes, replace = False)

    classes_to_scrape.extend(potential_class_pool[idx] for idx in picked_classes_idxes)
            

In [7]:
# Show the human-readable names of the selected synsets.
print("Picked the following clases:")
picked_class_names = []
for class_wnid in classes_to_scrape:
    picked_class_names.append(class_info_dict[class_wnid]['class_name'])
print(picked_class_names)

Picked the following clases:
['catmint', 'easy chair', 'ski rack', 'common cotton grass', 'Dane', 'marine mussel', 'locoweed', 'hall', 'armor', 'chador', 'guest of honor', 'bonefish', 'rowel', 'cowpea', 'parfait', 'bouillon', 'flying jib', 'White Russian', 'astrolabe', 'glow lamp', 'Melkite', 'hydrometer', 'marsh plant', 'oyster mushroom', 'maple', 'entrepreneur', 'metasequoia', 'level', 'foreground', 'linseed', 'rig', 'engine', 'shunt', 'Easter egg', 'coatdress', 'American wistaria', 'Mexican sunflower', 'pavilion', 'imperialist', 'centrist', 'Minuteman', 'sequencer', 'bread dough', 'alehouse', 'cock of the rock', 'asp', 'gasmask', 'bass guitar', 'poppy', 'panther', 'ridge', 'good-king-henry', 'sculptor', 'wild boar', 'boxcar', 'bitewing', 'buckram', 'valerian', 'television antenna', 'climbing frame', 'cornetfish', 'helminth', 'Tartuffe', 'header', 'push button', 'kneeler', 'impala lily', 'plum sauce', 'drinking vessel', 'common fennel', 'green alder', 'espionage agent', 'autopilot', 

In [10]:
# Root folder for all downloaded images. makedirs also creates data_root when
# it does not exist yet and is a no-op when the folder is already there.
imagenet_images_folder = os.path.join(data_root, 'imagenet_images')
os.makedirs(imagenet_images_folder, exist_ok=True)

In [11]:
# Plain (single-process) counters: one independent record per URL category.
scraping_stats = {
    category: {'tried': 0, 'success': 0, 'time_spent': 0}
    for category in ('all', 'is_flickr', 'not_flickr')
}

In [12]:
def add_debug_csv_row(row):
    """Append `row` as one comma-separated record to stats.csv in the working directory.

    Args:
        row: iterable of values; each one becomes a CSV field.
    """
    # newline='' lets the csv module control line endings itself; without it
    # Windows text mode turns the writer's '\r\n' into '\r\r\n' (blank rows).
    with open('stats.csv', "a", newline='') as csv_f:
        csv_writer = csv.writer(csv_f, delimiter=",")
        csv_writer.writerow(row)

class MultiStats():
    """Scraping counters held in multiprocessing `Value` objects.

    `stats[category][field]` is a double-typed `Value`, where category is one
    of 'all', 'is_flickr', 'not_flickr' and field is one of 'tried',
    'success', 'time_spent'. All access goes through `self.lock`.
    """

    def __init__(self):
        self.lock = Lock()

        categories = ('all', 'is_flickr', 'not_flickr')
        fields = ('tried', 'success', 'time_spent')
        self.stats = {
            category: {field: Value('d', 0) for field in fields}
            for category in categories
        }

    def inc(self, cls, stat, val):
        """Add `val` to the counter `stat` of category `cls`."""
        counter = self.stats[cls][stat]
        with self.lock:
            counter.value += val

    def get(self, cls, stat):
        """Return the current value of counter `stat` of category `cls`."""
        counter = self.stats[cls][stat]
        with self.lock:
            return counter.value

# Single shared stats object used by every worker.
multi_stats = MultiStats()

# Debug runs begin the stats CSV with a header row; column order mirrors the
# rows written by add_stats_to_debug_csv.
if debug:
    row = [
        "all_tried", "all_success", "all_time_spent",
        "is_flickr_tried", "is_flickr_success", "is_flickr_time_spent",
        "not_flickr_tried", "not_flickr_success", "not_flickr_time_spent",
    ]
    add_debug_csv_row(row)
    
def add_stats_to_debug_csv():
    """Append one CSV row with the current value of every shared counter.

    Columns run category by category ('all', 'is_flickr', 'not_flickr'), each
    contributing 'tried', 'success' and 'time_spent' in that order.
    """
    row = [
        multi_stats.get(category, field)
        for category in ('all', 'is_flickr', 'not_flickr')
        for field in ('tried', 'success', 'time_spent')
    ]
    add_debug_csv_row(row)

def print_stats(cls, print_func):
    """Report the counters of category `cls` through `print_func`.

    Args:
        cls: stats category ('all', 'is_flickr' or 'not_flickr').
        print_func: callable taking one string (e.g. `print`, `logging.debug`).
    """
    wall_clock_elapsed = time.time() - scraping_t_start.value
    summed_worker_time = multi_stats.get('all', 'time_spent')

    # Workers run in parallel, so their summed time exceeds wall-clock time;
    # this ratio scales per-worker time back to elapsed time.
    actual_processes_ratio = (
        wall_clock_elapsed / summed_worker_time if summed_worker_time != 0 else 1.0
    )

    print_func(f'STATS For class {cls}:')
    print_func(f' tried {multi_stats.get(cls, "tried")} urls with'
               f' {multi_stats.get(cls, "success")} successes')

    # Rates are only reported when their denominator is non-zero.
    if multi_stats.get(cls, "tried") > 0:
        success_percent = 100.0 * multi_stats.get(cls, "success")/multi_stats.get(cls, "tried")
        print_func(f'{success_percent}% success rate for {cls} urls ')
    if multi_stats.get(cls, "success") > 0:
        seconds_per_image = multi_stats.get(cls,"time_spent") * actual_processes_ratio / multi_stats.get(cls,"success")
        print_func(f'{seconds_per_image} seconds spent per {cls} succesful image download')



# Module-level state read and written by get_image and the scraping loop.
lock = Lock()  # guards url_tries and class_images inside get_image
url_tries = Value('d', 0)  # count of URLs get_image actually attempted; every 250th one prints the stats (stored as a double)
scraping_t_start = Value('d', time.time())  # wall-clock start time, read by print_stats
class_folder = ''  # output directory of the class being scraped; reassigned by the scraping loop for each class
class_images = Value('d', 0)  # images saved for the current class; reset to 0 by the scraping loop for each class

In [None]:
def get_image(img_url):
    """Download one image URL into the current class folder (pool worker entry point).

    Works on module-level state rather than arguments: `class_folder`,
    `class_images`, `images_per_class`, `scrape_only_flickr`, `lock`,
    `url_tries` and `multi_stats`.

    Args:
        img_url: URL text, one line of a synset's URL listing.

    Returns:
        None in every case. Attempts are recorded in `multi_stats`; a saved
        image also increments `class_images`. Download and file-system errors
        are counted as failures instead of being raised, so one bad URL cannot
        abort the surrounding `Pool.map`.
    """
    # Blank or one-character lines of the listing are not URLs.
    if len(img_url) <= 1:
        return

    with lock:
        cls_imgs = class_images.value

    # The class already has enough images; skip without counting an attempt.
    # NOTE(review): this check and the later increment are separate critical
    # sections, so concurrent workers can save slightly more than
    # images_per_class images.
    if cls_imgs >= images_per_class:
        return

    logging.debug(img_url)

    if 'flickr' in img_url:
        cls = 'is_flickr'
    else:
        cls = 'not_flickr'
        if scrape_only_flickr:
            return

    t_start = time.time()

    def finish(status):
        """Record elapsed time and outcome ('success' or 'failure') for this URL."""
        t_spent = time.time() - t_start
        multi_stats.inc(cls, 'time_spent', t_spent)
        multi_stats.inc('all', 'time_spent', t_spent)

        multi_stats.inc(cls,'tried', 1)
        multi_stats.inc('all', 'tried', 1)

        if status == 'success':
            multi_stats.inc(cls,'success', 1)
            multi_stats.inc('all', 'success', 1)
        elif status != 'failure':
            # Programming error: only the two statuses above are ever passed.
            logging.error(f'No such status {status}!!')
            raise ValueError(f'No such status {status}!!')
        return

    # Periodic progress report: every 250th attempted URL prints the stats.
    with lock:
        url_tries.value += 1
        if url_tries.value % 250 == 0:
            print(f'\nScraping stats:')
            print_stats('is_flickr', print)
            print_stats('not_flickr', print)
            print_stats('all', print)
            if debug:
                add_stats_to_debug_csv()

    try:
        img_resp = requests.get(img_url, timeout = 1)
    except ConnectionError:
        logging.debug(f"Connection Error for url {img_url}")
        return finish('failure')
    except ReadTimeout:
        logging.debug(f"Read Timeout for url {img_url}")
        return finish('failure')
    except TooManyRedirects:
        logging.debug(f"Too many redirects {img_url}")
        return finish('failure')
    except MissingSchema:
        return finish('failure')
    except InvalidURL:
        return finish('failure')
    except requests.exceptions.RequestException as err:
        # Every other requests failure (unsupported scheme, broken chunked
        # body, undecodable content, ...) is just one more failed URL.
        logging.debug(f"Request failed for url {img_url}: {err}")
        return finish('failure')

    if not 'content-type' in img_resp.headers:
        return finish('failure')

    if not 'image' in img_resp.headers['content-type']:
        logging.debug("Not an image")
        return finish('failure')

    # Bodies under 1000 bytes are rejected (presumably placeholder images).
    if (len(img_resp.content) < 1000):
        return finish('failure')

    logging.debug(img_resp.headers['content-type'])
    logging.debug(f'image size {len(img_resp.content)}')

    # File name = last path segment of the URL with any query string removed.
    img_name = img_url.split('/')[-1]
    img_name = img_name.split("?")[0]

    if (len(img_name) <= 1):
        return finish('failure')

    img_file_path = os.path.join(class_folder, img_name)
    logging.debug(f'Saving image in {img_file_path}')

    try:
        with open(img_file_path, 'wb') as img_f:
            img_f.write(img_resp.content)
    except OSError as err:
        # E.g. a name the file system rejects or a missing folder.
        logging.debug(f'Could not save {img_file_path}: {err}')
        return finish('failure')

    with lock:
        class_images.value += 1

    logging.debug(f'Scraping stats')
    print_stats('is_flickr', logging.debug)
    print_stats('not_flickr', logging.debug)
    print_stats('all', logging.debug)

    return finish('success')

from tqdm import tqdm
    
# Seconds to wait for the URL listing of one class before giving up on it.
url_list_timeout = 30

for class_wnid in tqdm(classes_to_scrape):

    class_name = class_info_dict[class_wnid]["class_name"]
    print(f'Scraping images for class \"{class_name}\"')
    url_urls = IMAGENET_API_WNID_TO_URLS(class_wnid)

    time.sleep(0.05)
    try:
        # Without a timeout a stalled server would hang the whole run.
        resp = requests.get(url_urls, timeout=url_list_timeout)
        # An HTTP error page is not a URL listing; treat it like a failed request.
        resp.raise_for_status()
    except requests.exceptions.RequestException as err:
        logging.warning(f'Skipping class "{class_name}": could not fetch its URL list ({err})')
        continue

    # get_image reads these module-level values, so they are set before the
    # pool is created.
    # NOTE(review): this presumably relies on the fork start method, where
    # each new pool inherits the parent's current globals -- confirm on
    # platforms that use spawn.
    class_folder = os.path.join(imagenet_images_folder, class_name)
    os.makedirs(class_folder, exist_ok=True)

    class_images.value = 0

    # Listings can contain bytes that are not valid UTF-8; replace them rather
    # than crash (such a URL then simply fails to download).
    urls = [url.decode('utf-8', errors='replace') for url in resp.content.splitlines()]

    print(f"Multiprocessing workers: {multiprocessing_workers}")
    with Pool(processes=multiprocessing_workers) as p:
        p.map(get_image,urls)


  0%|          | 0/1100 [00:00<?, ?it/s]

Scraping images for class "catmint"
Multiprocessing workers: 8


  0%|          | 1/1100 [00:07<2:24:20,  7.88s/it]

Scraping images for class "easy chair"
Multiprocessing workers: 8


  0%|          | 2/1100 [00:21<2:55:04,  9.57s/it]

Scraping images for class "ski rack"
Multiprocessing workers: 8


  0%|          | 3/1100 [00:29<2:44:43,  9.01s/it]

Scraping images for class "common cotton grass"
Multiprocessing workers: 8


  0%|          | 4/1100 [00:32<2:12:02,  7.23s/it]

Scraping images for class "Dane"
Multiprocessing workers: 8


  0%|          | 5/1100 [00:39<2:13:12,  7.30s/it]

Scraping images for class "marine mussel"
Multiprocessing workers: 8


  1%|          | 6/1100 [00:53<2:46:24,  9.13s/it]

Scraping images for class "locoweed"
Multiprocessing workers: 8


  1%|          | 7/1100 [01:04<3:01:18,  9.95s/it]

Scraping images for class "hall"
Multiprocessing workers: 8


  1%|          | 8/1100 [01:25<3:58:46, 13.12s/it]

Scraping images for class "armor"
Multiprocessing workers: 8


  1%|          | 9/1100 [01:38<4:00:33, 13.23s/it]

Scraping images for class "chador"
Multiprocessing workers: 8


  1%|          | 10/1100 [02:44<8:44:33, 28.87s/it]

Scraping images for class "guest of honor"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 243.0 urls with 162.0 successes
66.66666666666667% success rate for is_flickr urls 
1.0931328957419804 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 244.0 urls with 162.0 successes
66.12244897959184% success rate for all urls 
1.0955975033204997 seconds spent per all succesful image download


  1%|          | 11/1100 [03:12<8:38:51, 28.59s/it]

Scraping images for class "bonefish"
Multiprocessing workers: 8


  1%|          | 12/1100 [03:32<7:53:40, 26.12s/it]

Scraping images for class "rowel"
Multiprocessing workers: 8


  1%|          | 13/1100 [03:54<7:32:16, 24.96s/it]

Scraping images for class "cowpea"
Multiprocessing workers: 8


  1%|▏         | 14/1100 [04:22<7:45:57, 25.74s/it]

Scraping images for class "parfait"
Multiprocessing workers: 8


  1%|▏         | 15/1100 [04:50<7:55:58, 26.32s/it]

Scraping images for class "bouillon"
Multiprocessing workers: 8


  1%|▏         | 16/1100 [05:11<7:28:35, 24.83s/it]

Scraping images for class "flying jib"
Multiprocessing workers: 8


  2%|▏         | 17/1100 [05:22<6:15:34, 20.81s/it]

Scraping images for class "White Russian"
Multiprocessing workers: 8


  2%|▏         | 18/1100 [05:40<5:57:50, 19.84s/it]

Scraping images for class "astrolabe"
Multiprocessing workers: 8


  2%|▏         | 19/1100 [05:43<4:26:35, 14.80s/it]

Scraping images for class "glow lamp"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 492.0 urls with 295.0 successes
59.959349593495936% success rate for is_flickr urls 
1.2282542810601704 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 492.0 urls with 295.0 successes
59.959349593495936% success rate for all urls 
1.2283087811227573 seconds spent per all succesful image download


  2%|▏         | 20/1100 [06:15<6:01:45, 20.10s/it]

Scraping images for class "Melkite"
Multiprocessing workers: 8


  2%|▏         | 21/1100 [06:27<5:17:08, 17.63s/it]

Scraping images for class "hydrometer"
Multiprocessing workers: 8


  2%|▏         | 22/1100 [07:02<6:46:28, 22.62s/it]

Scraping images for class "marsh plant"
Multiprocessing workers: 8


  2%|▏         | 23/1100 [07:57<9:41:05, 32.37s/it]

Scraping images for class "oyster mushroom"
Multiprocessing workers: 8


  2%|▏         | 24/1100 [08:19<8:47:27, 29.41s/it]

Scraping images for class "maple"
Multiprocessing workers: 8


  2%|▏         | 25/1100 [08:23<6:27:59, 21.66s/it]

Scraping images for class "entrepreneur"
Multiprocessing workers: 8


  2%|▏         | 26/1100 [08:41<6:11:46, 20.77s/it]

Scraping images for class "metasequoia"
Multiprocessing workers: 8


  2%|▏         | 27/1100 [08:56<5:35:53, 18.78s/it]

Scraping images for class "level"
Multiprocessing workers: 8


  3%|▎         | 28/1100 [09:08<5:01:02, 16.85s/it]

Scraping images for class "foreground"
Multiprocessing workers: 8


  3%|▎         | 29/1100 [09:15<4:09:41, 13.99s/it]

Scraping images for class "linseed"
Multiprocessing workers: 8


  3%|▎         | 30/1100 [09:18<3:10:49, 10.70s/it]

Scraping images for class "rig"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 744.0 urls with 449.0 successes
60.3494623655914% success rate for is_flickr urls 
1.2744821689557073 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 744.0 urls with 449.0 successes
60.3494623655914% success rate for all urls 
1.2741448056193396 seconds spent per all succesful image download


  3%|▎         | 31/1100 [09:37<3:53:16, 13.09s/it]

Scraping images for class "engine"
Multiprocessing workers: 8


  3%|▎         | 32/1100 [09:41<3:03:50, 10.33s/it]

Scraping images for class "shunt"
Multiprocessing workers: 8


  3%|▎         | 33/1100 [09:58<3:37:36, 12.24s/it]

Scraping images for class "Easter egg"
Multiprocessing workers: 8


  3%|▎         | 34/1100 [10:10<3:40:54, 12.43s/it]

Scraping images for class "coatdress"
Multiprocessing workers: 8


  3%|▎         | 35/1100 [10:13<2:48:55,  9.52s/it]

Scraping images for class "American wistaria"
Multiprocessing workers: 8


  3%|▎         | 36/1100 [10:21<2:42:41,  9.17s/it]

Scraping images for class "Mexican sunflower"
Multiprocessing workers: 8


  3%|▎         | 37/1100 [10:33<2:55:17,  9.89s/it]

Scraping images for class "pavilion"
Multiprocessing workers: 8


  3%|▎         | 38/1100 [10:48<3:22:20, 11.43s/it]

Scraping images for class "imperialist"
Multiprocessing workers: 8


  4%|▎         | 39/1100 [11:06<3:57:30, 13.43s/it]

Scraping images for class "centrist"
Multiprocessing workers: 8


  4%|▎         | 40/1100 [11:19<3:51:33, 13.11s/it]

Scraping images for class "Minuteman"
Multiprocessing workers: 8


  4%|▎         | 41/1100 [11:53<5:42:33, 19.41s/it]

Scraping images for class "sequencer"
Multiprocessing workers: 8


  4%|▍         | 42/1100 [12:13<5:46:43, 19.66s/it]

Scraping images for class "bread dough"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 996.0 urls with 624.0 successes
62.65060240963855% success rate for is_flickr urls 
1.1799619751217632 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 996.0 urls with 624.0 successes
62.65060240963855% success rate for all urls 
1.179718717168539 seconds spent per all succesful image download


  4%|▍         | 43/1100 [12:35<6:01:47, 20.54s/it]

Scraping images for class "alehouse"
Multiprocessing workers: 8


  4%|▍         | 44/1100 [12:48<5:19:52, 18.18s/it]

Scraping images for class "cock of the rock"
Multiprocessing workers: 8


  4%|▍         | 45/1100 [13:13<5:53:26, 20.10s/it]

Scraping images for class "asp"
Multiprocessing workers: 8


  4%|▍         | 46/1100 [13:22<4:54:37, 16.77s/it]

Scraping images for class "gasmask"
Multiprocessing workers: 8


  4%|▍         | 47/1100 [13:36<4:40:38, 15.99s/it]

Scraping images for class "bass guitar"
Multiprocessing workers: 8


  4%|▍         | 48/1100 [13:58<5:11:32, 17.77s/it]

Scraping images for class "poppy"
Multiprocessing workers: 8


  4%|▍         | 49/1100 [14:31<6:30:44, 22.31s/it]

Scraping images for class "panther"
Multiprocessing workers: 8


  5%|▍         | 50/1100 [14:49<6:07:35, 21.01s/it]

Scraping images for class "ridge"
Multiprocessing workers: 8


  5%|▌         | 57/1100 [16:55<6:49:21, 23.55s/it]

Scraping images for class "valerian"
Multiprocessing workers: 8


  5%|▌         | 58/1100 [17:09<5:58:33, 20.65s/it]

Scraping images for class "television antenna"
Multiprocessing workers: 8


  5%|▌         | 59/1100 [17:17<4:54:29, 16.97s/it]

Scraping images for class "climbing frame"
Multiprocessing workers: 8


  5%|▌         | 60/1100 [17:40<5:21:04, 18.52s/it]

Scraping images for class "cornetfish"
Multiprocessing workers: 8


  6%|▌         | 61/1100 [17:42<3:59:10, 13.81s/it]

Scraping images for class "helminth"
Multiprocessing workers: 8


  6%|▌         | 62/1100 [17:50<3:28:32, 12.05s/it]

Scraping images for class "Tartuffe"
Multiprocessing workers: 8


  6%|▌         | 63/1100 [18:03<3:29:58, 12.15s/it]

Scraping images for class "header"
Multiprocessing workers: 8


  6%|▌         | 64/1100 [18:05<2:39:12,  9.22s/it]

Scraping images for class "push button"
Multiprocessing workers: 8


  6%|▌         | 65/1100 [18:13<2:33:29,  8.90s/it]

Scraping images for class "kneeler"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 1495.0 urls with 954.0 successes
63.812709030100336% success rate for is_flickr urls 
1.1474987328427393 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 1495.0 urls with 954.0 successes
63.812709030100336% success rate for all urls 
1.1475336986517757 seconds spent per all succesful image download


  6%|▌         | 66/1100 [18:20<2:23:16,  8.31s/it]

Scraping images for class "impala lily"
Multiprocessing workers: 8


  6%|▌         | 67/1100 [18:28<2:22:24,  8.27s/it]

Scraping images for class "plum sauce"
Multiprocessing workers: 8


  6%|▌         | 68/1100 [18:36<2:19:16,  8.10s/it]

Scraping images for class "drinking vessel"
Multiprocessing workers: 8


  6%|▋         | 69/1100 [18:49<2:43:02,  9.49s/it]

Scraping images for class "common fennel"
Multiprocessing workers: 8


  6%|▋         | 70/1100 [19:07<3:26:53, 12.05s/it]

Scraping images for class "green alder"
Multiprocessing workers: 8


  6%|▋         | 71/1100 [19:38<5:07:14, 17.92s/it]

Scraping images for class "espionage agent"
Multiprocessing workers: 8


  7%|▋         | 72/1100 [19:46<4:12:47, 14.75s/it]

Scraping images for class "autopilot"
Multiprocessing workers: 8


  7%|▋         | 73/1100 [20:29<6:41:02, 23.43s/it]

Scraping images for class "motor vehicle"
Multiprocessing workers: 8


  7%|▋         | 74/1100 [21:03<7:31:29, 26.40s/it]

Scraping images for class "footlocker"
Multiprocessing workers: 8


  7%|▋         | 75/1100 [21:14<6:12:44, 21.82s/it]

Scraping images for class "halibut"
Multiprocessing workers: 8


  7%|▋         | 76/1100 [21:17<4:34:14, 16.07s/it]

Scraping images for class "newel post"
Multiprocessing workers: 8


  7%|▋         | 77/1100 [21:29<4:13:26, 14.86s/it]

Scraping images for class "martello tower"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 1749.0 urls with 1123.0 successes
64.20811892510005% success rate for is_flickr urls 
1.149257863088049 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 1749.0 urls with 1123.0 successes
64.20811892510005% success rate for all urls 
1.1493015716039805 seconds spent per all succesful image download


  7%|▋         | 78/1100 [21:37<3:37:29, 12.77s/it]

Scraping images for class "toy"
Multiprocessing workers: 8


  7%|▋         | 79/1100 [21:50<3:38:44, 12.85s/it]

Scraping images for class "flintlock"
Multiprocessing workers: 8


  7%|▋         | 80/1100 [22:02<3:37:40, 12.80s/it]

Scraping images for class "Hungarian sauce"
Multiprocessing workers: 8


  7%|▋         | 81/1100 [22:24<4:24:05, 15.55s/it]

Scraping images for class "smut grass"
Multiprocessing workers: 8


  7%|▋         | 82/1100 [22:31<3:41:23, 13.05s/it]

Scraping images for class "axseed"
Multiprocessing workers: 8


  8%|▊         | 83/1100 [22:55<4:35:37, 16.26s/it]

Scraping images for class "blunderbuss"
Multiprocessing workers: 8


  8%|▊         | 84/1100 [23:14<4:48:15, 17.02s/it]

Scraping images for class "chili"
Multiprocessing workers: 8


  8%|▊         | 85/1100 [23:47<6:06:45, 21.68s/it]

Scraping images for class "flatfish"
Multiprocessing workers: 8


  8%|▊         | 86/1100 [24:14<6:37:04, 23.50s/it]

Scraping images for class "lentil"
Multiprocessing workers: 8


  8%|▊         | 87/1100 [24:47<7:24:53, 26.35s/it]

Scraping images for class "futon"
Multiprocessing workers: 8


  8%|▊         | 88/1100 [25:00<6:15:37, 22.27s/it]

Scraping images for class "bustle"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 1992.0 urls with 1279.0 successes
64.20682730923694% success rate for is_flickr urls 
1.1750086421981465 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 1992.0 urls with 1279.0 successes
64.20682730923694% success rate for all urls 
1.1750629788921436 seconds spent per all succesful image download


  8%|▊         | 89/1100 [25:13<5:29:39, 19.56s/it]

Scraping images for class "rotgut"
Multiprocessing workers: 8


  8%|▊         | 90/1100 [25:26<4:52:46, 17.39s/it]

Scraping images for class "building"
Multiprocessing workers: 8


  8%|▊         | 91/1100 [25:34<4:05:39, 14.61s/it]

Scraping images for class "pelagic bird"
Multiprocessing workers: 8


  8%|▊         | 92/1100 [25:47<3:59:59, 14.29s/it]

Scraping images for class "Fauve"
Multiprocessing workers: 8


  8%|▊         | 93/1100 [25:56<3:30:39, 12.55s/it]

Scraping images for class "pumpkin"
Multiprocessing workers: 8


  9%|▊         | 94/1100 [26:18<4:16:52, 15.32s/it]

Scraping images for class "rockfish"
Multiprocessing workers: 8


  9%|▊         | 95/1100 [26:27<3:48:23, 13.63s/it]

Scraping images for class "newsstand"
Multiprocessing workers: 8


  9%|▊         | 96/1100 [26:40<3:46:10, 13.52s/it]

Scraping images for class "generator"
Multiprocessing workers: 8


  9%|▉         | 97/1100 [27:19<5:53:11, 21.13s/it]

Scraping images for class "cork tree"
Multiprocessing workers: 8


  9%|▉         | 98/1100 [27:32<5:08:31, 18.47s/it]

Scraping images for class "wild madder"
Multiprocessing workers: 8


  9%|▉         | 99/1100 [28:00<5:58:36, 21.49s/it]

Scraping images for class "king penguin"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 2244.0 urls with 1434.0 successes
63.903743315508024% success rate for is_flickr urls 
1.1895210924602566 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 2244.0 urls with 1434.0 successes
63.903743315508024% success rate for all urls 
1.187957418347336 seconds spent per all succesful image download


  9%|▉         | 100/1100 [28:43<7:46:15, 27.98s/it]

Scraping images for class "domino"
Multiprocessing workers: 8


  9%|▉         | 101/1100 [28:56<6:31:27, 23.51s/it]

Scraping images for class "apple mint"
Multiprocessing workers: 8


  9%|▉         | 102/1100 [29:19<6:27:14, 23.28s/it]

Scraping images for class "cigar smoker"
Multiprocessing workers: 8


  9%|▉         | 103/1100 [29:37<5:58:54, 21.60s/it]

Scraping images for class "long trousers"
Multiprocessing workers: 8


  9%|▉         | 104/1100 [29:40<4:26:12, 16.04s/it]

Scraping images for class "fishhook"
Multiprocessing workers: 8


 10%|▉         | 105/1100 [29:53<4:09:24, 15.04s/it]

Scraping images for class "solar heater"
Multiprocessing workers: 8


 10%|▉         | 106/1100 [30:00<3:33:21, 12.88s/it]

Scraping images for class "textile screw pine"
Multiprocessing workers: 8


 10%|▉         | 107/1100 [30:14<3:37:23, 13.14s/it]

Scraping images for class "mandarin"
Multiprocessing workers: 8


 10%|▉         | 108/1100 [30:22<3:11:20, 11.57s/it]

Scraping images for class "radio compass"
Multiprocessing workers: 8


 10%|▉         | 109/1100 [30:25<2:26:00,  8.84s/it]

Scraping images for class "darling"
Multiprocessing workers: 8


 10%|█         | 110/1100 [30:34<2:28:25,  9.00s/it]

Scraping images for class "gazpacho"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 2492.0 urls with 1611.0 successes
64.64686998394863% success rate for is_flickr urls 
1.1439861018490451 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 2492.0 urls with 1611.0 successes
64.64686998394863% success rate for all urls 
1.1440149479812158 seconds spent per all succesful image download


 10%|█         | 111/1100 [30:47<2:49:06, 10.26s/it]

Scraping images for class "copper"
Multiprocessing workers: 8


 10%|█         | 112/1100 [30:55<2:36:06,  9.48s/it]

Scraping images for class "moon shell"
Multiprocessing workers: 8


 10%|█         | 113/1100 [31:08<2:53:37, 10.55s/it]

Scraping images for class "mechanical device"
Multiprocessing workers: 8


 10%|█         | 114/1100 [31:17<2:44:07,  9.99s/it]

Scraping images for class "godfather"
Multiprocessing workers: 8


 10%|█         | 115/1100 [31:30<3:00:51, 11.02s/it]

Scraping images for class "stun gun"
Multiprocessing workers: 8


 11%|█         | 116/1100 [31:39<2:50:00, 10.37s/it]

Scraping images for class "cyme"
Multiprocessing workers: 8


 11%|█         | 117/1100 [31:42<2:12:51,  8.11s/it]

Scraping images for class "common matrimony vine"
Multiprocessing workers: 8


 11%|█         | 118/1100 [32:16<4:19:55, 15.88s/it]

Scraping images for class "pants suit"
Multiprocessing workers: 8


 11%|█         | 119/1100 [32:25<3:47:36, 13.92s/it]

Scraping images for class "parasail"
Multiprocessing workers: 8


 11%|█         | 120/1100 [32:43<4:05:55, 15.06s/it]

Scraping images for class "roughcast"
Multiprocessing workers: 8


 11%|█         | 121/1100 [33:07<4:51:09, 17.84s/it]

Scraping images for class "sailfish"
Multiprocessing workers: 8


 11%|█         | 122/1100 [33:31<5:22:02, 19.76s/it]

Scraping images for class "yellowwood"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 2746.0 urls with 1768.0 successes
64.38455935906774% success rate for is_flickr urls 
1.1388948447833773 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 2746.0 urls with 1768.0 successes
64.38455935906774% success rate for all urls 
1.1388967177447151 seconds spent per all succesful image download


 11%|█         | 123/1100 [34:05<6:30:49, 24.00s/it]

Scraping images for class "scholar"
Multiprocessing workers: 8


 11%|█▏        | 124/1100 [34:19<5:43:09, 21.10s/it]

Scraping images for class "beeper"
Multiprocessing workers: 8


 11%|█▏        | 125/1100 [34:38<5:30:57, 20.37s/it]

Scraping images for class "snow bunting"
Multiprocessing workers: 8


 11%|█▏        | 126/1100 [34:41<4:06:19, 15.17s/it]

Scraping images for class "earphone"
Multiprocessing workers: 8


 12%|█▏        | 127/1100 [34:50<3:32:41, 13.12s/it]

Scraping images for class "sandpiper"
Multiprocessing workers: 8


 12%|█▏        | 128/1100 [34:53<2:43:56, 10.12s/it]

Scraping images for class "goldfields"
Multiprocessing workers: 8


 12%|█▏        | 129/1100 [35:04<2:49:19, 10.46s/it]

Scraping images for class "ditty bag"
Multiprocessing workers: 8


 12%|█▏        | 130/1100 [35:26<3:46:14, 13.99s/it]

Scraping images for class "schnapps"
Multiprocessing workers: 8


 12%|█▏        | 131/1100 [35:34<3:17:05, 12.20s/it]

Scraping images for class "queen"
Multiprocessing workers: 8


 12%|█▏        | 132/1100 [35:47<3:21:50, 12.51s/it]

Scraping images for class "cittern"
Multiprocessing workers: 8


 12%|█▏        | 133/1100 [35:58<3:12:12, 11.93s/it]

Scraping images for class "sunflower"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 2992.0 urls with 1939.0 successes
64.80614973262033% success rate for is_flickr urls 
1.1144789584584307 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 2992.0 urls with 1939.0 successes
64.80614973262033% success rate for all urls 
1.1144826139230712 seconds spent per all succesful image download


 12%|█▏        | 134/1100 [36:06<2:54:40, 10.85s/it]

Scraping images for class "analog watch"
Multiprocessing workers: 8


 12%|█▏        | 135/1100 [36:09<2:15:08,  8.40s/it]

Scraping images for class "reptile"
Multiprocessing workers: 8


 12%|█▏        | 136/1100 [36:21<2:32:00,  9.46s/it]

Scraping images for class "yellowlegs"
Multiprocessing workers: 8


 12%|█▏        | 137/1100 [36:40<3:16:51, 12.27s/it]

Scraping images for class "ruddy duck"
Multiprocessing workers: 8


 13%|█▎        | 138/1100 [36:43<2:31:42,  9.46s/it]

Scraping images for class "jaguarundi"
Multiprocessing workers: 8


 13%|█▎        | 139/1100 [36:56<2:48:30, 10.52s/it]

Scraping images for class "bench press"
Multiprocessing workers: 8


 13%|█▎        | 140/1100 [37:14<3:27:01, 12.94s/it]

Scraping images for class "mocha"
Multiprocessing workers: 8


 13%|█▎        | 141/1100 [37:29<3:36:57, 13.57s/it]

Scraping images for class "wild rosemary"
Multiprocessing workers: 8


 13%|█▎        | 142/1100 [37:37<3:08:42, 11.82s/it]

Scraping images for class "altarpiece"
Multiprocessing workers: 8


 13%|█▎        | 143/1100 [38:02<4:11:49, 15.79s/it]

Scraping images for class "wimp"
Multiprocessing workers: 8


 13%|█▎        | 144/1100 [38:10<3:36:09, 13.57s/it]

Scraping images for class "beige"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 3242.0 urls with 2098.0 successes
64.71314003701418% success rate for is_flickr urls 
1.094089469759662 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 3242.0 urls with 2098.0 successes
64.71314003701418% success rate for all urls 
1.094097282002379 seconds spent per all succesful image download


 13%|█▎        | 145/1100 [38:24<3:38:04, 13.70s/it]

Scraping images for class "framework"
Multiprocessing workers: 8


 13%|█▎        | 146/1100 [38:56<5:02:19, 19.01s/it]

Scraping images for class "tree sparrow"
Multiprocessing workers: 8


 13%|█▎        | 147/1100 [38:59<3:45:34, 14.20s/it]

Scraping images for class "cabin car"
Multiprocessing workers: 8


 13%|█▎        | 148/1100 [39:07<3:15:48, 12.34s/it]

Scraping images for class "Leonberg"
Multiprocessing workers: 8


 14%|█▎        | 149/1100 [39:10<2:31:53,  9.58s/it]

Scraping images for class "stateroom"
Multiprocessing workers: 8


 14%|█▎        | 150/1100 [39:18<2:24:31,  9.13s/it]

Scraping images for class "appointee"
Multiprocessing workers: 8


 14%|█▎        | 151/1100 [39:21<1:56:10,  7.35s/it]

Scraping images for class "bottle-nosed whale"
Multiprocessing workers: 8


 14%|█▍        | 152/1100 [39:39<2:46:16, 10.52s/it]

Scraping images for class "jewelweed"
Multiprocessing workers: 8


 14%|█▍        | 153/1100 [39:42<2:11:25,  8.33s/it]

Scraping images for class "loft"
Multiprocessing workers: 8


 14%|█▍        | 154/1100 [39:55<2:30:53,  9.57s/it]

Scraping images for class "gooseneck loosestrife"
Multiprocessing workers: 8


 14%|█▍        | 155/1100 [40:02<2:19:49,  8.88s/it]

Scraping images for class "ewe"
Multiprocessing workers: 8


 14%|█▍        | 156/1100 [40:05<1:53:15,  7.20s/it]

Scraping images for class "vanilla pudding"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 3492.0 urls with 2292.0 successes
65.63573883161511% success rate for is_flickr urls 
1.0550635831518322 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 3492.0 urls with 2292.0 successes
65.63573883161511% success rate for all urls 
1.055070810501072 seconds spent per all succesful image download


 14%|█▍        | 157/1100 [40:18<2:18:23,  8.81s/it]

Scraping images for class "garden trowel"
Multiprocessing workers: 8


 14%|█▍        | 158/1100 [40:25<2:12:22,  8.43s/it]

Scraping images for class "eastern indigo snake"
Multiprocessing workers: 8


 14%|█▍        | 159/1100 [40:37<2:27:39,  9.42s/it]

Scraping images for class "black currant"
Multiprocessing workers: 8


 15%|█▍        | 160/1100 [40:51<2:47:16, 10.68s/it]

Scraping images for class "cheese pizza"
Multiprocessing workers: 8


 15%|█▍        | 161/1100 [40:59<2:35:36,  9.94s/it]

Scraping images for class "raccoon"
Multiprocessing workers: 8


 15%|█▍        | 162/1100 [41:12<2:48:20, 10.77s/it]

Scraping images for class "maar"
Multiprocessing workers: 8


 15%|█▍        | 163/1100 [41:32<3:33:55, 13.70s/it]

Scraping images for class "poop deck"
Multiprocessing workers: 8


 15%|█▍        | 164/1100 [41:56<4:21:18, 16.75s/it]

Scraping images for class "showplace"
Multiprocessing workers: 8


 15%|█▌        | 165/1100 [41:59<3:13:42, 12.43s/it]

Scraping images for class "scented fern"


 15%|█▌        | 166/1100 [42:19<3:49:22, 14.73s/it]

Scraping images for class "hovercraft"
Multiprocessing workers: 8


 15%|█▌        | 167/1100 [42:35<3:54:32, 15.08s/it]

Scraping images for class "round file"
Multiprocessing workers: 8


 15%|█▌        | 168/1100 [42:51<4:02:42, 15.62s/it]

Scraping images for class "manhole cover"
Multiprocessing workers: 8


 15%|█▌        | 169/1100 [43:05<3:52:31, 14.99s/it]

Scraping images for class "New World goldfinch"
Multiprocessing workers: 8


 16%|█▋        | 180/1100 [45:05<3:04:45, 12.05s/it]

Scraping images for class "cinnamon"
Multiprocessing workers: 8


 16%|█▋        | 181/1100 [45:28<3:53:42, 15.26s/it]

Scraping images for class "traveler"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 3993.0 urls with 2643.0 successes
66.190833959429% success rate for is_flickr urls 
1.0337000221543993 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 3993.0 urls with 2643.0 successes
66.190833959429% success rate for all urls 
1.033705757009771 seconds spent per all succesful image download


 17%|█▋        | 182/1100 [45:47<4:12:16, 16.49s/it]

Scraping images for class "dent corn"
Multiprocessing workers: 8


 17%|█▋        | 183/1100 [46:26<5:54:08, 23.17s/it]

Scraping images for class "tractor"
Multiprocessing workers: 8


 17%|█▋        | 184/1100 [46:29<4:21:59, 17.16s/it]

Scraping images for class "electronic device"
Multiprocessing workers: 8


 17%|█▋        | 185/1100 [46:58<5:16:59, 20.79s/it]

Scraping images for class "suitor"
Multiprocessing workers: 8


 17%|█▋        | 186/1100 [47:20<5:23:37, 21.24s/it]

Scraping images for class "foot rule"
Multiprocessing workers: 8


 17%|█▋        | 187/1100 [47:33<4:43:17, 18.62s/it]

Scraping images for class "hazelnut"
Multiprocessing workers: 8


 17%|█▋        | 188/1100 [47:46<4:19:14, 17.06s/it]

Scraping images for class "large crabgrass"
Multiprocessing workers: 8


 17%|█▋        | 189/1100 [48:06<4:33:07, 17.99s/it]

Scraping images for class "showy orchis"
Multiprocessing workers: 8


 17%|█▋        | 190/1100 [48:09<3:21:48, 13.31s/it]

Scraping images for class "cheese souffle"
Multiprocessing workers: 8


 17%|█▋        | 191/1100 [48:17<2:59:19, 11.84s/it]

Scraping images for class "battle dress"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 4242.0 urls with 2787.0 successes
65.7001414427157% success rate for is_flickr urls 
1.0431905733799054 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 4242.0 urls with 2787.0 successes
65.7001414427157% success rate for all urls 
1.0431958847162242 seconds spent per all succesful image download


 17%|█▋        | 192/1100 [48:30<3:03:22, 12.12s/it]

Scraping images for class "striped hyena"
Multiprocessing workers: 8


 18%|█▊        | 193/1100 [48:38<2:45:45, 10.97s/it]

Scraping images for class "Guinean"
Multiprocessing workers: 8


 18%|█▊        | 194/1100 [48:42<2:11:09,  8.69s/it]

Scraping images for class "meatball"
Multiprocessing workers: 8


 18%|█▊        | 195/1100 [48:59<2:51:38, 11.38s/it]

Scraping images for class "escapement"
Multiprocessing workers: 8


 18%|█▊        | 196/1100 [49:03<2:14:51,  8.95s/it]

Scraping images for class "sprit"
Multiprocessing workers: 8


 18%|█▊        | 197/1100 [49:11<2:14:07,  8.91s/it]

Scraping images for class "king crab"
Multiprocessing workers: 8


 18%|█▊        | 198/1100 [49:14<1:45:59,  7.05s/it]

Scraping images for class "skin"
Multiprocessing workers: 8


 18%|█▊        | 199/1100 [49:23<1:52:49,  7.51s/it]

Scraping images for class "backstay"
Multiprocessing workers: 8


 18%|█▊        | 200/1100 [49:34<2:10:48,  8.72s/it]

Scraping images for class "Kahlua"
Multiprocessing workers: 8


 18%|█▊        | 201/1100 [49:48<2:33:41, 10.26s/it]

Scraping images for class "socket wrench"
Multiprocessing workers: 8


 18%|█▊        | 202/1100 [50:00<2:42:27, 10.86s/it]

Scraping images for class "thrips"
Multiprocessing workers: 8


 18%|█▊        | 203/1100 [50:23<3:35:26, 14.41s/it]

Scraping images for class "lyme grass"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 4492.0 urls with 2970.0 successes
66.11754229741763% success rate for is_flickr urls 
1.0246108233326614 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 4492.0 urls with 2970.0 successes
66.11754229741763% success rate for all urls 
1.0246168377423528 seconds spent per all succesful image download


 19%|█▊        | 204/1100 [50:55<4:55:31, 19.79s/it]

Scraping images for class "milk float"
Multiprocessing workers: 8


 19%|█▊        | 205/1100 [51:08<4:23:57, 17.70s/it]

Scraping images for class "fire tongs"
Multiprocessing workers: 8


 19%|█▊        | 206/1100 [51:16<3:38:21, 14.66s/it]

Scraping images for class "chamois"
Multiprocessing workers: 8


 19%|█▉        | 207/1100 [51:29<3:32:33, 14.28s/it]

Scraping images for class "row house"
Multiprocessing workers: 8


 19%|█▉        | 208/1100 [51:31<2:38:49, 10.68s/it]

Scraping images for class "banded krait"
Multiprocessing workers: 8


 19%|█▉        | 209/1100 [51:39<2:25:19,  9.79s/it]

Scraping images for class "hand mower"
Multiprocessing workers: 8


 19%|█▉        | 210/1100 [51:52<2:38:33, 10.69s/it]

Scraping images for class "gopher"
Multiprocessing workers: 8


 19%|█▉        | 211/1100 [52:05<2:48:01, 11.34s/it]

Scraping images for class "convolvulus"
Multiprocessing workers: 8


 19%|█▉        | 212/1100 [52:13<2:33:39, 10.38s/it]

Scraping images for class "clarified butter"
Multiprocessing workers: 8


 19%|█▉        | 213/1100 [52:15<1:57:36,  7.96s/it]

Scraping images for class "Coigue"
Multiprocessing workers: 8


 19%|█▉        | 214/1100 [52:22<1:54:09,  7.73s/it]

Scraping images for class "lupine"
Multiprocessing workers: 8


 20%|█▉        | 215/1100 [52:25<1:32:14,  6.25s/it]

Scraping images for class "flowerbed"
Multiprocessing workers: 8


 20%|█▉        | 216/1100 [52:44<2:25:12,  9.86s/it]

Scraping images for class "Mountie"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 4746.0 urls with 3163.0 successes
66.64559629161398% success rate for is_flickr urls 
1.0008422460052508 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 4746.0 urls with 3163.0 successes
66.64559629161398% success rate for all urls 
1.0008437266426677 seconds spent per all succesful image download


 20%|█▉        | 217/1100 [52:46<1:54:29,  7.78s/it]

Scraping images for class "hot sauce"
Multiprocessing workers: 8


 20%|█▉        | 218/1100 [52:50<1:33:27,  6.36s/it]

Scraping images for class "solar dish"
Multiprocessing workers: 8


 20%|█▉        | 219/1100 [52:57<1:40:21,  6.83s/it]

Scraping images for class "great grandchild"
Multiprocessing workers: 8


 20%|██        | 220/1100 [53:20<2:49:38, 11.57s/it]

Scraping images for class "Scandinavian"
Multiprocessing workers: 8


 20%|██        | 221/1100 [53:55<4:32:59, 18.63s/it]

Scraping images for class "ticking"
Multiprocessing workers: 8


 20%|██        | 222/1100 [54:20<4:58:35, 20.40s/it]

Scraping images for class "dishtowel"
Multiprocessing workers: 8


 20%|██        | 223/1100 [54:38<4:49:14, 19.79s/it]

Scraping images for class "palomino"
Multiprocessing workers: 8


 20%|██        | 224/1100 [54:51<4:20:19, 17.83s/it]

Scraping images for class "toasting fork"
Multiprocessing workers: 8


 20%|██        | 225/1100 [54:54<3:12:41, 13.21s/it]

Scraping images for class "water gauge"
Multiprocessing workers: 8


 21%|██        | 226/1100 [55:13<3:38:43, 15.02s/it]

Scraping images for class "garland flower"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 4992.0 urls with 3327.0 successes
66.64663461538461% success rate for is_flickr urls 
0.9966572501976855 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
