In [1]:
import os
import numpy as np
import requests
import argparse
import json
import time
import logging
import csv

from multiprocessing import Pool, Process, Value, Lock

from requests.exceptions import ConnectionError, ReadTimeout, TooManyRedirects, MissingSchema, InvalidURL

In [2]:
# --- Scraper configuration: edit these values before running the cells below ---

# Root folder for downloads; an 'imagenet_images' sub-folder is created inside it.
data_root = './data'

# Class selection: either the explicit wnid list below, or a random sample.
use_class_list = False
class_list = []
# NOTE(review): not read anywhere in the visible cells; looks like a
# misspelling of `use_class_list` -- confirm before removing.
user_class_list = False
number_of_classes = 1100
images_per_class = 10

# When True, URLs that do not contain 'flickr' are skipped (default True).
scrape_only_flickr = True

# Runtime behaviour: size of the download pool and debug logging / CSV stats.
multiprocessing_workers = 8
debug = False

In [3]:
# Logging is only configured in debug mode: DEBUG-level records are then
# written to the log file named below.
if debug:
    logging.basicConfig(filename='imagenet_scarper.log', level=logging.DEBUG)

In [4]:
def IMAGENET_API_WNID_TO_URLS(wnid):
    """Return the ImageNet API URL that lists the image URLs of synset `wnid`."""
    return 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'.format(wnid)


# The class metadata file is expected next to the notebook's working directory.
class_info_json_filename = 'imagenet_class_info.json'
current_folder = os.path.abspath(os.curdir)
class_info_json_filepath = os.path.join(current_folder, class_info_json_filename)

# Placeholder; filled from the JSON file by the loading cell.
class_info_dict = {}

In [5]:
# Load the class metadata JSON into class_info_dict (keyed by wnid). The cells
# below read 'class_name', 'flickr_img_url_count' and 'img_url_count' from
# each entry.
with open(class_info_json_filepath) as class_info_json_f:
    class_info_dict = json.load(class_info_json_f)

In [6]:
# Build `classes_to_scrape`: the list of wnids the scraping loop will process.
classes_to_scrape = []

if use_class_list:
    # Explicit selection: every requested wnid must exist in the metadata.
    # Validate the whole list first so nothing is queued when it is invalid.
    unknown_classes = [item for item in class_list if item not in class_info_dict]
    for item in unknown_classes:
        logging.error(f'Class {item} not found in ImageNete')
    if unknown_classes:
        # Raise instead of calling exit(): an exception stops the cell at this
        # point with a traceback and leaves the kernel usable.
        raise ValueError(f'Classes not found in ImageNet: {unknown_classes}')
    classes_to_scrape.extend(class_list)

else:
    # Random selection: keep only classes that list enough URLs. The 0.9 / 0.8
    # factors discount the advertised URL count (presumably a margin for dead
    # links -- confirm with the author).
    if scrape_only_flickr:
        url_count_key, usable_fraction = 'flickr_img_url_count', 0.9
    else:
        url_count_key, usable_fraction = 'img_url_count', 0.8

    potential_class_pool = [
        key
        for key, val in class_info_dict.items()
        if int(val[url_count_key]) * usable_fraction > images_per_class
    ]

    if len(potential_class_pool) < number_of_classes:
        logging.error(f"With {images_per_class} images per class there are {len(potential_class_pool)} to choose from.")
        logging.error(f"Decrease number of classes or decrease images per class.")
        raise ValueError(
            f'Only {len(potential_class_pool)} classes have enough URLs for '
            f'{images_per_class} images per class, but {number_of_classes} were requested.'
        )

    # Sample without replacement. NOTE: no seed is set, so every run picks a
    # different set of classes.
    picked_classes_idxes = np.random.choice(len(potential_class_pool), number_of_classes, replace = False)

    classes_to_scrape = [potential_class_pool[idx] for idx in picked_classes_idxes]
            

In [7]:
# Show the human-readable names of the selected synsets.
picked_class_names = [
    class_info_dict[wnid]['class_name']
    for wnid in classes_to_scrape
]
print("Picked the following clases:")
print(picked_class_names)

Picked the following clases:
['catmint', 'easy chair', 'ski rack', 'common cotton grass', 'Dane', 'marine mussel', 'locoweed', 'hall', 'armor', 'chador', 'guest of honor', 'bonefish', 'rowel', 'cowpea', 'parfait', 'bouillon', 'flying jib', 'White Russian', 'astrolabe', 'glow lamp', 'Melkite', 'hydrometer', 'marsh plant', 'oyster mushroom', 'maple', 'entrepreneur', 'metasequoia', 'level', 'foreground', 'linseed', 'rig', 'engine', 'shunt', 'Easter egg', 'coatdress', 'American wistaria', 'Mexican sunflower', 'pavilion', 'imperialist', 'centrist', 'Minuteman', 'sequencer', 'bread dough', 'alehouse', 'cock of the rock', 'asp', 'gasmask', 'bass guitar', 'poppy', 'panther', 'ridge', 'good-king-henry', 'sculptor', 'wild boar', 'boxcar', 'bitewing', 'buckram', 'valerian', 'television antenna', 'climbing frame', 'cornetfish', 'helminth', 'Tartuffe', 'header', 'push button', 'kneeler', 'impala lily', 'plum sauce', 'drinking vessel', 'common fennel', 'green alder', 'espionage agent', 'autopilot', 

In [10]:
# All class folders are created under <data_root>/imagenet_images.
imagenet_images_folder = os.path.join(data_root, 'imagenet_images')
# makedirs also creates a missing data_root (plain mkdir raised
# FileNotFoundError on a fresh checkout) and is a no-op when the folder exists.
os.makedirs(imagenet_images_folder, exist_ok=True)

In [11]:
# Plain (single-process) counters, one independent dict per URL category.
# NOTE(review): nothing in the visible cells reads this dict; the scraper
# updates the MultiStats instance instead.
scraping_stats = {
    category: {'tried': 0, 'success': 0, 'time_spent': 0}
    for category in ('all', 'is_flickr', 'not_flickr')
}

In [12]:
def add_debug_csv_row(row):
    """Append `row` (an iterable of values) as one line to 'stats.csv'.

    The file is created in the current working directory if it is missing.
    """
    # newline='' is required by the csv module: the writer emits its own
    # '\r\n' terminators, and without it platforms that translate newlines
    # would write '\r\r\n' (blank lines between rows).
    with open('stats.csv', "a", newline='') as csv_f:
        csv_writer = csv.writer(csv_f, delimiter=",")
        csv_writer.writerow(row)

class MultiStats():
    """Lock-protected counters for the scraper, grouped by URL category.

    `stats[category][stat]` is a `multiprocessing.Value` of type double for
    each category in ('all', 'is_flickr', 'not_flickr') and each stat in
    ('tried', 'success', 'time_spent'). All reads and writes made through
    `inc` / `get` are serialised by the single `lock`.
    """

    def __init__(self):
        self.lock = Lock()

        # Every counter starts at 0; counts are stored as doubles as well,
        # which is why they read back as floats (e.g. 3.0).
        self.stats = {
            category: {
                stat: Value('d', 0)
                for stat in ('tried', 'success', 'time_spent')
            }
            for category in ('all', 'is_flickr', 'not_flickr')
        }

    def inc(self, cls, stat, val):
        """Add `val` to the counter `stat` of category `cls`."""
        with self.lock:
            counter = self.stats[cls][stat]
            counter.value += val

    def get(self, cls, stat):
        """Return the current value (a float) of counter `stat` of category `cls`."""
        with self.lock:
            return self.stats[cls][stat].value

# Statistics object updated by get_image and read by the reporting helpers.
multi_stats = MultiStats()

if debug:
    # Header row for stats.csv: "<category>_<stat>" for every category/stat
    # pair, categories in the outer loop.
    row = [
        f"{category}_{stat}"
        for category in ("all", "is_flickr", "not_flickr")
        for stat in ("tried", "success", "time_spent")
    ]
    add_debug_csv_row(row)
    
def add_stats_to_debug_csv():
    """Append one CSV row holding the current value of every counter.

    Values are read from `multi_stats` category by category ('all',
    'is_flickr', 'not_flickr'), each in the order tried, success, time_spent.
    """
    snapshot = []
    for category in ('all', 'is_flickr', 'not_flickr'):
        for stat in ('tried', 'success', 'time_spent'):
            snapshot.append(multi_stats.get(category, stat))
    add_debug_csv_row(snapshot)

def print_stats(cls, print_func):
    """Report the counters of category `cls` through `print_func`.

    Args:
        cls: category key in `multi_stats` ('all', 'is_flickr' or 'not_flickr').
        print_func: callable taking one string, e.g. `print` or `logging.debug`.

    Always emits a header and the tried/success counts. The success rate is
    emitted only when something was tried, and the per-image time only when
    something succeeded.
    """

    def stat(name):
        return multi_stats.get(cls, name)

    wall_clock_elapsed = time.time() - scraping_t_start.value
    summed_worker_time = multi_stats.get('all', 'time_spent')

    # Ratio of elapsed wall-clock time to the time summed over all workers;
    # used to rescale the per-image time below. Falls back to 1.0 before any
    # time has been recorded.
    actual_processes_ratio = (
        1.0 if summed_worker_time == 0
        else wall_clock_elapsed / summed_worker_time
    )

    print_func(f'STATS For class {cls}:')
    print_func(f' tried {stat("tried")} urls with'
               f' {stat("success")} successes')

    if stat("tried") > 0:
        success_rate = 100.0 * stat("success") / stat("tried")
        print_func(f'{success_rate}% success rate for {cls} urls ')
    if stat("success") > 0:
        seconds_per_image = stat("time_spent") * actual_processes_ratio / stat("success")
        print_func(f'{seconds_per_image} seconds spent per {cls} succesful image download')



# Module-level state read and written by get_image and the scraping loop.

# Guards reads/updates of url_tries and class_images in get_image.
lock = Lock()
# Count of URLs that reached the download stage; get_image prints the stats
# every 250th one.
url_tries = Value('d', 0)
# Timestamp taken when this cell runs; print_stats measures elapsed time from it.
scraping_t_start = Value('d', time.time())
# Folder images are written to; reassigned per class by the scraping loop.
# NOTE(review): a plain string, so workers presumably only see the current
# value because each Pool is started after it is set -- confirm on
# non-fork platforms.
class_folder = ''
# Images saved so far for the current class; reset to 0 by the scraping loop.
class_images = Value('d', 0)

In [15]:
def get_image(img_url):
    """Download one image URL into `class_folder` and record statistics.

    Called for every URL of a class by the worker pool. Always returns None.

    The URL is skipped without touching the statistics when
      * it is empty or a single character,
      * the class already has `images_per_class` saved images, or
      * it does not contain 'flickr' while `scrape_only_flickr` is set.

    Otherwise the attempt is recorded in `multi_stats` (time spent, tried,
    and success when the image was saved). A download counts as a failure on
    any request error, a missing or non-image content-type, a body smaller
    than 1000 bytes, an unusable file name, or an error while writing the file.

    Args:
        img_url: the image URL as a string.
    """

    if len(img_url) <= 1:
        return

    with lock:
        cls_imgs = class_images.value

    # NOTE: this check and the increment after saving are separate critical
    # sections, so workers running concurrently can each pass the check and
    # the class may end up with a few more than images_per_class images.
    if cls_imgs >= images_per_class:
        return

    logging.debug(img_url)

    if 'flickr' in img_url:
        cls = 'is_flickr'
    else:
        cls = 'not_flickr'
        if scrape_only_flickr:
            return

    t_start = time.time()

    def finish(status):
        """Record the outcome ('success' or 'failure') of this attempt."""
        t_spent = time.time() - t_start
        multi_stats.inc(cls, 'time_spent', t_spent)
        multi_stats.inc('all', 'time_spent', t_spent)

        multi_stats.inc(cls, 'tried', 1)
        multi_stats.inc('all', 'tried', 1)

        if status == 'success':
            multi_stats.inc(cls, 'success', 1)
            multi_stats.inc('all', 'success', 1)
        elif status != 'failure':
            logging.error(f'No such status {status}!!')
            # Programming error. Raise rather than exit(): exiting inside a
            # pool worker kills the worker instead of reporting the problem.
            raise ValueError(f'No such status {status}!!')

    # Periodic progress report: every 250th attempted URL.
    with lock:
        url_tries.value += 1
        if url_tries.value % 250 == 0:
            print(f'\nScraping stats:')
            print_stats('is_flickr', print)
            print_stats('not_flickr', print)
            print_stats('all', print)
            if debug:
                add_stats_to_debug_csv()

    try:
        img_resp = requests.get(img_url, timeout = 1)
    except ConnectionError:
        logging.debug(f"Connection Error for url {img_url}")
        return finish('failure')
    except ReadTimeout:
        logging.debug(f"Read Timeout for url {img_url}")
        return finish('failure')
    except TooManyRedirects:
        logging.debug(f"Too many redirects {img_url}")
        return finish('failure')
    except (MissingSchema, InvalidURL):
        return finish('failure')
    except requests.exceptions.RequestException as err:
        # Any other requests error (unsupported scheme, broken chunked
        # encoding, ...) is just a bad URL. Left uncaught it would propagate
        # out of Pool.map and abort the whole scraping run.
        logging.debug(f"Request failed for url {img_url}: {err!r}")
        return finish('failure')

    if 'content-type' not in img_resp.headers:
        return finish('failure')

    if 'image' not in img_resp.headers['content-type']:
        logging.debug("Not an image")
        return finish('failure')

    # Bodies under 1000 bytes are rejected (presumably placeholder or error
    # images -- confirm the threshold with the author).
    if len(img_resp.content) < 1000:
        return finish('failure')

    logging.debug(img_resp.headers['content-type'])
    logging.debug(f'image size {len(img_resp.content)}')

    # File name = last path segment of the URL without its query string.
    img_name = img_url.split('/')[-1]
    img_name = img_name.split("?")[0]

    if len(img_name) <= 1:
        return finish('failure')

    img_file_path = os.path.join(class_folder, img_name)
    logging.debug(f'Saving image in {img_file_path}')

    try:
        with open(img_file_path, 'wb') as img_f:
            img_f.write(img_resp.content)
    except OSError as err:
        # e.g. a URL-derived name the filesystem rejects; one bad file must
        # not take down the pool.
        logging.debug(f'Could not save {img_file_path}: {err!r}')
        return finish('failure')

    # The file is closed at this point; only now count the image.
    with lock:
        class_images.value += 1

    logging.debug(f'Scraping stats')
    print_stats('is_flickr', logging.debug)
    print_stats('not_flickr', logging.debug)
    print_stats('all', logging.debug)

    return finish('success')

from tqdm import tqdm
    
# Seconds to wait for the server while fetching a class's URL list. Without a
# timeout a stalled connection would block the whole run indefinitely.
URL_LIST_TIMEOUT_S = 30

# Main scraping loop: for each selected class fetch its URL list, then let a
# worker pool download the images into <imagenet_images_folder>/<class_name>.
for class_wnid in tqdm(classes_to_scrape):

    class_name = class_info_dict[class_wnid]["class_name"]
    print(f'Scraping images for class \"{class_name}\"')
    url_urls = IMAGENET_API_WNID_TO_URLS(class_wnid)

    class_folder = os.path.join(imagenet_images_folder, class_name)
    # An existing folder marks the class as handled by an earlier run.
    if os.path.exists(class_folder):
        continue

    time.sleep(0.05)
    try:
        resp = requests.get(url_urls, timeout=URL_LIST_TIMEOUT_S)
        resp.raise_for_status()
    except requests.exceptions.RequestException as err:
        # Skip the class WITHOUT creating its folder, so that a later run
        # retries it instead of treating an empty folder as "done".
        logging.warning(f'Could not fetch URL list for class "{class_name}": {err!r}')
        continue

    # Create the folder only once there is something to download into it.
    os.mkdir(class_folder)

    class_images.value = 0

    # One URL per line. Undecodable bytes are replaced rather than raising, so
    # a single malformed line cannot abort the run (such a URL simply fails
    # to download).
    urls = [url.decode('utf-8', errors='replace') for url in resp.content.splitlines()]

    print(f"Multiprocessing workers: {multiprocessing_workers}")
    with Pool(processes=multiprocessing_workers) as p:
        p.map(get_image, urls)



  0%|          | 0/1100 [00:00<?, ?it/s][A

Scraping images for class "catmint"
Scraping images for class "easy chair"
Multiprocessing workers: 8



  0%|          | 2/1100 [00:16<2:27:24,  8.05s/it][A

Scraping images for class "ski rack"
Multiprocessing workers: 8



  0%|          | 3/1100 [00:24<2:28:33,  8.13s/it][A

Scraping images for class "common cotton grass"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 23991.0 urls with 16540.0 successes
68.94252011170856% success rate for is_flickr urls 
0.9888726004810402 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 23991.0 urls with 16540.0 successes
68.94252011170856% success rate for all urls 
0.9888728741723075 seconds spent per all succesful image download



  0%|          | 4/1100 [00:38<3:03:30, 10.05s/it][A

Scraping images for class "Dane"
Multiprocessing workers: 8



  0%|          | 5/1100 [00:50<3:10:22, 10.43s/it][A

Scraping images for class "marine mussel"
Multiprocessing workers: 8



  1%|          | 6/1100 [00:58<2:59:52,  9.86s/it][A

Scraping images for class "locoweed"
Multiprocessing workers: 8



  1%|          | 7/1100 [01:00<2:15:56,  7.46s/it][A

Scraping images for class "hall"
Multiprocessing workers: 8



  1%|          | 8/1100 [01:13<2:46:23,  9.14s/it][A

Scraping images for class "armor"
Multiprocessing workers: 8



  1%|          | 9/1100 [01:21<2:37:34,  8.67s/it][A

Scraping images for class "chador"
Multiprocessing workers: 8



  1%|          | 10/1100 [01:33<2:59:09,  9.86s/it][A

Scraping images for class "guest of honor"
Multiprocessing workers: 8



  1%|          | 11/1100 [01:45<3:10:07, 10.48s/it][A

Scraping images for class "bonefish"
Multiprocessing workers: 8



  1%|          | 12/1100 [01:59<3:25:06, 11.31s/it][A

Scraping images for class "rowel"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 24234.0 urls with 16682.0 successes
68.83717091689363% success rate for is_flickr urls 
0.98786057528082 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 24234.0 urls with 16682.0 successes
68.83717091689363% success rate for all urls 
0.9878614621544868 seconds spent per all succesful image download



  1%|          | 13/1100 [02:52<7:11:42, 23.83s/it][A

Scraping images for class "cowpea"
Multiprocessing workers: 8



  1%|▏         | 14/1100 [03:27<8:16:10, 27.41s/it][A

Scraping images for class "parfait"
Multiprocessing workers: 8



  1%|▏         | 15/1100 [04:11<9:45:19, 32.37s/it][A

Scraping images for class "bouillon"
Multiprocessing workers: 8



  1%|▏         | 16/1100 [04:46<9:55:55, 32.98s/it][A

Scraping images for class "flying jib"
Multiprocessing workers: 8



  2%|▏         | 17/1100 [05:07<8:54:21, 29.60s/it][A

Scraping images for class "White Russian"
Multiprocessing workers: 8



  2%|▏         | 18/1100 [05:19<7:15:34, 24.15s/it][A

Scraping images for class "astrolabe"
Multiprocessing workers: 8



  2%|▏         | 19/1100 [05:22<5:18:45, 17.69s/it][A

Scraping images for class "glow lamp"
Multiprocessing workers: 8



  2%|▏         | 20/1100 [05:33<4:46:10, 15.90s/it][A

Scraping images for class "Melkite"
Multiprocessing workers: 8



  2%|▏         | 21/1100 [05:40<3:58:40, 13.27s/it][A

Scraping images for class "hydrometer"
Multiprocessing workers: 8



  2%|▏         | 22/1100 [05:43<3:00:23, 10.04s/it][A

Scraping images for class "marsh plant"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 24484.0 urls with 16827.0 successes
68.72651527528181% success rate for is_flickr urls 
0.9910123025470574 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 24485.0 urls with 16827.0 successes
68.72370839289361% success rate for all urls 
0.9910182807118496 seconds spent per all succesful image download



  2%|▏         | 23/1100 [05:46<2:21:09,  7.86s/it][A

Scraping images for class "oyster mushroom"
Multiprocessing workers: 8



  2%|▏         | 24/1100 [05:53<2:19:41,  7.79s/it][A

Scraping images for class "maple"
Multiprocessing workers: 8



  2%|▏         | 25/1100 [05:56<1:52:24,  6.27s/it][A

Scraping images for class "entrepreneur"
Multiprocessing workers: 8



  2%|▏         | 26/1100 [06:14<2:54:09,  9.73s/it][A

Scraping images for class "metasequoia"
Multiprocessing workers: 8



  2%|▏         | 27/1100 [06:26<3:09:48, 10.61s/it][A

Scraping images for class "level"
Multiprocessing workers: 8



  3%|▎         | 28/1100 [06:40<3:23:35, 11.40s/it][A

Scraping images for class "foreground"
Multiprocessing workers: 8



  3%|▎         | 29/1100 [06:48<3:06:15, 10.43s/it][A

Scraping images for class "linseed"
Multiprocessing workers: 8



  3%|▎         | 30/1100 [07:00<3:17:36, 11.08s/it][A

Scraping images for class "rig"
Multiprocessing workers: 8



  3%|▎         | 31/1100 [07:22<4:13:56, 14.25s/it][A

Scraping images for class "engine"
Scraping images for class "shunt"
Multiprocessing workers: 8



  3%|▎         | 33/1100 [07:24<3:01:20, 10.20s/it][A

Scraping images for class "Easter egg"
Multiprocessing workers: 8



  3%|▎         | 34/1100 [07:56<4:59:26, 16.85s/it][A

Scraping images for class "coatdress"
Multiprocessing workers: 8



  3%|▎         | 35/1100 [08:15<5:08:53, 17.40s/it][A

Scraping images for class "American wistaria"
Multiprocessing workers: 8



  3%|▎         | 36/1100 [08:33<5:13:03, 17.65s/it][A

Scraping images for class "Mexican sunflower"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 24734.0 urls with 16990.0 successes
68.6908708660144% success rate for is_flickr urls 
0.9918604595090025 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 24734.0 urls with 16990.0 successes
68.6908708660144% success rate for all urls 
0.9918612851613826 seconds spent per all succesful image download



  3%|▎         | 37/1100 [09:00<6:02:28, 20.46s/it][A

Scraping images for class "pavilion"
Multiprocessing workers: 8



  3%|▎         | 38/1100 [09:38<7:36:33, 25.79s/it][A

Scraping images for class "imperialist"
Multiprocessing workers: 8



  4%|▎         | 39/1100 [10:01<7:22:17, 25.01s/it][A

Scraping images for class "centrist"
Multiprocessing workers: 8



  4%|▎         | 40/1100 [10:04<5:21:37, 18.21s/it][A

Scraping images for class "Minuteman"
Multiprocessing workers: 8



  4%|▎         | 41/1100 [10:11<4:23:12, 14.91s/it][A

Scraping images for class "sequencer"
Multiprocessing workers: 8



  4%|▍         | 42/1100 [10:23<4:07:04, 14.01s/it][A

Scraping images for class "bread dough"
Multiprocessing workers: 8



  4%|▍         | 43/1100 [10:26<3:08:00, 10.67s/it][A

Scraping images for class "alehouse"
Multiprocessing workers: 8



  4%|▍         | 44/1100 [10:29<2:27:33,  8.38s/it][A

Scraping images for class "cock of the rock"
Multiprocessing workers: 8



  4%|▍         | 45/1100 [10:36<2:22:02,  8.08s/it][A

Scraping images for class "asp"
Multiprocessing workers: 8



  4%|▍         | 46/1100 [10:38<1:51:00,  6.32s/it][A

Scraping images for class "gasmask"
Multiprocessing workers: 8



  4%|▍         | 47/1100 [10:46<1:58:30,  6.75s/it][A

Scraping images for class "bass guitar"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 24984.0 urls with 17160.0 successes
68.68395773294908% success rate for is_flickr urls 
0.9900430772965882 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 24984.0 urls with 17160.0 successes
68.68395773294908% success rate for all urls 
0.9900435223584965 seconds spent per all succesful image download



  4%|▍         | 48/1100 [10:59<2:31:07,  8.62s/it][A

Scraping images for class "poppy"
Multiprocessing workers: 8



  4%|▍         | 49/1100 [11:12<2:51:32,  9.79s/it][A

Scraping images for class "panther"
Multiprocessing workers: 8



  5%|▍         | 50/1100 [11:20<2:41:54,  9.25s/it][A

Scraping images for class "ridge"
Multiprocessing workers: 8



  5%|▍         | 51/1100 [11:23<2:11:46,  7.54s/it][A

Scraping images for class "good-king-henry"
Multiprocessing workers: 8



  5%|▍         | 52/1100 [11:35<2:34:02,  8.82s/it][A

Scraping images for class "sculptor"
Multiprocessing workers: 8



  5%|▍         | 53/1100 [11:43<2:29:06,  8.54s/it][A

Scraping images for class "wild boar"
Multiprocessing workers: 8



  5%|▍         | 54/1100 [12:07<3:48:59, 13.14s/it][A

Scraping images for class "boxcar"
Multiprocessing workers: 8



  5%|▌         | 55/1100 [12:09<2:54:06, 10.00s/it][A

Scraping images for class "bitewing"
Multiprocessing workers: 8



  5%|▌         | 56/1100 [12:16<2:37:09,  9.03s/it][A

Scraping images for class "buckram"
Multiprocessing workers: 8



  5%|▌         | 57/1100 [12:40<3:53:54, 13.46s/it][A

Scraping images for class "valerian"
Multiprocessing workers: 8



  5%|▌         | 58/1100 [13:00<4:27:15, 15.39s/it][A

Scraping images for class "television antenna"
Multiprocessing workers: 8



  5%|▌         | 59/1100 [13:18<4:39:23, 16.10s/it][A

Scraping images for class "climbing frame"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 25239.0 urls with 17326.0 successes
68.64772772296843% success rate for is_flickr urls 
0.9892493763408112 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 25239.0 urls with 17326.0 successes
68.64772772296843% success rate for all urls 
0.9892495519278521 seconds spent per all succesful image download



  5%|▌         | 60/1100 [14:11<7:52:05, 27.24s/it][A

Scraping images for class "cornetfish"
Multiprocessing workers: 8



  6%|▌         | 61/1100 [14:52<9:05:11, 31.48s/it][A

Scraping images for class "helminth"
Multiprocessing workers: 8



  6%|▌         | 62/1100 [15:30<9:35:59, 33.29s/it][A

Scraping images for class "Tartuffe"
Multiprocessing workers: 8



  6%|▌         | 63/1100 [15:41<7:39:46, 26.60s/it][A

Scraping images for class "header"
Multiprocessing workers: 8



  6%|▌         | 64/1100 [15:52<6:22:16, 22.14s/it][A

Scraping images for class "push button"
Multiprocessing workers: 8



  6%|▌         | 65/1100 [15:56<4:43:51, 16.46s/it][A

Scraping images for class "kneeler"
Multiprocessing workers: 8



  6%|▌         | 66/1100 [16:08<4:20:12, 15.10s/it][A

Scraping images for class "impala lily"
Multiprocessing workers: 8



  6%|▌         | 67/1100 [16:11<3:17:42, 11.48s/it][A

Scraping images for class "plum sauce"
Multiprocessing workers: 8



  6%|▌         | 68/1100 [16:19<3:00:13, 10.48s/it][A

Scraping images for class "drinking vessel"
Multiprocessing workers: 8



  6%|▋         | 69/1100 [16:22<2:21:17,  8.22s/it][A

Scraping images for class "common fennel"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 25484.0 urls with 17472.0 successes
68.56066551561764% success rate for is_flickr urls 
0.9913014950906183 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 25484.0 urls with 17472.0 successes
68.56066551561764% success rate for all urls 
0.9913018761891803 seconds spent per all succesful image download



  6%|▋         | 70/1100 [16:30<2:22:04,  8.28s/it][A

Scraping images for class "green alder"
Multiprocessing workers: 8



  6%|▋         | 71/1100 [16:33<1:52:06,  6.54s/it][A

Scraping images for class "espionage agent"
Multiprocessing workers: 8



  7%|▋         | 72/1100 [16:34<1:28:14,  5.15s/it][A

Scraping images for class "autopilot"
Multiprocessing workers: 8



  7%|▋         | 73/1100 [16:43<1:45:46,  6.18s/it][A

Scraping images for class "motor vehicle"
Multiprocessing workers: 8



  7%|▋         | 74/1100 [16:52<1:59:29,  6.99s/it][A

Scraping images for class "footlocker"
Multiprocessing workers: 8



  7%|▋         | 75/1100 [16:54<1:35:47,  5.61s/it][A

Scraping images for class "halibut"
Multiprocessing workers: 8



  7%|▋         | 76/1100 [16:57<1:21:53,  4.80s/it][A

Scraping images for class "newel post"
Multiprocessing workers: 8



  7%|▋         | 77/1100 [17:05<1:35:45,  5.62s/it][A

Scraping images for class "martello tower"
Multiprocessing workers: 8



  7%|▋         | 78/1100 [17:08<1:23:25,  4.90s/it][A

Scraping images for class "toy"
Multiprocessing workers: 8



  7%|▋         | 79/1100 [17:15<1:34:44,  5.57s/it][A

Scraping images for class "flintlock"
Multiprocessing workers: 8



  7%|▋         | 80/1100 [17:24<1:51:35,  6.56s/it][A

Scraping images for class "Hungarian sauce"
Multiprocessing workers: 8



  7%|▋         | 81/1100 [17:26<1:30:26,  5.32s/it][A

Scraping images for class "smut grass"
Multiprocessing workers: 8



  7%|▋         | 82/1100 [17:39<2:08:02,  7.55s/it][A

Scraping images for class "axseed"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 25737.0 urls with 17667.0 successes
68.64436414500524% success rate for is_flickr urls 
0.9849582590500554 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 25737.0 urls with 17667.0 successes
68.64436414500524% success rate for all urls 
0.9849585746878065 seconds spent per all succesful image download



  8%|▊         | 83/1100 [18:32<5:59:49, 21.23s/it][A

Scraping images for class "blunderbuss"
Multiprocessing workers: 8



  8%|▊         | 84/1100 [19:06<7:00:36, 24.84s/it][A

Scraping images for class "chili"
Multiprocessing workers: 8



  8%|▊         | 85/1100 [19:29<6:54:21, 24.49s/it][A

Scraping images for class "flatfish"
Multiprocessing workers: 8



  8%|▊         | 86/1100 [19:52<6:45:29, 23.99s/it][A

Scraping images for class "lentil"
Multiprocessing workers: 8



  8%|▊         | 87/1100 [19:55<4:59:08, 17.72s/it][A

Scraping images for class "futon"
Multiprocessing workers: 8



  8%|▊         | 88/1100 [20:38<7:07:47, 25.36s/it][A

Scraping images for class "bustle"
Multiprocessing workers: 8



  8%|▊         | 89/1100 [21:02<7:00:42, 24.97s/it][A

Scraping images for class "rotgut"
Multiprocessing workers: 8



  8%|▊         | 90/1100 [21:21<6:25:42, 22.91s/it][A

Scraping images for class "building"
Multiprocessing workers: 8



  8%|▊         | 91/1100 [21:24<4:48:02, 17.13s/it][A

Scraping images for class "pelagic bird"
Multiprocessing workers: 8



  8%|▊         | 92/1100 [21:42<4:53:30, 17.47s/it][A

Scraping images for class "Fauve"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 25984.0 urls with 17825.0 successes
68.59990763546799% success rate for is_flickr urls 
0.990519282366417 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 25984.0 urls with 17825.0 successes
68.59990763546799% success rate for all urls 
0.9905201314308199 seconds spent per all succesful image download



  8%|▊         | 93/1100 [22:32<7:32:37, 26.97s/it][A

Scraping images for class "pumpkin"
Multiprocessing workers: 8



  9%|▊         | 94/1100 [23:17<9:02:36, 32.36s/it][A

Scraping images for class "rockfish"
Multiprocessing workers: 8



  9%|▊         | 95/1100 [23:41<8:22:01, 29.97s/it][A

Scraping images for class "newsstand"
Multiprocessing workers: 8



  9%|▊         | 96/1100 [24:16<8:45:33, 31.41s/it][A

Scraping images for class "generator"
Scraping images for class "cork tree"
Multiprocessing workers: 8



  9%|▉         | 98/1100 [24:50<7:33:22, 27.15s/it][A

Scraping images for class "wild madder"
Multiprocessing workers: 8



  9%|▉         | 99/1100 [25:30<8:35:28, 30.90s/it][A

Scraping images for class "king penguin"
Multiprocessing workers: 8



  9%|▉         | 100/1100 [26:20<10:10:35, 36.64s/it][A

Scraping images for class "domino"
Multiprocessing workers: 8



  9%|▉         | 101/1100 [26:54<9:58:25, 35.94s/it] [A

Scraping images for class "apple mint"
Multiprocessing workers: 8



  9%|▉         | 102/1100 [27:19<9:03:11, 32.66s/it][A

Scraping images for class "cigar smoker"
Multiprocessing workers: 8



  9%|▉         | 103/1100 [28:02<9:53:31, 35.72s/it][A

Scraping images for class "long trousers"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 26239.0 urls with 17975.0 successes
68.50489729029307% success rate for is_flickr urls 
1.0021736670535197 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 26239.0 urls with 17975.0 successes
68.50489729029307% success rate for all urls 
1.0021738271089853 seconds spent per all succesful image download



  9%|▉         | 104/1100 [28:36<9:43:14, 35.13s/it][A

Scraping images for class "fishhook"
Multiprocessing workers: 8



 10%|▉         | 105/1100 [28:49<7:53:02, 28.53s/it][A

Scraping images for class "solar heater"
Multiprocessing workers: 8



 10%|▉         | 106/1100 [29:23<8:19:04, 30.13s/it][A

Scraping images for class "textile screw pine"
Multiprocessing workers: 8



 10%|▉         | 107/1100 [29:33<6:39:21, 24.13s/it][A

Scraping images for class "mandarin"
Multiprocessing workers: 8



 10%|▉         | 108/1100 [29:57<6:37:46, 24.06s/it][A

Scraping images for class "radio compass"
Multiprocessing workers: 8



 10%|▉         | 109/1100 [30:30<7:22:21, 26.78s/it][A

Scraping images for class "darling"
Multiprocessing workers: 8



 10%|█         | 110/1100 [31:05<8:01:25, 29.18s/it][A

Scraping images for class "gazpacho"
Multiprocessing workers: 8



 10%|█         | 111/1100 [32:00<10:12:36, 37.16s/it][A

Scraping images for class "copper"
Multiprocessing workers: 8



 10%|█         | 112/1100 [32:23<9:02:04, 32.92s/it] [A

Scraping images for class "moon shell"
Multiprocessing workers: 8



 10%|█         | 113/1100 [33:12<10:17:36, 37.54s/it][A

Scraping images for class "mechanical device"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 26485.0 urls with 18126.0 successes
68.4387389088163% success rate for is_flickr urls 
1.0114728516331706 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 26485.0 urls with 18126.0 successes
68.4387389088163% success rate for all urls 
1.0114731709320235 seconds spent per all succesful image download



 10%|█         | 114/1100 [34:06<11:38:53, 42.53s/it][A

Scraping images for class "godfather"
Multiprocessing workers: 8



 10%|█         | 115/1100 [34:39<10:52:39, 39.76s/it][A

Scraping images for class "stun gun"
Multiprocessing workers: 8



 11%|█         | 116/1100 [35:39<12:29:16, 45.69s/it][A

Scraping images for class "cyme"



 11%|█         | 117/1100 [35:59<10:22:48, 38.01s/it][A

Scraping images for class "common matrimony vine"
Multiprocessing workers: 8



 11%|█         | 118/1100 [36:33<10:03:48, 36.89s/it][A

Scraping images for class "pants suit"
Multiprocessing workers: 8



 11%|█         | 119/1100 [36:47<8:08:58, 29.91s/it] [A

Scraping images for class "parasail"
Multiprocessing workers: 8



 11%|█         | 120/1100 [37:10<7:37:00, 27.98s/it][A

Scraping images for class "roughcast"
Multiprocessing workers: 8



 11%|█         | 121/1100 [37:43<8:00:54, 29.47s/it][A

Scraping images for class "sailfish"
Multiprocessing workers: 8



 11%|█         | 122/1100 [38:27<9:09:27, 33.71s/it][A

Scraping images for class "yellowwood"
Multiprocessing workers: 8



 11%|█         | 123/1100 [39:10<9:57:14, 36.68s/it][A

Scraping images for class "scholar"
Multiprocessing workers: 8



 11%|█▏        | 124/1100 [39:41<9:26:32, 34.83s/it][A

Scraping images for class "beeper"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 26734.0 urls with 18265.0 successes
68.32123887184858% success rate for is_flickr urls 
1.0256378097727368 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 26734.0 urls with 18265.0 successes
68.32123887184858% success rate for all urls 
1.0256383677883862 seconds spent per all succesful image download



 11%|█▏        | 125/1100 [40:19<9:40:03, 35.70s/it][A

Scraping images for class "snow bunting"
Multiprocessing workers: 8



 11%|█▏        | 126/1100 [40:52<9:30:21, 35.14s/it][A

Scraping images for class "earphone"
Multiprocessing workers: 8



 12%|█▏        | 127/1100 [41:18<8:43:57, 32.31s/it][A

Scraping images for class "sandpiper"



 12%|█▏        | 128/1100 [41:38<7:44:06, 28.65s/it][A

Scraping images for class "goldfields"
Multiprocessing workers: 8



 12%|█▏        | 129/1100 [41:51<6:26:22, 23.87s/it][A

Scraping images for class "ditty bag"
Multiprocessing workers: 8



 12%|█▏        | 130/1100 [42:13<6:16:26, 23.29s/it][A

Scraping images for class "schnapps"
Multiprocessing workers: 8



 12%|█▏        | 131/1100 [42:46<7:02:39, 26.17s/it][A

Scraping images for class "queen"
Multiprocessing workers: 8



 12%|█▏        | 132/1100 [42:58<5:54:50, 21.99s/it][A

Scraping images for class "cittern"
Multiprocessing workers: 8



 12%|█▏        | 133/1100 [43:31<6:47:49, 25.31s/it][A

Scraping images for class "sunflower"
Multiprocessing workers: 8



 12%|█▏        | 134/1100 [44:01<7:07:27, 26.55s/it][A

Scraping images for class "analog watch"
Multiprocessing workers: 8



 12%|█▏        | 135/1100 [44:50<8:56:12, 33.34s/it][A

Scraping images for class "reptile"
Multiprocessing workers: 8



 12%|█▏        | 136/1100 [45:34<9:46:38, 36.51s/it][A

Scraping images for class "yellowlegs"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 26984.0 urls with 18427.0 successes
68.28861547583753% success rate for is_flickr urls 
1.0352695975849915 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 26985.0 urls with 18427.0 successes
68.28608486196035% success rate for all urls 
1.0352704163749704 seconds spent per all succesful image download



 12%|█▏        | 137/1100 [45:47<7:54:40, 29.57s/it][A

Scraping images for class "ruddy duck"
Multiprocessing workers: 8



 13%|█▎        | 138/1100 [46:10<7:24:32, 27.73s/it][A

Scraping images for class "jaguarundi"
Multiprocessing workers: 8



 13%|█▎        | 139/1100 [46:34<7:02:28, 26.38s/it][A

Scraping images for class "bench press"
Multiprocessing workers: 8



 13%|█▎        | 140/1100 [46:47<5:57:13, 22.33s/it][A

Scraping images for class "mocha"
Multiprocessing workers: 8



 13%|█▎        | 141/1100 [47:19<6:44:06, 25.28s/it][A

Scraping images for class "wild rosemary"
Multiprocessing workers: 8



 13%|█▎        | 142/1100 [47:39<6:18:48, 23.73s/it][A

Scraping images for class "altarpiece"
Multiprocessing workers: 8



 13%|█▎        | 143/1100 [48:15<7:18:06, 27.47s/it][A

Scraping images for class "wimp"
Multiprocessing workers: 8



 13%|█▎        | 144/1100 [49:20<10:16:08, 38.67s/it][A

Scraping images for class "beige"
Multiprocessing workers: 8



 13%|█▎        | 145/1100 [49:54<9:53:10, 37.27s/it] [A

Scraping images for class "framework"
Multiprocessing workers: 8



 13%|█▎        | 146/1100 [50:27<9:33:27, 36.07s/it][A

Scraping images for class "tree sparrow"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 27235.0 urls with 18569.0 successes
68.18064989902699% success rate for is_flickr urls 
1.0425550645680026 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 27235.0 urls with 18569.0 successes
68.18064989902699% success rate for all urls 
1.0425553864054793 seconds spent per all succesful image download



 13%|█▎        | 147/1100 [50:42<7:51:06, 29.66s/it][A

Scraping images for class "cabin car"
Multiprocessing workers: 8



 13%|█▎        | 148/1100 [51:01<7:01:09, 26.54s/it][A

Scraping images for class "Leonberg"
Multiprocessing workers: 8



 14%|█▎        | 149/1100 [51:05<5:12:38, 19.72s/it][A

Scraping images for class "stateroom"
Multiprocessing workers: 8



 14%|█▎        | 150/1100 [51:23<5:06:46, 19.37s/it][A

Scraping images for class "appointee"
Multiprocessing workers: 8



 14%|█▎        | 151/1100 [51:27<3:50:25, 14.57s/it][A

Scraping images for class "bottle-nosed whale"
Multiprocessing workers: 8



 14%|█▍        | 152/1100 [51:59<5:13:08, 19.82s/it][A

Scraping images for class "jewelweed"
Multiprocessing workers: 8



 14%|█▍        | 153/1100 [52:12<4:43:11, 17.94s/it][A

Scraping images for class "loft"
Multiprocessing workers: 8



 14%|█▍        | 154/1100 [52:20<3:53:31, 14.81s/it][A

Scraping images for class "gooseneck loosestrife"
Multiprocessing workers: 8



 14%|█▍        | 155/1100 [52:33<3:45:23, 14.31s/it][A

Scraping images for class "ewe"
Multiprocessing workers: 8



 14%|█▍        | 156/1100 [52:47<3:41:57, 14.11s/it][A

Scraping images for class "vanilla pudding"
Multiprocessing workers: 8



 14%|█▍        | 157/1100 [52:54<3:09:53, 12.08s/it][A

Scraping images for class "garden trowel"
Multiprocessing workers: 8



 14%|█▍        | 158/1100 [53:17<3:59:10, 15.23s/it][A

Scraping images for class "eastern indigo snake"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 27491.0 urls with 18749.0 successes
68.20050198246699% success rate for is_flickr urls 
1.0415477622851759 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 27491.0 urls with 18749.0 successes
68.20050198246699% success rate for all urls 
1.0415480804351802 seconds spent per all succesful image download



 14%|█▍        | 159/1100 [53:28<3:41:55, 14.15s/it][A

Scraping images for class "black currant"
Multiprocessing workers: 8



 15%|█▍        | 160/1100 [54:03<5:20:01, 20.43s/it][A

Scraping images for class "cheese pizza"
Multiprocessing workers: 8



 15%|█▍        | 161/1100 [54:44<6:52:22, 26.35s/it][A

Scraping images for class "raccoon"
Multiprocessing workers: 8



 15%|█▍        | 162/1100 [55:32<8:37:16, 33.09s/it][A

Scraping images for class "maar"
Multiprocessing workers: 8



 15%|█▍        | 163/1100 [55:44<6:57:20, 26.72s/it][A

Scraping images for class "poop deck"
Multiprocessing workers: 8



 15%|█▍        | 164/1100 [56:09<6:48:33, 26.19s/it][A

Scraping images for class "showplace"
Multiprocessing workers: 8



 15%|█▌        | 165/1100 [56:22<5:44:14, 22.09s/it][A

Scraping images for class "scented fern"
Multiprocessing workers: 8



 16%|█▌        | 172/1100 [58:25<5:20:45, 20.74s/it][A

Scraping images for class "skewer"
Multiprocessing workers: 8



 16%|█▌        | 173/1100 [59:12<7:17:53, 28.34s/it][A

Scraping images for class "club moss"
Multiprocessing workers: 8



 16%|█▌        | 174/1100 [1:00:17<10:09:29, 39.49s/it][A

Scraping images for class "cauliflower"
Scraping images for class "tautog"
Multiprocessing workers: 8



 16%|█▌        | 176/1100 [1:00:51<8:23:58, 32.73s/it] [A

Scraping images for class "wall unit"
Multiprocessing workers: 8



 16%|█▌        | 177/1100 [1:01:34<9:10:49, 35.81s/it][A

Scraping images for class "spritsail"
Multiprocessing workers: 8



 16%|█▌        | 178/1100 [1:01:37<6:38:57, 25.96s/it][A

Scraping images for class "angelica"
Multiprocessing workers: 8



 16%|█▋        | 179/1100 [1:01:51<5:43:40, 22.39s/it][A

Scraping images for class "stealth aircraft"
Multiprocessing workers: 8



 16%|█▋        | 180/1100 [1:01:59<4:37:54, 18.12s/it][A

Scraping images for class "cinnamon"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 27987.0 urls with 19050.0 successes
68.06731696859256% success rate for is_flickr urls 
1.0525533380283145 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 27987.0 urls with 19050.0 successes
68.06731696859256% success rate for all urls 
1.0525536612012567 seconds spent per all succesful image download



 16%|█▋        | 181/1100 [1:02:12<4:14:11, 16.60s/it][A

Scraping images for class "traveler"
Multiprocessing workers: 8



 17%|█▋        | 182/1100 [1:02:36<4:46:29, 18.72s/it][A

Scraping images for class "dent corn"
Multiprocessing workers: 8



 17%|█▋        | 183/1100 [1:02:44<3:56:53, 15.50s/it][A

Scraping images for class "tractor"
Multiprocessing workers: 8



 17%|█▋        | 184/1100 [1:02:58<3:49:35, 15.04s/it][A

Scraping images for class "electronic device"
Multiprocessing workers: 8



 17%|█▋        | 185/1100 [1:03:18<4:13:08, 16.60s/it][A

Scraping images for class "suitor"
Multiprocessing workers: 8



 17%|█▋        | 186/1100 [1:03:41<4:41:36, 18.49s/it][A

Scraping images for class "foot rule"
Multiprocessing workers: 8



 17%|█▋        | 187/1100 [1:03:54<4:18:35, 16.99s/it][A

Scraping images for class "hazelnut"
Multiprocessing workers: 8



 17%|█▋        | 188/1100 [1:04:21<5:02:08, 19.88s/it][A

Scraping images for class "large crabgrass"
Multiprocessing workers: 8



 17%|█▋        | 189/1100 [1:04:44<5:14:46, 20.73s/it][A

Scraping images for class "showy orchis"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 28234.0 urls with 19190.0 successes
67.96769851951548% success rate for is_flickr urls 
1.054519914574397 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 28234.0 urls with 19190.0 successes
67.96769851951548% success rate for all urls 
1.054520197744603 seconds spent per all succesful image download



 17%|█▋        | 190/1100 [1:05:22<6:35:46, 26.10s/it][A

Scraping images for class "cheese souffle"
Multiprocessing workers: 8



 17%|█▋        | 191/1100 [1:06:07<7:59:13, 31.63s/it][A

Scraping images for class "battle dress"
Multiprocessing workers: 8



 17%|█▋        | 192/1100 [1:06:51<8:57:00, 35.49s/it][A

Scraping images for class "striped hyena"
Multiprocessing workers: 8



 18%|█▊        | 193/1100 [1:06:55<6:32:05, 25.94s/it][A

Scraping images for class "Guinean"
Multiprocessing workers: 8



 18%|█▊        | 194/1100 [1:07:26<6:54:39, 27.46s/it][A

Scraping images for class "meatball"
Multiprocessing workers: 8



 18%|█▊        | 195/1100 [1:07:29<5:02:53, 20.08s/it][A

Scraping images for class "escapement"
Multiprocessing workers: 8



 18%|█▊        | 196/1100 [1:07:37<4:09:37, 16.57s/it][A

Scraping images for class "sprit"
Multiprocessing workers: 8



 18%|█▊        | 197/1100 [1:07:49<3:48:53, 15.21s/it][A

Scraping images for class "king crab"
Multiprocessing workers: 8



 18%|█▊        | 198/1100 [1:07:52<2:53:04, 11.51s/it][A

Scraping images for class "skin"
Multiprocessing workers: 8



 18%|█▊        | 199/1100 [1:08:12<3:28:26, 13.88s/it][A

Scraping images for class "backstay"
Multiprocessing workers: 8



 18%|█▊        | 200/1100 [1:08:15<2:38:50, 10.59s/it][A

Scraping images for class "Kahlua"
Multiprocessing workers: 8



 18%|█▊        | 201/1100 [1:08:22<2:25:09,  9.69s/it][A

Scraping images for class "socket wrench"
Multiprocessing workers: 8

Scraping stats:
STATS For class is_flickr:
 tried 28484.0 urls with 19368.0 successes
67.99606796798203% success rate for is_flickr urls 
1.0550331770388954 seconds spent per is_flickr succesful image download
STATS For class not_flickr:
 tried 0.0 urls with 0.0 successes
STATS For class all:
 tried 28484.0 urls with 19368.0 successes
67.99606796798203% success rate for all urls 
1.055033887186015 seconds spent per all succesful image download



 18%|█▊        | 202/1100 [1:08:43<3:16:37, 13.14s/it][A

Scraping images for class "thrips"
Multiprocessing workers: 8



 18%|█▊        | 203/1100 [1:08:45<2:26:13,  9.78s/it][A

Scraping images for class "lyme grass"
Multiprocessing workers: 8



100%|██████████| 1100/1100 [1:08:57<00:00,  3.76s/it] [A

Scraping images for class "milk float"
Scraping images for class "fire tongs"
Scraping images for class "chamois"
Scraping images for class "row house"
Scraping images for class "banded krait"
Scraping images for class "hand mower"
Scraping images for class "gopher"
Scraping images for class "convolvulus"
Scraping images for class "clarified butter"
Scraping images for class "Coigue"
Scraping images for class "lupine"
Scraping images for class "flowerbed"
Scraping images for class "Mountie"
Scraping images for class "hot sauce"
Scraping images for class "solar dish"
Scraping images for class "great grandchild"
Scraping images for class "Scandinavian"
Scraping images for class "ticking"
Scraping images for class "dishtowel"
Scraping images for class "palomino"
Scraping images for class "toasting fork"
Scraping images for class "water gauge"
Scraping images for class "garland flower"
Scraping images for class "Wilson's snipe"
Scraping images for class "European swift"
Scraping images for


