# Download ImageNet images by Wordnet ID
Modified from https://github.com/kjaisingh/hardhat-detector/blob/master/download-neg-images.py
Changes from the original:
-  Count the images and report progress
-  Continue on a bad image (one poisoned my .rec image file!)
-  Parameterise the root folder and class for images
-  Exclude small images (likely to be no longer available)


In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 18 11:42:56 2018
@author: KaranJaisingh
Updated by Steve Hunter Fri Aug 24
 Count of images and reporting
 Added continue on bad image (poisoned my .rec image file!)
 Parameterise the root folder and class for images
 Exclude small images (likely to be no longer available)
"""

import cv2
from six.moves import urllib
import numpy as np
import os

def store_raw_images(pic_limit, img_path, img_class, imagenet_wnid):
    """Download up to ``pic_limit`` ImageNet images for one WordNet ID.

    The URL list for ``imagenet_wnid`` is fetched from the ImageNet
    ``geturls`` endpoint. Each URL is fetched once, decoded with OpenCV,
    resized to 300x300 and written as ``<n>.jpg`` (n = 1, 2, ...) inside the
    directory formed from ``img_path`` and ``img_class``. URLs that cannot
    be fetched or decoded are reported and skipped; the URL list is walked
    exactly once, so fewer than ``pic_limit`` images may be saved. Finally,
    files under 8 KiB in that directory are listed and deleted, since such
    files usually mean the image no longer exists at the given URL.

    Args:
        pic_limit: Maximum number of images to save.
        img_path: Root folder for downloaded images, e.g. ``"./images/"``.
        img_class: Class sub-folder under ``img_path``, e.g. ``"nohelmet/"``.
        imagenet_wnid: ImageNet WordNet ID, e.g. ``"n07942152"``.
    """
    image_size = (300, 300)
    min_bytes = 8 * 1024
    fetch_timeout = 1  # seconds; without a timeout a dead host can hang the run

    # Get the newline-separated URL list for this wnid from Imagenet.
    image_link = 'http://image-net.org/api/text/imagenet.synset.geturls?wnid=' + imagenet_wnid
    with urllib.request.urlopen(image_link) as listing:
        image_urls = listing.read().decode()

    # os.path.join keeps the output folder and the file names consistent
    # whether or not the caller put a trailing slash on img_class.
    target_dir = os.path.join(img_path, img_class)
    os.makedirs(target_dir, exist_ok=True)

    saved = 0
    for raw_url in image_urls.splitlines():
        if saved >= pic_limit:
            break
        url = raw_url.strip()
        if not url:
            continue
        try:
            print(url)
            # Single request with a timeout; the response is closed on exit.
            with urllib.request.urlopen(url, data=None, timeout=fetch_timeout) as response:
                print(saved + 1, ". ", response)
                payload = response.read()
            img = cv2.imdecode(
                np.frombuffer(payload, dtype=np.uint8), cv2.IMREAD_COLOR
            )
            if img is None:
                raise ValueError("not a decodable image: " + url)
            resized_image = cv2.resize(img, image_size)
            out_file = os.path.join(target_dir, str(saved + 1) + '.jpg')
            if not cv2.imwrite(out_file, resized_image):
                raise IOError("could not write " + out_file)
            saved += 1
        except Exception as e:
            # Deliberately broad: any bad URL or bad image is reported and
            # skipped so one failure does not abort the whole download.
            print(str(e))

    print("Downloaded", saved, "images written to ", target_dir)

    # Delete images that no longer exist, usually <8k bytes.
    # Pure-Python replacement for the notebook-only
    # `!find ... -name "*.*" -size -8k -delete` shell escape; the size is
    # compared in exact bytes (size < 8 * 1024).
    print("Deleted these files under 8k, may no longer exist at the URL given by Imagenet:")
    for folder, _subdirs, filenames in os.walk(target_dir):
        for filename in filenames:
            if '.' not in filename:
                continue
            candidate = os.path.join(folder, filename)
            try:
                if os.path.getsize(candidate) < min_bytes:
                    print(candidate)
                    os.remove(candidate)
            except OSError as e:
                print(str(e))




In [3]:
# Get images of people (not wearing helmets).
# 'People' in Imagenet, http://image-net.org/synset?wnid=n07942152
imagenet_wnid = "n07942152"
img_class = "nohelmet/"
img_path = "./images/"
pic_limit = 1000

store_raw_images(
    pic_limit=pic_limit,
    img_path=img_path,
    img_class=img_class,
    imagenet_wnid=imagenet_wnid,
)






http://farm4.static.flickr.com/3186/2606823711_b07495378f.jpg
1 .  <http.client.HTTPResponse object at 0x7f3653f94390>
http://farm4.static.flickr.com/3360/3236502886_6a505502c5.jpg
2 .  <http.client.HTTPResponse object at 0x7f3654007dd8>
http://farm5.static.flickr.com/4022/4322083945_bc2db8ccf3.jpg
3 .  <http.client.HTTPResponse object at 0x7f3653f943c8>
http://farm4.static.flickr.com/3121/2556445732_0f905b11e4.jpg
4 .  <http.client.HTTPResponse object at 0x7f3653fa3588>
http://farm4.static.flickr.com/3551/3443533285_2a70b92f2c.jpg
5 .  <http.client.HTTPResponse object at 0x7f3653fa3c88>
http://farm1.static.flickr.com/224/491407069_8dbdc006c4.jpg
6 .  <http.client.HTTPResponse object at 0x7f3653fa3e48>
http://farm1.static.flickr.com/33/42539714_c899059d27.jpg
7 .  <http.client.HTTPResponse object at 0x7f3653fa36a0>
http://farm4.static.flickr.com/3512/3248952529_0bda5b70eb.jpg
8 .  <http.client.HTTPResponse object at 0x7f3653fa3470>
http://farm3.static.flickr.com/2461/3707202944_74e7a04

http://farm5.static.flickr.com/4065/4311934540_643617ec01.jpg
70 .  <http.client.HTTPResponse object at 0x7f3653fa3518>
http://farm3.static.flickr.com/2249/1805035317_7d0e479673.jpg
71 .  <http.client.HTTPResponse object at 0x7f3653fa30b8>
http://farm1.static.flickr.com/160/437529048_db52384cf0.jpg
72 .  <http.client.HTTPResponse object at 0x7f3654007e10>
http://farm3.static.flickr.com/2280/1838798756_e920544cc9.jpg
73 .  <http.client.HTTPResponse object at 0x7f3654007d68>
http://farm4.static.flickr.com/3419/3848514395_33416dc441.jpg
74 .  <http.client.HTTPResponse object at 0x7f3653fa3240>
http://farm3.static.flickr.com/2470/3927670733_72a58c48d1.jpg
75 .  <http.client.HTTPResponse object at 0x7f3653f943c8>
http://farm3.static.flickr.com/2110/3536285640_43376f8f1a.jpg
76 .  <http.client.HTTPResponse object at 0x7f3653f94320>
http://farm5.static.flickr.com/4043/4261773846_20c25acbec.jpg
77 .  <http.client.HTTPResponse object at 0x7f3653fa3978>
http://farm1.static.flickr.com/13/15775879

http://farm3.static.flickr.com/2223/1524259956_8848536a05.jpg
138 .  <http.client.HTTPResponse object at 0x7f3653fa3080>
http://farm1.static.flickr.com/181/434732576_7764a66567.jpg
139 .  <http.client.HTTPResponse object at 0x7f3653fa3b00>
http://farm4.static.flickr.com/3056/2555520860_94cdd63d2b.jpg
140 .  <http.client.HTTPResponse object at 0x7f3653fa39e8>
http://farm3.static.flickr.com/2459/3735846377_74889226dc.jpg
141 .  <http.client.HTTPResponse object at 0x7f3653fa3518>
http://farm3.static.flickr.com/2720/4034752817_b51eb680d9.jpg
142 .  <http.client.HTTPResponse object at 0x7f3653fa30b8>
http://farm4.static.flickr.com/3195/3060324613_695cfd28b8.jpg
143 .  <http.client.HTTPResponse object at 0x7f3654007e48>
http://farm4.static.flickr.com/3276/2735117083_b13ac31ebc.jpg
144 .  <http.client.HTTPResponse object at 0x7f3654007d68>
http://files.splinder.com/3432b9a6f609ce16a053d0933ad5c011.jpg
<urlopen error [Errno 111] Connection refused>
http://farm3.static.flickr.com/2604/392767143

http://farm4.static.flickr.com/3144/3077097994_b3b6c5328a.jpg
205 .  <http.client.HTTPResponse object at 0x7f3653fa3e80>
http://farm1.static.flickr.com/43/87079379_34e5b4b7ea.jpg
206 .  <http.client.HTTPResponse object at 0x7f3653fa30b8>
http://farm4.static.flickr.com/3490/3796845677_e49c8f28a2.jpg
207 .  <http.client.HTTPResponse object at 0x7f3653fa3b00>
http://farm4.static.flickr.com/3431/3695033249_ab91b25658.jpg
208 .  <http.client.HTTPResponse object at 0x7f3653fa35f8>
http://farm4.static.flickr.com/3005/2580233222_3edcf49f6d.jpg
209 .  <http.client.HTTPResponse object at 0x7f3653fa3828>
http://farm3.static.flickr.com/2180/2485968176_f38a07520c.jpg
210 .  <http.client.HTTPResponse object at 0x7f3653fa38d0>
http://farm4.static.flickr.com/3655/3346428445_ddda41a885.jpg
211 .  <http.client.HTTPResponse object at 0x7f3653fa3550>
http://farm1.static.flickr.com/67/183015584_5ae501b3ec.jpg
212 .  <http.client.HTTPResponse object at 0x7f3653fa3a90>
http://farm1.static.flickr.com/21/28427

274 .  <http.client.HTTPResponse object at 0x7f3653fa3860>
http://farm4.static.flickr.com/3577/3389787804_95e03ce40c.jpg
275 .  <http.client.HTTPResponse object at 0x7f3653fa3160>
http://www.dolnykubin.com/images/kubincan1.jpg
<urlopen error [Errno -3] Temporary failure in name resolution>
http://www.meldonline.org/newblog/wp-content/uploads/2009/03/img_1299.jpg
HTTP Error 403: Forbidden
http://farm3.static.flickr.com/2057/2221306865_5721759c62.jpg
276 .  <http.client.HTTPResponse object at 0x7f3653fa35f8>
http://farm4.static.flickr.com/3057/2575042659_43b3d3530d.jpg
277 .  <http.client.HTTPResponse object at 0x7f3653fa3438>
http://farm4.static.flickr.com/3141/2662373115_bcf57495c4.jpg
278 .  <http.client.HTTPResponse object at 0x7f3653fa3860>
http://farm4.static.flickr.com/3174/2575069119_50e9d43187.jpg
279 .  <http.client.HTTPResponse object at 0x7f3653fa3dd8>
http://farm3.static.flickr.com/2110/3533836494_f50145b3f3.jpg
280 .  <http.client.HTTPResponse object at 0x7f3653fa3860>
http

341 .  <http.client.HTTPResponse object at 0x7f3653fa3a20>
http://farm1.static.flickr.com/67/180979737_006f5c7d43.jpg
342 .  <http.client.HTTPResponse object at 0x7f3653fa3b38>
http://farm4.static.flickr.com/3004/2579415555_7d6ac5af06.jpg
343 .  <http.client.HTTPResponse object at 0x7f3653fa32e8>
http://farm3.static.flickr.com/2780/4191844479_ee7c26f10e.jpg
344 .  <http.client.HTTPResponse object at 0x7f3653fa32b0>
http://farm3.static.flickr.com/2521/3689633275_e1a048a6d4.jpg
345 .  <http.client.HTTPResponse object at 0x7f3653fa3710>
http://farm3.static.flickr.com/2300/2081669162_93eb985ee0.jpg
346 .  <http.client.HTTPResponse object at 0x7f3653fa32b0>
http://farm3.static.flickr.com/2575/3885366714_766f390e19.jpg
347 .  <http.client.HTTPResponse object at 0x7f3653fa3550>
http://farm4.static.flickr.com/3266/3082602094_59147bb016.jpg
348 .  <http.client.HTTPResponse object at 0x7f3653fa3828>
http://farm3.static.flickr.com/2552/3828284255_6fa2bd3248.jpg
349 .  <http.client.HTTPResponse ob

http://farm4.static.flickr.com/3163/2861121410_5c4525c9bd.jpg
410 .  <http.client.HTTPResponse object at 0x7f3653fa3438>
http://farm4.static.flickr.com/3111/3666298069_38fdcf77ac.jpg
411 .  <http.client.HTTPResponse object at 0x7f3654007dd8>
http://farm4.static.flickr.com/3273/2722550210_4c90d876ec.jpg
412 .  <http.client.HTTPResponse object at 0x7f3653fa3240>
http://farm4.static.flickr.com/3152/2641596983_1e9543dabc.jpg
413 .  <http.client.HTTPResponse object at 0x7f3653fa33c8>
http://farm4.static.flickr.com/3588/3686895268_c559a611c0.jpg
414 .  <http.client.HTTPResponse object at 0x7f3653fa3080>
http://farm4.static.flickr.com/3088/3165463437_6b0a65c627.jpg
415 .  <http.client.HTTPResponse object at 0x7f3654007dd8>
http://farm3.static.flickr.com/2584/3898438324_4aea4da521.jpg
416 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm4.static.flickr.com/3086/2575041281_ae4256c801.jpg
417 .  <http.client.HTTPResponse object at 0x7f3653fa38d0>
http://farm3.static.flickr.com/2

http://farm1.static.flickr.com/46/187499006_56ab1d445f.jpg
479 .  <http.client.HTTPResponse object at 0x7f3653fa39b0>
http://farm3.static.flickr.com/2581/3707323008_fc142c1135.jpg
480 .  <http.client.HTTPResponse object at 0x7f3653fa3b38>
http://farm3.static.flickr.com/2331/2080857675_99e47e948b.jpg
481 .  <http.client.HTTPResponse object at 0x7f3653fa3be0>
http://farm4.static.flickr.com/3014/2579408429_f32642c88d.jpg
482 .  <http.client.HTTPResponse object at 0x7f3653fa34a8>
http://farm1.static.flickr.com/131/321915659_e4b69f399c.jpg
483 .  <http.client.HTTPResponse object at 0x7f3653fa3f98>
http://farm3.static.flickr.com/2198/2081668276_14b1c7ea99.jpg
484 .  <http.client.HTTPResponse object at 0x7f3653fa37f0>
http://farm3.static.flickr.com/2038/2080884143_4d17cdafc8.jpg
485 .  <http.client.HTTPResponse object at 0x7f3653fa3fd0>
http://farm5.static.flickr.com/4024/4307683564_84942f92cb.jpg
486 .  <http.client.HTTPResponse object at 0x7f3653fa3748>
http://farm4.static.flickr.com/3222/3

546 .  <http.client.HTTPResponse object at 0x7f3653fa3668>
http://farm4.static.flickr.com/3579/3678811994_9996dd4f55.jpg
547 .  <http.client.HTTPResponse object at 0x7f3653fa3fd0>
http://farm5.static.flickr.com/4043/4287666619_56e3488e21.jpg
548 .  <http.client.HTTPResponse object at 0x7f3653fa3b70>
http://farm4.static.flickr.com/3024/2575054093_5d2e367f8e.jpg
549 .  <http.client.HTTPResponse object at 0x7f3653fa3550>
http://farm4.static.flickr.com/3517/4008968375_d074187258.jpg
550 .  <http.client.HTTPResponse object at 0x7f3653fa3550>
http://farm3.static.flickr.com/2606/4307696982_4d7e322870.jpg
551 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm4.static.flickr.com/3083/3378066037_52419cd53c.jpg
552 .  <http.client.HTTPResponse object at 0x7f3653fa3c88>
http://farm4.static.flickr.com/3184/2978640391_a1056f24b0.jpg
553 .  <http.client.HTTPResponse object at 0x7f3653fa32b0>
http://farm4.static.flickr.com/3612/3615372478_57f413d8d6.jpg
554 .  <http.client.HTTPResponse

http://farm4.static.flickr.com/3057/2612835301_498c17f479.jpg
615 .  <http.client.HTTPResponse object at 0x7f3653fa3208>
http://farm4.static.flickr.com/3620/3546266865_7ef810de11.jpg
616 .  <http.client.HTTPResponse object at 0x7f3653fa3c50>
http://farm3.static.flickr.com/2661/4192603876_0d7224ab0a.jpg
617 .  <http.client.HTTPResponse object at 0x7f3653fa3da0>
http://farm4.static.flickr.com/3016/2316991560_27d0f23c02.jpg
618 .  <http.client.HTTPResponse object at 0x7f3653fa3470>
http://farm4.static.flickr.com/3185/2686469946_a32b70065c.jpg
619 .  <http.client.HTTPResponse object at 0x7f3653fa3898>
http://farm4.static.flickr.com/3333/3556988227_590db06fa7.jpg
620 .  <http.client.HTTPResponse object at 0x7f3653fa3c18>
http://farm1.static.flickr.com/28/92658584_a9a1df6b63.jpg
621 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm3.static.flickr.com/2678/4269270360_e62f9fc0a6.jpg
622 .  <http.client.HTTPResponse object at 0x7f3654007e80>
http://farm4.static.flickr.com/3135/

684 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm3.static.flickr.com/2101/2382565469_ca6b2ca57e.jpg
685 .  <http.client.HTTPResponse object at 0x7f3653fa3128>
http://farm4.static.flickr.com/3172/2868124018_292b286227.jpg
686 .  <http.client.HTTPResponse object at 0x7f3653fa3c18>
http://farm4.static.flickr.com/3271/3082601852_bd0b2771e6.jpg
687 .  <http.client.HTTPResponse object at 0x7f3653fa37f0>
http://farm4.static.flickr.com/3608/3335353708_d7029cca41.jpg
688 .  <http.client.HTTPResponse object at 0x7f3653fa3550>
http://farm3.static.flickr.com/2328/2209597142_717ac73a11.jpg
689 .  <http.client.HTTPResponse object at 0x7f3653fa3a58>
http://farm3.static.flickr.com/2562/3678000623_19e1990b84.jpg
690 .  <http.client.HTTPResponse object at 0x7f3653fa3e80>
http://farm4.static.flickr.com/3119/2777394084_25695b2efe.jpg
691 .  <http.client.HTTPResponse object at 0x7f3653fa3c88>
http://farm3.static.flickr.com/2116/2252829542_79cb6e5641.jpg
692 .  <http.client.HTTPResponse

752 .  <http.client.HTTPResponse object at 0x7f3653fa3588>
http://farm3.static.flickr.com/2618/3860211322_35627154fc.jpg
753 .  <http.client.HTTPResponse object at 0x7f3653fa3da0>
http://farm1.static.flickr.com/31/61042957_a044f2e22e.jpg
754 .  <http.client.HTTPResponse object at 0x7f3653fa3dd8>
http://farm1.static.flickr.com/29/59672744_c86a34a6fd.jpg
755 .  <http.client.HTTPResponse object at 0x7f3653fa32e8>
http://farm1.static.flickr.com/121/280873980_bad73d932f.jpg
756 .  <http.client.HTTPResponse object at 0x7f3653fa32b0>
http://farm2.static.flickr.com/1044/1486260902_8d0a14ffb7.jpg
757 .  <http.client.HTTPResponse object at 0x7f3653fa3eb8>
http://farm3.static.flickr.com/2175/2613669086_ab20575028.jpg
758 .  <http.client.HTTPResponse object at 0x7f3653fa3fd0>
http://farm4.static.flickr.com/3394/3335714092_bc04a7ac0a.jpg
759 .  <http.client.HTTPResponse object at 0x7f3653fa34a8>
http://farm3.static.flickr.com/2137/2089424827_cc01d106e5.jpg
760 .  <http.client.HTTPResponse object at

820 .  <http.client.HTTPResponse object at 0x7f3653fa3c18>
http://farm4.static.flickr.com/3046/2550968796_c7e84cbf79.jpg
821 .  <http.client.HTTPResponse object at 0x7f3653fa3518>
http://farm3.static.flickr.com/2052/2081640198_c87889af66.jpg
822 .  <http.client.HTTPResponse object at 0x7f3653fa3160>
http://farm4.static.flickr.com/3203/3152227012_ed196b6bb5.jpg
823 .  <http.client.HTTPResponse object at 0x7f3653fa3828>
http://farm1.static.flickr.com/35/106758854_1dfb9500fc.jpg
824 .  <http.client.HTTPResponse object at 0x7f3653fa3c88>
http://farm3.static.flickr.com/2620/3868849784_cd7da35402.jpg
825 .  <http.client.HTTPResponse object at 0x7f3653fa3d30>
http://farm4.static.flickr.com/3055/2550145795_966cefb3de.jpg
826 .  <http.client.HTTPResponse object at 0x7f3653fa3748>
http://farm4.static.flickr.com/3092/2555515342_58363e2f28.jpg
827 .  <http.client.HTTPResponse object at 0x7f3654007cf8>
http://farm3.static.flickr.com/2351/2138326772_b2b347bd22.jpg
828 .  <http.client.HTTPResponse ob

888 .  <http.client.HTTPResponse object at 0x7f3653fa3e48>
http://farm3.static.flickr.com/2702/4094857667_cabeed2425.jpg
889 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm3.static.flickr.com/2256/2085676782_e0720cf861.jpg
890 .  <http.client.HTTPResponse object at 0x7f3653fa3ef0>
http://farm3.static.flickr.com/2280/2513665656_b6bd6896ac.jpg
891 .  <http.client.HTTPResponse object at 0x7f3653fa3cf8>
http://farm4.static.flickr.com/3298/3334474157_34293efb13.jpg
892 .  <http.client.HTTPResponse object at 0x7f3653fa39b0>
http://farm5.static.flickr.com/4002/4324178333_880a5a4ba5.jpg
893 .  <http.client.HTTPResponse object at 0x7f3653fa3d30>
http://farm3.static.flickr.com/2007/2436374382_f188d4761c.jpg
894 .  <http.client.HTTPResponse object at 0x7f3653fa3dd8>
http://farm4.static.flickr.com/3261/2840887246_b034665456.jpg
895 .  <http.client.HTTPResponse object at 0x7f3653fa3a58>
http://farm1.static.flickr.com/126/405740696_78ae41c416.jpg
896 .  <http.client.HTTPResponse o

956 .  <http.client.HTTPResponse object at 0x7f3654007dd8>
http://farm3.static.flickr.com/2492/4136835109_a50cdee239.jpg
957 .  <http.client.HTTPResponse object at 0x7f3653fa36a0>
http://farm1.static.flickr.com/29/42472853_9f83c5aa7a.jpg
958 .  <http.client.HTTPResponse object at 0x7f3653fa3710>
http://farm5.static.flickr.com/4071/4307699880_7b904004ee.jpg
959 .  <http.client.HTTPResponse object at 0x7f3653fa30f0>
http://farm4.static.flickr.com/3338/3408403039_6504e2d7d0.jpg
960 .  <http.client.HTTPResponse object at 0x7f3653fa3780>
http://farm3.static.flickr.com/2510/3886693867_8f34244cec.jpg
961 .  <http.client.HTTPResponse object at 0x7f3653fa3a20>
http://farm1.static.flickr.com/24/61045168_2690d63fe6.jpg
962 .  <http.client.HTTPResponse object at 0x7f3653f94278>
http://farm3.static.flickr.com/2236/2080860695_ab585ca2ff.jpg
963 .  <http.client.HTTPResponse object at 0x7f3653fa3c18>
http://farm1.static.flickr.com/35/68306362_cbe93f7234.jpg
964 .  <http.client.HTTPResponse object at 0

In [None]:
# Get images of people (wearing helmets).
# 'Safety hat' in Imagenet, http://image-net.org/synset?wnid=n03492922
imagenet_wnid = "n03492922"
img_class = "helmet/"
img_path = "./train/"
pic_limit = 1000

store_raw_images(
    pic_limit=pic_limit,
    img_path=img_path,
    img_class=img_class,
    imagenet_wnid=imagenet_wnid,
)


