# Download ImageNet images by Wordnet ID
Modified from https://github.com/kjaisingh/hardhat-detector/blob/master/download-neg-images.py
Changes from the original:
- Count the images and report the total
- Continue past a bad image (one poisoned my .rec image file!)
- Parameterise the root folder and class for images
- Exclude small images (likely to be no longer available)


In [76]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 18 11:42:56 2018
@author: KaranJaisingh
Updated by Steve Hunter Fri Aug 24
 Count of images and reporting
 Added continue on bad image (poisoned my .rec image file!)
 Parameterise the root folder and class for images
 Exclude small images (likely to be no longer available)
"""

import cv2
from six.moves import urllib
import numpy as np
import os

def _download_and_resize(url, target):
    """Fetch one image URL and store it at ``target`` as a 300x300 JPEG.

    Raises an exception (URL, socket, OpenCV or ValueError) when the image
    cannot be fetched, decoded or written; the caller decides what to do.
    """
    # A single timed request both guards against dead hosts and supplies the
    # bytes, so the image is not fetched twice and the read cannot hang.
    with urllib.request.urlopen(url, data=None, timeout=1) as response:
        print(response)
        payload = response.read()
    with open(target, 'wb') as raw_file:
        raw_file.write(payload)

    img = cv2.imread(target)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise ValueError('Not a readable image: ' + url)
    resized_image = cv2.resize(img, (300, 300))
    if not cv2.imwrite(target, resized_image):
        raise ValueError('Could not write ' + target)


def store_raw_images(pic_limit, img_path, img_class, imagenet_wnid):
    """Download up to ``pic_limit`` images of an ImageNet synset.

    The URL list for ``imagenet_wnid`` is read from the ImageNet text API and
    walked once, in order. Every image that downloads and decodes is resized
    to 300x300 and saved as ``<img_path><img_class><n>.jpg`` with ``n``
    counting from 1. URLs that fail are reported and skipped.

    Args:
        pic_limit: Maximum number of images to store.
        img_path: Root folder, including its trailing separator.
        img_class: Class sub-folder, including its trailing separator.
        imagenet_wnid: WordNet ID of the synset to download.

    Returns:
        The number of images actually written, which is lower than
        ``pic_limit`` when the URL list runs out of usable entries.
    """
    out_dir = img_path + img_class
    os.makedirs(out_dir, exist_ok=True)

    written = 0
    if pic_limit > 0:
        # Get the URL list for this wnid from Imagenet
        image_link = 'http://image-net.org/api/text/imagenet.synset.geturls?wnid='+imagenet_wnid
        with urllib.request.urlopen(image_link) as listing:
            image_urls = listing.read().decode()

        # One pass only: re-walking the list would store duplicates and would
        # never finish when fewer than pic_limit URLs are usable.
        for line in image_urls.splitlines():
            if written >= pic_limit:
                break
            url = line.strip()
            if not url:
                continue
            print(url)
            target = out_dir + str(written + 1) + '.jpg'
            try:
                _download_and_resize(url, target)
            except Exception as e:
                # Best effort by design: any bad URL or bad image is reported
                # and skipped so one failure cannot stop the whole download.
                print(str(e))
                # Do not leave a partial or undecodable file behind
                try:
                    os.remove(target)
                except OSError:
                    pass
                continue
            written += 1

    print("Completed", written, "images written to ", out_dir)
    return written




http://farm4.static.flickr.com/3186/2606823711_b07495378f.jpg
<http.client.HTTPResponse object at 0x7f577b986b38>
http://farm4.static.flickr.com/3360/3236502886_6a505502c5.jpg
<http.client.HTTPResponse object at 0x7f577b986550>
http://farm5.static.flickr.com/4022/4322083945_bc2db8ccf3.jpg
<http.client.HTTPResponse object at 0x7f577b986278>
http://farm4.static.flickr.com/3121/2556445732_0f905b11e4.jpg
<http.client.HTTPResponse object at 0x7f577b987668>
http://farm4.static.flickr.com/3551/3443533285_2a70b92f2c.jpg
<http.client.HTTPResponse object at 0x7f577b987ba8>
http://farm1.static.flickr.com/224/491407069_8dbdc006c4.jpg
<http.client.HTTPResponse object at 0x7f577b987320>
http://farm1.static.flickr.com/33/42539714_c899059d27.jpg
<http.client.HTTPResponse object at 0x7f577b986710>
http://farm4.static.flickr.com/3512/3248952529_0bda5b70eb.jpg
<http.client.HTTPResponse object at 0x7f577b9868d0>
http://farm3.static.flickr.com/2461/3707202944_74e7a04f77.jpg
<http.client.HTTPResponse object

In [85]:
# Get images of people (not wearing helmets)
pic_limit = 10
img_path = "./train/"
img_class = "nohelmet/"
# 'People' in Imagenet, http://image-net.org/synset?wnid=n07942152
imagenet_wnid="n07942152"

store_raw_images(pic_limit, img_path, img_class, imagenet_wnid)

# delete images that no longer exist,usually <8k bytes
# See https://superuser.com/questions/644272/how-do-i-delete-all-files-smaller-than-a-certain-size-in-all-subfolders
# '-8k' means 'less than 8k bytes'
!find "$img_path"/"$img_class" -name "*.*" -size -8k delete



In [111]:
# Get images of people (wearing helmets)
pic_limit = 10
img_path = "./train/"
img_class = "helmet/"
# 'Safety hat' in Imagenet, http://image-net.org/synset?wnid=n03492922
imagenet_wnid="n03492922"

store_raw_images(pic_limit, img_path, img_class, imagenet_wnid)

# delete images that no longer exist,usually <8k bytes
# See https://superuser.com/questions/644272/how-do-i-delete-all-files-smaller-than-a-certain-size-in-all-subfolders
# '-8k' means 'less than 8k bytes'
!find "$img_path"/"$img_class" -name "*.*" -size -8k delete

http://farm4.static.flickr.com/3644/3349801965_d0268de7f5.jpg
<http.client.HTTPResponse object at 0x7f577b986780>
http://farm1.static.flickr.com/15/22639560_8edd3afe6f.jpg
<http.client.HTTPResponse object at 0x7f577b986588>
http://farm1.static.flickr.com/94/223377572_1a2590c521.jpg
<http.client.HTTPResponse object at 0x7f577b986898>
http://farm3.static.flickr.com/2325/2246167212_ff39c4d699.jpg
<http.client.HTTPResponse object at 0x7f577b986b38>
http://fotosa.ru/stock_photo/Westend61_RM/p_2838903.jpg
HTTP Error 403: Forbidden
http://farm2.static.flickr.com/1160/536319721_cc51452ad5.jpg
<http.client.HTTPResponse object at 0x7f577b986588>
http://farm4.static.flickr.com/3086/2336674146_71fa578fb5.jpg
<http.client.HTTPResponse object at 0x7f577b986240>
http://farm1.static.flickr.com/181/461311965_b430187841.jpg
<http.client.HTTPResponse object at 0x7f577b987898>
http://farm4.static.flickr.com/3143/3083759691_ebdc397703.jpg
<http.client.HTTPResponse object at 0x7f577b987a58>
http://farm4.sta

In [90]:
# List the helmet class folder to check which image files are present
!ls train/helmet


10.jpg	1.jpg  2.jpg  3.jpg  4.jpg  5.jpg  6.jpg  7.jpg  8.jpg	9.jpg
