In [1]:
import csv
import os
import re
import multiprocessing
from urllib.request import urlretrieve

In [2]:
# Observations CSV export; the cells below read its id, scientific_name,
# image_url and license columns.
path = 'data/observations-182134.csv'

## Read entry data from CSV

In [3]:
# Tally observations per licence and per scientific name while collecting
# the fields needed later to download each image.
licences = {}           # licence code -> number of rows
scientific_names = {}   # scientific name -> number of rows
image_urls = []         # one dict per row: id, scientific_name, image_url

# newline='' lets the csv module handle line endings itself, which the csv
# documentation requires for quoted fields that contain newlines.
# NOTE(review): assumes the export is UTF-8 encoded; being explicit avoids
# depending on the platform's default encoding - confirm.
with open(path, newline='', encoding='utf-8') as csvfile:
    for row in csv.DictReader(csvfile):
        image_urls.append({
            'id': row['id'],
            'scientific_name': row['scientific_name'],
            'image_url': row['image_url'],
        })
        licence = row['license']
        species = row['scientific_name']
        # dict.get keeps first-seen insertion order, same as the explicit
        # "initialise to 0 then increment" form.
        licences[licence] = licences.get(licence, 0) + 1
        scientific_names[species] = scientific_names.get(species, 0) + 1

### Licensing summary

In [4]:
# Row counts per value of the CSV 'license' column (the '' key counts rows whose licence field is empty)
licences

{'': 6443,
 'CC-BY-NC': 22707,
 'CC-BY': 22122,
 'CC-BY-SA': 1100,
 'CC-BY-NC-SA': 828,
 'CC-BY-NC-ND': 486,
 'CC0': 1116,
 'CC-BY-ND': 16}

### Scientific name summary

In [5]:
# Row counts per scientific name
scientific_names

{'Entoloma virescens': 77,
 'Trametes coccinea': 1368,
 'Favolaschia calocera': 1112,
 'Auricularia cornea': 139,
 'Omphalotus nidiformis': 876,
 'Pulchrocladia retipora': 135,
 'Stereum versicolor': 756,
 'Microporus xanthopus': 248,
 'Lysurus mokusin': 152,
 'Clavulinopsis sulcata': 228,
 'Cladonia floerkeana': 61,
 'Podoscypha petalodes petalodes': 4,
 'Chlorophyllum brunneum': 136,
 'Coltricia australica': 170,
 'Nothojafnea cryptotricha': 3,
 'Lichenomphalia chromacea': 798,
 'Coprinellus micaceus': 61,
 'Cladonia confusa': 11,
 'Hygrocybe schistophila': 6,
 'Hypholoma brunneum': 129,
 'Aurantiporus pulcherrimus': 84,
 'Teloschistes chrysophthalmus': 192,
 'Laetiporus portentosus': 409,
 'Scutellinia scutellata': 8,
 'Rhizopus': 1,
 'Russula persanguinea': 385,
 'Aseroe rubra': 525,
 'Schizophyllum commune': 774,
 'Cantharellus concinnus': 253,
 'Amanita muscaria': 1307,
 'Sticta filix': 1,
 'Hericium coralloides': 135,
 'Macrolepiota clelandii': 453,
 'Leratiomyces ceres': 635,
 

In [6]:
# Number of distinct scientific names in the CSV
len(scientific_names)

1156

In [7]:
# Number of scientific names with at least 10 observations
sum(1 for count in scientific_names.values() if count >= 10)

481

In [8]:
# Number of scientific names with at least 100 observations
sum(1 for count in scientific_names.values() if count >= 100)

138

In [9]:
# Total observations belonging to names with at least 10 observations
sum(count for count in scientific_names.values() if count >= 10)

52986

We have 481 species with 10 or more images.

Let's fetch only those species with 10 or more images.
That's 481 species and about 53k images.
Looks like we're seeing a long tail here - more than half of the species (675 of 1,156) have fewer than 10 images.

## Filter entries to populated scientific names

In [10]:
# Scientific names that have at least 10 observations
popular_scientific_names = set(
    name for name, count in scientific_names.items() if count >= 10
)

In [11]:
# Keep only the entries whose scientific name passed the threshold above
populated_image_urls = list(filter(
    lambda entry: entry['scientific_name'] in popular_scientific_names,
    image_urls,
))

In [12]:
# (entries kept after filtering, total entries read from the CSV)
len(populated_image_urls), len(image_urls)

(52986, 54818)

## Fetch images

In [13]:
# Directory scheme:
# data/<scientific_name lowercased, spaces replaced by underscores>/<inat_id><extension from the image URL>

In [14]:
from urllib.parse import urlparse

In [15]:
def save_image(image_data):
    """Download one observation image to data/<scientific_name>/<id><ext>.

    Args:
        image_data: mapping with the keys 'id', 'scientific_name' and
            'image_url'.

    Returns:
        None. The image is written to disk and the destination path is
        printed as a side effect.

    Raises:
        Any exception raised by ``urlretrieve`` for an invalid or
        unreachable URL, or by the filesystem calls.
    """
    url = urlparse(image_data['image_url'])
    # Take the extension from the final path component only. The previous
    # regex matched from the FIRST dot anywhere in the path, so a dot in a
    # directory segment produced an "extension" containing slashes, and a
    # path without any dot raised AttributeError. A URL without an extension
    # now simply yields an empty suffix.
    ext = os.path.splitext(url.path)[1].lower()
    filename = f'{image_data["id"]}{ext}'
    # Directory name: lowercased scientific name with spaces -> underscores.
    scientific_name = image_data['scientific_name'].lower().replace(' ', '_')
    outpath = os.path.join('data', scientific_name)
    outfile = os.path.join(outpath, filename)
    print(outfile)
    os.makedirs(outpath, exist_ok=True)
    urlretrieve(image_data['image_url'], outfile)

In [16]:
# Pool of 80 worker processes shared by the pool.map download batches below
# (the timing notes at the end of the notebook compare pool sizes 20/40/80).
pool = multiprocessing.Pool(80)

data/lachnum_pteridophyllum/47958568.jpeg
data/tremella_fuciformis/47950679.jpg
data/hypholoma_brunneum/48073738.jpeg
data/gymnopilus_junonius/48090538.jpeg
data/roridomyces_austrororidus/48221485.jpeg
data/omphalotus_nidiformis/48107174.jpg
data/psilocybe_subaeruginosa/48216753.jpg
data/cortinarius_rotundisporus/48099048.jpeg
data/amanita_muscaria/48121954.jpeg
data/boletellus_obscurecoccineus/48082312.jpg
data/oudemansiella_gigaspora/48200718.jpg
data/bolbitius_titubans/47949286.jpeg
data/hymenoscyphus_berggrenii/47960840.jpeg
data/amanita_muscaria/48301243.jpeg
data/clathrus_archeri/48293211.jpg
data/rickenella_fibula/48237209.jpeg
data/lactarius_eucalypti/47954988.jpg
data/battarrea_phalloides/48210056.jpg
data/cortinarius_austrovenetus/48417018.jpeg
data/russula_lenkunya/48337978.jpeg
data/ramaria_lorithamnus/47980664.jpg
data/rhodofomitopsis_lilacinogilva/48420198.jpeg
data/heterotextus_peziziformis/48213831.jpeg
data/leucopaxillus_lilacinus/48311185.jpg
data/amanita_muscaria/484

data/ryvardenia_campyla/48966211.jpeg
data/ramariopsis_simplex/48424123.jpeg
data/gymnopilus_allantopus/48062617.jpeg
data/stereum_versicolor/48212445.jpeg
data/stereum_hirsutum/48624432.jpeg
data/urnula_campylospora/48398986.jpg
data/amanita_xanthocephala/48883353.jpg
data/cortinarius_archeri/48237234.jpeg
data/leotia_lubrica/48083964.jpg
data/gymnopilus_allantopus/48417024.jpeg
data/scytinotus_longinquus/48217245.jpeg
data/cruentomycena_viscidocruenta/48739518.jpg
data/leucopaxillus_eucalyptorum/48509795.jpg
data/lactarius_deliciosus/48633013.jpeg
data/lepista_nuda/48969591.jpg
data/russula_persanguinea/48320075.jpeg
data/lasiosphaeria_ovina/48762067.jpeg
data/artomyces_austropiperatus/48298814.jpeg
data/schizophyllum_commune/48425821.jpeg
data/clavaria_amoena/48630543.jpeg
data/lentinellus_pulvinulus/49002214.jpeg
data/ramariopsis_pulchella/48764503.jpeg
data/geastrum_triplex/48895998.jpeg
data/galerina_hypnorum/48524294.jpeg
data/gymnopilus_junonius/47955517.jpeg
data/stereum_illud

data/omphalotus_nidiformis/48243080.jpg
data/mycena_subgalericulata/49010202.jpeg
data/suillus_granulatus/48974661.jpg
data/phaeohelotium_baileyanum/48375967.jpeg
data/trametes_coccinea/48531886.jpeg
data/hypoxylon_howeanum/48215277.jpeg
data/geastrum_triplex/48630695.jpeg
data/russula_viridis/48489903.jpg
data/gliophorus_graminicolor/48427950.jpeg
data/lepista_sublilacina/48067830.jpg
data/amanita_xanthocephala/48326621.jpeg
data/podoscypha_petalodes/48625383.jpeg
data/mycena_interrupta/48633026.jpeg
data/pseudohydnum_gelatinosum/49003099.jpeg
data/oudemansiella_gigaspora/48605562.jpeg
data/mycena_epipterygia/48091427.jpg
data/mycena_mulawaestris/48323721.jpeg
data/sphaerobolus_stellatus/48315494.jpg
data/flammulina_velutipes/48086123.jpg
data/mycena_kuurkacea/48884675.jpeg
data/phaeotremella_fimbriata/48889078.jpeg
data/clavulinopsis_sulcata/48322074.jpeg
data/gymnopilus_allantopus/48075661.jpeg
data/omphalotus_nidiformis/47952886.jpg
data/ramaria_flaccida/48525988.jpeg
data/geastrum

data/mycena_clarkeana/48740106.jpg
data/collybia_eucalyptorum/48899530.jpeg
data/rickenella_fibula/48103368.jpg
data/stereum_versicolor/48633030.jpeg
data/aleuria_aurantia/48755306.jpeg
data/coltricia_australica/48110239.jpg
data/omphalotus_nidiformis/48307762.jpg
data/aurantiporus_pulcherrimus/48815220.jpg
data/trametes_coccinea/48205288.jpeg
data/volvopluteus_gloiocephalus/48277645.jpg
data/roridomyces_austrororidus/48398965.jpg
data/artomyces_austropiperatus/48630903.jpeg
data/cortinarius_austrovenetus/48743580.jpeg
data/pseudohydnum_gelatinosum/48632335.jpeg
data/cortinarius_kioloensis/48748950.jpg
data/kuehneromyces_brunneoalbescens/48186766.jpeg
data/clathrus_ruber/48407838.jpg
data/gymnopilus_allantopus/48492298.jpg
data/gymnopilus_ferruginosus/48421332.jpeg
data/inocybe_fibrillosibrunnea/48621259.jpeg
data/scleroderma_cepa/48298832.jpeg
data/deconica_horizontalis/48316217.jpeg
data/ramariopsis_crocea/48998904.jpeg
data/boletellus_emodensis/48629514.jpg
data/clavaria_amoena/4875

data/macrotyphula_juncea/48526172.jpeg
data/coltricia_australica/48748966.jpg
data/pholiota_squarrosipes/47954196.jpeg
data/heterotextus_peziziformis/49071106.jpeg
data/heterotextus_peziziformis/48621591.jpeg
data/phellodon_niger/48886703.jpeg
data/ramaria_lorithamnus/47949830.jpeg
data/phellodon_niger/48376548.jpeg
data/urnula_campylospora/47959966.jpeg
data/amanita_xanthocephala/48740416.jpg
data/leotia_lubrica/48525261.jpeg
data/armillaria_luteobubalina/48075967.jpeg
data/omphalotus_nidiformis/48093779.jpg
data/ascocoryne_sarcoides/48759755.jpeg
data/clavulinopsis_sulcata/48421338.jpeg
data/clavaria_amoena/48755940.jpeg
data/omphalotus_nidiformis/48219939.jpeg
data/volvopluteus_gloiocephalus/48530376.jpg
data/xanthoria_parietina/49004184.jpeg
data/cortinarius_archeri/48639331.jpg
data/lachnum_pteridophyllum/48213504.jpeg
data/pluteus_atromarginatus/49010908.jpeg
data/hymenotorrendiella_eucalypti/48324076.jpeg
data/ramariopsis_crocea/48318787.jpeg
data/pleurotus_purpureo-olivaceus/48

data/trametes_coccinea/48735941.jpg
data/mycena_subgalericulata/48425330.jpeg
data/lentinus_arcularius/48901561.jpeg
data/trametes_coccinea/48740655.jpg
data/tremella_mesenterica/48753355.jpeg
data/leratiomyces_ceres/48762866.jpeg
data/oudemansiella_gigaspora/48316473.jpg
data/porpolomopsis_lewelliniae/48757384.jpeg
data/amanita_muscaria/48208501.jpeg
data/panellus_pusillus/48398974.jpg
data/mucronella_pendula/49105003.jpeg
data/lichenomphalia_chromacea/48890202.jpeg
data/trametes_coccinea/48216578.jpg
data/aleurina_ferruginea/48511732.jpg
data/galerina_patagonica/48877331.jpg
data/stereum_versicolor/48190757.jpg
data/macrolepiota_clelandii/48613457.jpg
data/cordyceps_tenuipes/47950574.jpeg
data/lysurus_mokusin/48722465.jpg
data/amanita_muscaria/48310105.jpg
data/mycena_clarkeana/48107119.jpg
data/cheilymenia_raripila/47958160.jpg
data/oudemansiella_gigaspora/48602384.jpg
data/armillaria_luteobubalina/48419519.jpg
data/chlorophyllum_brunneum/48736732.jpg
data/ramaria_flaccida/48527206.

data/trametes_coccinea/49204009.jpeg
data/armillaria_luteobubalina/48894965.jpg
data/mycena_kuurkacea/48968410.jpeg
data/mycena_subvulgaris/48322963.jpeg
data/macrolepiota_clelandii/49307764.jpeg
data/xylaria_filiformis/49310349.jpeg
data/hymenoscyphus_berggrenii/47958563.jpeg
data/collybia_eucalyptorum/48321634.jpeg
data/podoserpula_pusio/48398978.jpg
data/omphalotus_nidiformis/48618250.jpg
data/melanophyllum_haematospermum/48468588.jpg
data/urnula_campylospora/48757454.jpeg
data/pluteus_romellii/49312851.jpeg
data/schizophyllum_commune/49178242.jpg
data/inocybe_fibrillosibrunnea/48528507.jpeg
data/xylaria_apiculata/49315176.jpeg
data/gymnopilus_eucalyptorum/49210283.jpeg
data/clavaria_amoena/49107266.jpg
data/clavulinopsis_sulcata/49204012.jpeg
data/neobarya_agaricicola/49107926.jpeg
data/collybia_eucalyptorum/49317131.jpeg
data/deconica_horizontalis/49000473.jpeg
data/xylaria_castorea/49321957.jpeg
data/lachnum_varians/49104000.jpeg
data/favolaschia_calocera/48427115.jpeg
data/desco

data/mycena_albidofusca/49552588.jpg
data/cortinarius_rotundisporus/49270732.jpg
data/leratiomyces_ceres/49301847.jpeg
data/entoloma_viridomarginatum/49935786.jpeg
data/mycena_interrupta/49204540.jpeg
data/amanita_flavella/49325064.jpg
data/stereum_versicolor/48895328.jpeg
data/lichenomphalia_chromacea/49945557.jpg
data/favolaschia_calocera/49107302.jpeg
data/galerina_patagonica/49695203.jpg
data/fomitiporia_robusta/49206417.jpeg
data/leucopaxillus_eucalyptorum/49803087.jpg
data/clavulinopsis_sulcata/48757509.jpeg
data/gliophorus_graminicolor/49804576.jpg
data/leratiomyces_ceres/48994514.jpg
data/armillaria_luteobubalina/49198959.jpeg
data/armillaria_luteobubalina/48419528.jpg
data/coprinellus_disseminatus/50012647.jpeg
data/amanita_muscaria/49554830.jpg
data/mycena_interrupta/49392404.jpeg
data/stereum_rugosum/49703402.jpeg
data/lactarius_eucalypti/49313330.jpeg
data/rickenella_fibula/49418208.jpg
data/coprinellus_disseminatus/48513468.jpeg
data/flammulina_velutipes/49315824.jpeg
data

data/scleroderma_cepa/49815834.jpeg
data/psilocybe_subaeruginosa/50100525.jpg
data/rigidoporus_laetus/49421145.jpeg
data/rickenella_fibula/49696370.jpeg
data/mycena_carmeliana/49308527.jpeg
data/omphalotus_nidiformis/49827244.jpeg
data/mycena_lazulina/49112906.jpeg
data/stereum_versicolor/50045286.jpeg
data/aseroe_rubra/49101538.jpeg
data/oudemansiella_gigaspora/49805547.jpg
data/volvopluteus_gloiocephalus/49193606.jpg
data/psilocybe_subaeruginosa/49803373.jpg
data/coprinus_comatus/49281555.jpg
data/lichenomphalia_chromacea/50021596.jpg
data/mycena_interrupta/49114829.jpeg
data/cortinarius_rotundisporus/49415070.jpeg
data/tremella_fuciformis/49105814.jpeg
data/hymenotorrendiella_eucalypti/49210755.jpeg
data/phlebopus_marginatus/49983209.jpg
data/ascocoryne_sarcoides/49697543.jpg
data/stereum_illudens/49105720.jpeg
data/clavulinopsis_sulcata/50025822.jpeg
data/coltricia_australica/49821652.jpeg
data/geastrum_tenuipes/49506150.jpg
data/pholiota_communis/50016620.jpeg
data/cerrena_zonata/

data/favolaschia_calocera/49803668.jpg
data/fistulina_hepatica/49726054.jpeg
data/cortinarius_austrovenetus/50025824.jpeg
data/leratiomyces_ceres/49553301.jpg
data/gymnopilus_junonius/49410923.jpeg
data/cruentomycena_viscidocruenta/50122057.jpeg
data/oudemansiella_gigaspora/49524147.jpeg
data/clavaria_amoena/49924648.jpeg
data/stereum_illudens/49103840.jpeg
data/amanita_farinacea/49816303.jpeg
data/aleurina_ferruginea/49539945.jpg
data/lentinus_sajor-caju/49998681.jpg
data/hericium_coralloides/50070752.jpeg
data/coltricia_australica/49549992.jpeg
data/cymatoderma_elegans/49613276.jpg
data/cruentomycena_viscidocruenta/49918029.jpg
data/urnula_campylospora/50029180.jpeg
data/cerrena_zonata/49415166.jpg
data/schizophyllum_commune/49425376.jpeg
data/mycena_interrupta/49211583.jpeg
data/bolbitius_titubans/49671804.jpeg
data/geastrum_triplex/49113209.jpeg
data/podoserpula_pusio/49384675.jpg
data/leotia_lubrica/49313557.jpeg
data/peziza_varia/49526409.jpg
data/stereum_illudens/49703666.jpg
da

data/microporus_xanthopus/50142158.jpeg
data/tremella_fuciformis/49295980.jpeg
data/hexagonia_vesparia/49679520.jpg
data/mycena_mijoi/49414097.jpeg
data/crepidotus_nephrodes/49103848.jpeg
data/cortinarius_austrovenetus/49411564.jpg
data/ramaria_flaccida/49698744.jpeg
data/chlorophyllum_brunneum/49919675.jpeg
data/hypholoma_fasciculare_armeniacum/49553820.jpeg
data/chlorociboria/49937037.jpeg
data/pseudohydnum_gelatinosum/50037495.jpeg
data/flammulina_velutipes/50136191.jpeg
data/cortinarius_fibrillosus/49416158.jpeg
data/collybia_eucalyptorum/49105786.jpeg
data/cortinarius_archeri/49924657.jpeg
data/cortinarius_austrovenetus/49105740.jpeg
data/mycena_interrupta/49105823.jpeg
data/cruentomycena_viscidocruenta/49204956.jpeg
data/fomitiporia_robusta/49404845.jpg
data/cortinarius_rotundisporus/49427146.jpg
data/coltricia_australica/49696384.jpeg
data/mycena_interrupta/49309931.jpeg
data/sowerbyella_rhenana/50203983.jpeg
data/mycena_vinacea/49702439.jpeg
data/lichenomphalia_chromacea/495437

data/gymnopilus_allantopus/49904993.jpg
data/calycina_claroflava/49107588.jpeg
data/boletellus_obscurecoccineus/49930527.jpg
data/rigidoporus_laetus/49111426.jpeg
data/cruentomycena_viscidocruenta/49307355.jpg
data/psilocybe_subaeruginosa/49314323.jpeg
data/armillaria_luteobubalina/49551588.jpg
data/favolaschia_calocera/49310347.jpeg
data/trametes_versicolor/49705629.jpeg
data/leotia_lubrica/50243048.jpeg
data/galerina_hypnorum/49414131.jpg
data/cortinarius_fibrillosus/50029630.jpeg
data/amanita_xanthocephala/50027005.jpg
data/rickenella_fibula/49816780.jpeg
data/cortinarius_austrovenetus/49219799.jpg
data/cruentomycena_viscidocruenta/50245725.jpeg
data/geastrum_triplex/49829048.jpeg
data/gymnopilus_allantopus/49547328.jpg
data/flammulina_velutipes/49942206.jpg
data/leratiomyces_ceres/49512484.jpg
data/stereum_illudens/49564152.jpeg
data/favolaschia_calocera/49702445.jpeg
data/macrolepiota_clelandii/49708101.jpeg
data/amanita_muscaria/50139251.jpg
data/ramariopsis_crocea/49699125.jpg
d

data/chlorociboria/49805120.jpg
data/clavaria_amoena/49697171.jpeg
data/podoserpula_pusio/49558065.jpeg
data/stereum_versicolor/50093874.jpg
data/singerocybe_clitocyboides/50524184.jpeg
data/cortinarius_rotundisporus/50530131.jpeg
data/fomitiporia_robusta/50377244.jpeg
data/stereum_versicolor/50025213.jpeg
data/mycena_cystidiosa/50532935.jpeg
data/porpolomopsis_lewelliniae/49804260.jpg
data/coltriciella_dependens/50369564.jpg
data/gymnopilus_allantopus/49928317.jpeg
data/tremella_fuciformis/50360262.jpg
data/cymatoderma_elegans/49569246.jpeg
data/postia_pelliculosa/50380589.jpeg
data/tubaria_rufofulva/49817312.jpeg
data/amanita_xanthocephala/49708329.jpeg
data/hypholoma_fasciculare/50369677.jpg
data/gymnopilus_ferruginosus/49569388.jpeg
data/phaeotrametes_decipiens/50596461.jpeg
data/clavaria_amoena/50515478.jpeg
data/collybia_eucalyptorum/49307394.jpeg
data/phaeolus_schweinitzii/49552131.jpg
data/ramaria_filicicola/49829809.jpeg
data/hygrocybe_rodwayi/50533824.jpeg
data/geastrum_tripl

data/ramaria_anziana/50965098.jpeg
data/lactarius_deliciosus/50124403.jpg
data/clavulina_coralloides/50244251.jpeg
data/crepidotus_eucalyptorum/50622178.jpeg
data/apioperdon_pyriforme/50238280.jpeg
data/mycena_austrofilopes/50968352.jpeg
data/phylloporus_rhodoxanthus/50034478.jpg
data/mycena_kurramulla/50236884.jpeg
data/schizophyllum_commune/49922900.jpeg
data/armillaria_luteobubalina/50524281.jpg
data/ramaria_filicicola/50255257.jpeg
data/hypholoma_fasciculare/49804403.jpg
data/mycena_interrupta/50970760.jpg
data/amanita_muscaria/50208827.jpg
data/lactarius_eucalypti/50504263.jpeg
data/astraeus_hygrometricus/50650013.jpg
data/hymenotorrendiella_clelandii/49930652.jpeg
data/omphalotus_nidiformis/50363555.jpeg
data/podoscypha_petalodes/50973168.jpeg
data/pseudohydnum_gelatinosum/50246735.jpeg
data/protostropharia_semiglobata/50745490.jpeg
data/cortinarius_rotundisporus/50151410.jpg
data/beauveria_bassiana/50867626.jpg
data/russula_persanguinea/49811233.jpg
data/heterotextus_miltinus/50

data/cortinarius_archeri/51344528.jpeg
data/phaeohelotium_baileyanum/50971508.jpeg
data/clavaria_amoena/50761144.jpeg
data/stereum_versicolor/50516603.jpeg
data/macrolepiota_clelandii/50756102.jpeg
data/aleurina_ferruginea/50855054.jpg
data/oudemansiella_gigaspora/51208997.jpeg
data/microporus_affinis/51071859.jpeg
data/amanita_farinacea/50754929.jpeg
data/scleroderma_cepa/50473871.jpeg
data/omphalotus_nidiformis/51270515.jpg
data/rhodofomitopsis_lilacinogilva/50370829.jpg
data/schizophyllum_commune/50968360.jpeg
data/cruentomycena_viscidocruenta/50734612.jpg
data/russula_persanguinea/50237305.jpeg
data/lichenomphalia_chromacea/50222077.jpeg
data/clavulina_subrugosa/50489255.jpeg
data/mycena_interrupta/50500514.jpg
data/leucopaxillus_eucalyptorum/50522925.jpeg
data/lepista_sublilacina/50244941.jpg
data/urnula_campylospora/50611866.jpeg
data/cortinarius_austrovenetus/50382145.jpeg
data/marasmius_alveolaris/50395600.jpeg
data/volvopluteus_gloiocephalus/50638551.jpg
data/crepidotus_variab

data/phellodon_niger/50975536.jpeg
data/tremella_fuciformis/50650368.jpeg
data/phlebopus_marginatus/50223036.jpeg
data/austeria_citrea/50523389.jpeg
data/chrysothrix_candelaris/50756369.jpeg
data/leotia_lubrica/51075612.jpeg
data/ramaria_flaccida/50509001.jpeg
data/humidicutis_arcohastata/50644680.jpeg
data/amanita_muscaria/51181152.jpeg
data/galerina_hypnorum/51444300.jpg
data/clavaria_amoena/50251844.jpeg
data/fomitiporia_robusta/51441349.jpg
data/stereum_ostrea/51299855.jpg
data/clavulina_subrugosa/51347820.jpeg
data/trametes_versicolor/50388731.jpg
data/lichenomphalia_chromacea/50245254.jpg
data/amanita_ananiceps/51355633.jpg
data/clavaria_amoena/51065278.jpeg
data/stereum_hirsutum/51329235.jpg
data/clavulinopsis_corallinorosacea/50856177.jpg
data/psathyrella_echinata/50968783.jpeg
data/auricularia_cornea/50218946.jpg
data/amanita_xanthocephala/50874164.jpeg
data/rickenella_fibula/51443158.jpg
data/lichenomphalia_chromacea/51054419.jpg
data/urnula_campylospora/50312938.jpg
data/ent

data/clavulina_cinerea/50366607.jpg
data/clavulinopsis_corallinorosacea/50964780.jpeg
data/mycena_interrupta/50655442.jpg
data/fomitiporia_robusta/50353797.jpg
data/heterotextus_peziziformis/51208327.jpeg
data/sphaerobolus_stellatus/50853199.jpg
data/gymnopilus_eucalyptorum/51444345.jpeg
data/amanita_xanthocephala/50571107.jpeg
data/mycena_piringa/50981681.jpeg
data/galerina_hypnorum/50612933.jpeg
data/thelephora_terrestris/51077455.jpeg
data/schizophyllum_commune/50641360.jpg
data/volvopluteus_gloiocephalus/51332496.jpg
data/descolea_recedens/51088765.jpg
data/anthracophyllum_archeri/50517724.jpg
data/ophiocordyceps_robertsii/51075649.jpeg
data/clavulinopsis_fusiformis/50512188.jpeg
data/ileodictyon_gracile/50749289.jpeg
data/cortinarius_austrovenetus/50239109.jpeg
data/amanita_ananiceps/51446718.jpeg
data/scleroderma_cepa/51443395.jpeg
data/cortinarius_rotundisporus/50531969.jpeg
data/clavulinopsis_sulcata/51184346.jpg
data/ramariopsis_crocea/50755460.jpeg
data/cordierites_frondosus/

data/lactarius_deliciosus/50730868.jpg
data/psilocybe_subaeruginosa/51444960.jpg
data/psilocybe_subaeruginosa/51556071.jpeg
data/amanita_xanthocephala/51431721.jpg
data/geastrum_triplex/50532814.jpeg
data/gymnopilus_junonius/51445216.jpeg
data/ileodictyon_gracile/51432442.jpg
data/podaxis_pistillaris/51445517.jpeg
data/clavulinopsis_fusiformis/51443404.jpeg
data/stereum_versicolor/50650683.jpg
data/tremella_fuciformis/50971895.jpeg
data/mycena_carmeliana/51210042.jpeg
data/cortinarius_rotundisporus/51464506.jpeg
data/gymnopilus_allantopus/51447912.jpg
data/singerocybe_clitocyboides/50763943.jpeg
data/mycena_subgalericulata/50379196.jpeg
data/phaeolus_schweinitzii/50581174.jpeg
data/mycena_interrupta/50359705.jpg
data/psilocybe_subaeruginosa/50644852.jpg
data/amanita_xanthocephala/50404063.jpg
data/clavulina_vinaceocervina/51213041.jpeg
data/cordyceps_cranstounii/51074184.jpeg
data/russula_clelandii/50924583.jpeg
data/cortinarius_austrovenetus/50631257.jpeg
data/ramaria_lorithamnus/5134

data/clavulinopsis_sulcata/50875314.jpeg
data/clavulina_rugosa/50976498.jpeg
data/macrolepiota_clelandii/50532933.jpeg
data/clavaria_zollingeri/51442869.jpeg
data/clavulinopsis_sulcata/51213842.jpeg
data/psilocybe_subaeruginosa/51556074.jpg
data/mycena_subgalericulata/51323752.jpg
data/ramaria_filicicola/50970617.jpeg
data/clavulinopsis_sulcata/50643721.jpeg
data/galerina_hypnorum/51411306.jpeg
data/laetiporus_portentosus/51205359.jpeg
data/mycena_interrupta/50248031.jpeg
data/nidula_niveotomentosa/50929467.jpeg
data/stereum_illudens/51074729.jpeg
data/leotia_lubrica/50254415.jpeg
data/russula_clelandii/51348736.jpg
data/chromocyphella_muscicola/51453633.jpeg
data/clavulinopsis_sulcata/50759587.jpeg
data/amanita_xanthocephala/51562893.jpg
data/lichenomphalia_chromacea/51675207.jpg
data/deconica_horizontalis/50765777.jpeg
data/coltriciella_dependens/50867320.jpeg
data/heterotextus_peziziformis/50933501.jpeg
data/pluteus_romellii/51071663.jpeg
data/conocybe_filaris/51680746.jpeg
data/rus

data/microporus_affinis/51068740.jpg
data/omphalotus_nidiformis/51545311.jpg
data/cruentomycena_viscidocruenta/51771025.jpg
data/cortinarius_rotundisporus/51976772.jpeg
data/amanita_xanthocephala/51541430.jpg
data/fomitiporia_robusta/51582073.jpg
data/amanita_muscaria/51851730.jpg
data/clavulinopsis_sulcata/51741144.jpeg
data/macrolepiota_clelandii/51325480.jpeg
data/sphaerobolus_stellatus/52003154.jpeg
data/banksiamyces_macrocarpus/51997515.jpeg
data/cortinarius_sinapicolor/51449413.jpeg
data/ascocoryne_sarcoides/51883102.jpeg
data/galerina_patagonica/51208982.jpeg
data/gymnopilus_junonius/52013792.jpg
data/ramaria_lorithamnus/51872557.jpeg
data/podoserpula_pusio/51347654.jpg
data/hypholoma_fasciculare_armeniacum/52100232.jpeg
data/byssomerulius_corium/51783419.jpeg
data/ramariopsis_crocea/51670931.jpeg
data/hygrocybe_astatogala/51559317.jpeg
data/russula_persanguinea/51952707.jpg
data/heterotextus_peziziformis/51459497.jpg
data/ramariopsis_crocea/51454097.jpeg
data/trametes_coccinea/

data/sanguinoderma_rude/52239611.jpeg
data/picipes_melanopus/52242940.jpeg
data/mycena_epipterygia/51989749.jpeg
data/tricholomopsis_rutilans/52004484.jpeg
data/tremella_fuciformis/52345990.jpeg
data/amanita_muscaria/51777189.jpg
data/geastrum_triplex/51219997.jpg
data/laetiporus_portentosus/51562361.jpg
data/trametes_coccinea/51766997.jpg
data/chlorophyllum_brunneum/51351633.jpg
data/clavulinopsis_sulcata/51780396.jpeg
data/mycena_subgalericulata/52122596.jpeg
data/gymnopilus_allantopus/52350083.jpeg
data/favolaschia_calocera/51889560.jpeg
data/hypholoma_fasciculare_armeniacum/51892763.jpeg
data/mycena_lazulina/51879613.jpeg
data/ramariopsis_kunzei/51454712.jpeg
data/heterotextus_peziziformis/51998158.jpg
data/hypholoma_fasciculare/51338880.jpeg
data/chlorociboria/52248463.jpg
data/lichenomphalia_chromacea/51671470.jpeg
data/clavulinopsis_sulcata/52227600.jpg
data/cortinarius_fibrillosus/51631715.jpeg
data/mycena_subgalericulata/51412010.jpeg
data/lactarius_eucalypti/51649796.jpg
data

data/mycena_cystidiosa/51767212.jpg
data/trametes_coccinea/51561187.jpg
data/amanita_xanthocephala/52001352.jpeg
data/clavulinopsis_sulcata/52340902.jpg
data/clavulina_vinaceocervina/51862990.jpeg
data/favolaschia_calocera/51774736.jpg
data/phaeoclavulina_abietina/52434741.jpeg
data/collybia_eucalyptorum/51449582.jpg
data/lichenomphalia_chromacea/51874157.jpeg
data/mycena_interrupta/52252727.jpg
data/chlorociboria/52222732.jpg
data/sanguinoderma_rude/52215248.jpg
data/clavulina_coralloides/52239857.jpeg
data/hypholoma_brunneum/51998436.jpg
data/phlebia_subceracea/51880678.jpeg
data/tremella_fuciformis/51767388.jpg
data/cortinarius_sinapicolor/52228893.jpg
data/aleuria_aurantia/51449818.jpeg
data/russula_lenkunya/52222881.jpg
data/sanguinoderma_rude/52215679.jpeg
data/entoloma_panniculus/52230838.jpg
data/amanita_xanthocephala/51767465.jpeg
data/hypholoma_brunneum/51880759.jpeg
data/hygrocybe_cheelii/51679216.jpeg
data/humidicutis_arcohastata/52415987.jpeg
data/stereum_versicolor/518833

data/trametes_coccinea/51995220.jpg
data/mycena_tuvara/52123298.jpeg


In [17]:
# Total number of entries selected for download
len(populated_image_urls)

52986

In [18]:
%time pool.map(save_image, populated_image_urls[:5000])

CPU times: user 7.37 s, sys: 6.07 s, total: 13.4 s
Wall time: 21min 24s


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [20]:
%time pool.map(save_image, populated_image_urls[5000:10000])

CPU times: user 7 s, sys: 5.28 s, total: 12.3 s
Wall time: 11min 6s


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [18]:
%time pool.map(save_image, populated_image_urls[10000:15000])

CPU times: user 6.07 s, sys: 4.53 s, total: 10.6 s
Wall time: 6min 2s


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [19]:
%time pool.map(save_image, populated_image_urls[15000:20000])

CPU times: user 4.53 s, sys: 3.26 s, total: 7.79 s
Wall time: 6min 26s


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [20]:
%time pool.map(save_image, populated_image_urls[20000:25000])

CPU times: user 4.56 s, sys: 3.48 s, total: 8.04 s
Wall time: 6min


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [None]:
%time pool.map(save_image, populated_image_urls[25000:30000])

In [None]:
%time pool.map(save_image, populated_image_urls[30000:35000])

In [None]:
%time pool.map(save_image, populated_image_urls[35000:40000])

In [None]:
%time pool.map(save_image, populated_image_urls[40000:45000])

In [None]:
%time pool.map(save_image, populated_image_urls[45000:50000])

In [None]:
%time pool.map(save_image, populated_image_urls[50000:])

### Time calculations

A sample run shows 12 seconds for 20 images

In [19]:
# Num hours for all images: ~50000 images at the sample-run rate of
# 12 seconds per 20 images, converted seconds -> minutes -> hours
50000/20*12/60/60

8.333333333333334

8 hours for all the images. Let's do them in batches of 5000.

In [20]:
# Approx time in minutes for 5000 images (pool 10)
# 5000 images at the sample-run rate of 12 seconds per 20 images, converted seconds -> minutes
5000/20*12/60

50.0

In [None]:
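# Wall time to download one batch of 5000 images, by pool size:
# With pool of 20, 5000 images take about 21 mins
# With pool of 40, 5000 images take about 11 mins
# With pool of 80, 5000 images take about 6 mins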