# Downloading Script

# Setup

In [1]:
import pandas as pd
import numpy as np
import os
import urllib.request
import requests
from google.colab import files as FILE

# Changing the option to show a dataframe not 'in-line'
pd.set_option('display.expand_frame_repr', False)

# Connecting to my drive
from google.colab import drive

drive.mount('/content/gdrive/', force_remount=True)
%cd gdrive/MyDrive/Mushroom_Data

# Checking that my drive is connected
os.listdir()

Mounted at /content/gdrive/
/content/gdrive/MyDrive/Mushroom_Data


['region_query.csv',
 'occurrence.tsv',
 'multimedia.tsv',
 'multimedia.gsheet',
 'region_query.gsheet',
 'links.csv',
 'links.gsheet',
 '.ipynb_checkpoints',
 'images',
 'GBIF_Playground.ipynb',
 'Downloading_Script']

In [2]:
# Read the link table from the current (Drive) working directory.
# dtype=object turns off numeric type inference, so every column keeps the
# values exactly as they appear in the file.
links_raw = pd.read_csv('links.csv', sep=',', encoding='latin-1', dtype=object)

# Discard columns that contain no data at all.
links = links_raw.dropna(axis=1, how='all')  # uses taxonKey for 'class'

# Sanity report: is any cell still missing, and how large is the table?
has_missing = links.isnull().any().any()
print('Malformed:\t', has_missing)
print('Shape:\t\t', links.shape)

Malformed:	 False
Shape:		 (10900, 3)


# Removing duplicate species data

While verifying that the data had the same number of unique keys as species, I was surprised to find that it did not. As it turns out, the data I got from GBIF contains one mushroom species (*Laetiporus sulphureus*) that appears under two different taxonKeys. The cell below removes the rows belonging to one of those keys (`2542235`) from the DataFrame.

In [3]:
def _keys_for_species(frame, species):
    """Return the distinct 'key' values found on rows whose 'species' equals `species`."""
    return frame.loc[frame['species'] == species, 'key'].unique()


# Distinct taxon keys vs. distinct species names before any clean-up.
nkeys = links['key'].value_counts()
nspecies = links['species'].value_counts()
print('Unique keys:\t', len(nkeys))
print('Unique species:\t', len(nspecies), '\n')

# One row per (key, species) pair; counting rows per species then shows which
# species is listed under more than one key.
keys_species = links.groupby(['key', 'species']).size().reset_index(name='count')
pairs_per_species = keys_species[['species']].value_counts().sort_values()
print(pairs_per_species.tail())

print('\nIDs for duplicate species ("Laetiporus sulphureus"): ', _keys_for_species(links, 'Laetiporus sulphureus'))

# Keep every row except those filed under the redundant key.
links = links.loc[links['key'].ne('2542235')]

print('\n\nAfter Removal:')
print('\nIDs for duplicate species ("Laetiporus sulphureus"): ', _keys_for_species(links, 'Laetiporus sulphureus'))

# Recount to confirm that keys and species now match one-to-one.
nkeys = links['key'].value_counts()
nspecies = links['species'].value_counts()
print('\nUnique keys:\t', len(nkeys))
print('Unique species:\t', len(nspecies))

Unique keys:	 109
Unique species:	 108 

species               
Volvariella bombycina     1
Pseudocolus fusiformis    1
Poronidulus conchifer     1
Stereum ostrea            1
Laetiporus sulphureus     2
dtype: int64

IDs for duplicate species ("Laetiporus sulphureus"):  ['9072021' '2542235']


After Removal:

IDs for duplicate species ("Laetiporus sulphureus"):  ['9072021']

Unique keys:	 108
Unique species:	 108


In [4]:
# Show what is currently in the working directory.
# (Named `dir_contents` rather than `dir` so the builtin dir() is not shadowed
# for the rest of the session.)
dir_contents = os.listdir()
print(dir_contents)

# Create the image output folder if it is missing; exist_ok=True makes this a
# no-op when the folder is already there, so the cell can be re-run safely.
os.makedirs('images', exist_ok=True)

['region_query.csv', 'occurrence.tsv', 'multimedia.tsv', 'multimedia.gsheet', 'region_query.gsheet', 'links.csv', 'links.gsheet', '.ipynb_checkpoints', 'images', 'GBIF_Playground.ipynb', 'Downloading_Script']


In [None]:
# Destination folder on the mounted Drive; it is the same for every species,
# so it is set once instead of on every iteration.
output_dir = '/content/gdrive/MyDrive/Mushroom_Data/images/'

# NOTE(review): the [1:2] slice restricts this run to the second key only —
# presumably a trial run; widen or drop the slice to process every species.
for key in links['key'].unique()[1:2]:
    # All rows (and therefore all image links) belonging to this key.
    subset = links[links['key'] == key]
    species_name = subset['species'].unique()[0].replace(' ', '_')

    print(species_name)

    # The index advances for every link, including ones that fail, so a file's
    # number always matches the link's position within its species.
    for count, link in enumerate(subset['link']):
        try:
            # Without a timeout a stalled server would block the loop forever.
            response = requests.get(link, timeout=30)
            # Raise on 4xx/5xx so an HTML error page is never saved as a .jpg.
            response.raise_for_status()

            output_file = os.path.join(output_dir, species_name + '_' + str(count) + '.jpg')
            with open(output_file, 'wb') as writer:
                writer.write(response.content)

            # Also hand the saved file to the browser as a download.
            FILE.download(output_file)

        except Exception as e:
            # Best effort: report the failure and carry on with the next link.
            print(e)


Chlorophyllum_molybdites


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>