# Retrieving species predictions from AI4Earth
## By Emma Vriezen | Created on: 2020-12-21 | Last edit on: 2021-01-18

In [46]:
# Imports:
import html  # Escape text before it is inserted into HTML
import os  # Environment variables and path handling
import re  # Regular expressions
import time # Sleep function to allow for waiting between requests
from io import StringIO, BytesIO, BufferedReader  # To convert a string that describes bytes into interpretable IO
from urllib.parse import quote_plus  # URL-encode query strings

import numpy as np  # Nice functions
import pandas as pd  # DataFrames
import requests as rq  # Send HTTP requests to servers
from PIL import Image  # Store an image that is downloaded from Waarneming


In [2]:
# Import the bird data, which is now merely a list of 298 species.
# The data directory can be overridden with the BIRD_PROJECT_DIR environment
# variable, so the notebook is not tied to one machine. When the variable is
# not set, the original location is used.
DATA_DIR = os.environ.get(
    "BIRD_PROJECT_DIR",
    r"C:\Users\emmav\Documents\AI Master year 1\TMM\Project",
)
db_path = os.path.join(DATA_DIR, "BirdIDs.txt")
db = pd.read_csv(db_path, header=0)
db.head()


Unnamed: 0,Species,Scientific,ID
0,Dodaars,Tachybaptus ruficollis,2
1,Fuut,Podiceps cristatus,91
2,Roodhalsfuut,Podiceps grisegena,367
3,Geoorde Fuut,Podiceps nigricollis,3
4,Aalscholver,Phalacrocorax carbo,58


### Check which of these species the classifier is trained on:

In [3]:
# The class list of the classifier is stored in the same directory as the bird
# list, so that directory is reused instead of repeating the absolute path:
ai_list_path = os.path.join(os.path.dirname(db_path), "species_classification.2019.12.00.classes.txt")
ai_list = pd.read_csv(ai_list_path, names=['Scientific'], sep='|', usecols=[0])
ai_list = ai_list.drop_duplicates()  # There are many dogs too which all have the same scientific name.
ai_list.head()


Unnamed: 0,Scientific
0,Marmota flaviventris
1,Zonotrichia capensis
2,Arctium minus
3,Cirsium occidentale venustum
4,Olivella biplicata


In [4]:
# Inner-join both lists on the scientific name, so that only the species
# that the classifier is trained on are retained.
common = db.merge(ai_list, on='Scientific', how='inner')
common.head()


Unnamed: 0,Species,Scientific,ID
0,Dodaars,Tachybaptus ruficollis,2
1,Fuut,Podiceps cristatus,91
2,Roodhalsfuut,Podiceps grisegena,367
3,Geoorde Fuut,Podiceps nigricollis,3
4,Aalscholver,Phalacrocorax carbo,58


In [5]:
# Write the species that occur in both lists to disk (without the index column):
common.to_csv(path_or_buf="BirdsForClassification.txt", index=False)


### Test the server responses
- An example of the AI for Earth API can be found at https://aiforearth.portal.azure-api.net/docs/services/species-classification-v2/operations/health-check
- Their example notebook is at http://dolphinvm.westus2.cloudapp.azure.com/ai4e/notebooks/species-classification-api-demo.html 

In [6]:
# Constants related to the Species Classification API:
BASE_URL = 'https://aiforearth.azure-api.net/'
API_VERSION = '2.0'
CONTENT_TYPE_KEY = 'Content-Type'  # Name of the header that describes the request body
CONTENT_TYPE = 'application/octet-stream'  # The image is sent as raw bytes
AUTHORIZATION_HEADER = 'Ocp-Apim-Subscription-Key'  # Name of the header that carries the key
# Placeholders: {0} base URL, {1} API version, {2} number of predictions, {3} predict mode
CLASSIFY_FORMAT = '{0}/species-classification/v{1}/predict?topK={2}&predictMode={3}'
PREDICT_MODE = 'classifyOnly'  # Alternative: 'classifyAndDetect'

# Subscription key of the Species Classification API.
# The key is a secret and must not be stored in the notebook; it is read from
# the AI4E_SUBSCRIPTION_KEY environment variable instead. Without it the value
# is an empty string, which is sent as-is in the authorization header.
# NOTE(review): the key that used to be hard-coded here has been shared with
# the notebook and should be regenerated.
SUBSCRIPTION_KEY = os.environ.get('AI4E_SUBSCRIPTION_KEY', '')


In [36]:
# Definitions of methods:
def build_classify_url(topK=5, base_url=BASE_URL, version=API_VERSION, predictMode=PREDICT_MODE):
    """Build the URL of the classification endpoint.

    Args:
        topK: Number of predictions to request.
        base_url: Root URL of the API, with or without a trailing slash.
        version: API version that is inserted after the 'v' in the path.
        predictMode: Value of the 'predictMode' query parameter.

    Returns:
        The URL as a string.
    """
    # CLASSIFY_FORMAT already starts the path with a '/', so a trailing slash
    # on the base URL would result in 'host//species-classification/...'.
    return CLASSIFY_FORMAT.format(base_url.rstrip('/'), version, topK, predictMode)

def get_api_headers(content_type):
    """Return the HTTP headers for a request to the classification API.

    Args:
        content_type: Value for the content type header.

    Returns:
        A dict with the content type header and the subscription key header.
    """
    headers = {}
    headers[CONTENT_TYPE_KEY] = content_type
    headers[AUTHORIZATION_HEADER] = SUBSCRIPTION_KEY
    return headers

def get_api_response(imgdata, timeout=60):
    """Send image data to the classification API.

    Args:
        imgdata: Body of the POST request (the image). Note that a file object
            can only be sent once; pass bytes when the data is reused.
        timeout: Number of seconds to wait for the server before requests
            raises a timeout exception, instead of waiting forever.

    Returns:
        A tuple (response, failed). 'response' is the decoded JSON body, and
        'failed' is True when the status code is not 200 or when the body
        could not be decoded as JSON. In the latter case 'response' is a dict
        with the status code and the raw text of the answer.
    """
    url = build_classify_url()
    r = rq.post(url, headers=get_api_headers(CONTENT_TYPE), data=imgdata, timeout=timeout)
    failed = r.status_code != 200
    try:
        payload = r.json()
    except ValueError:
        # Error pages (for example from a gateway) are not necessarily JSON;
        # report them as a failure rather than raising from the decoder.
        payload = {'status_code': r.status_code, 'message': r.text}
        failed = True
    return payload, failed

def classify_and_display_results(image):
    """Classify an image with the API and display every prediction.

    Args:
        image: Image data, passed on unchanged to get_api_response.

    Prints an error message and returns when the API call failed or when the
    response does not contain predictions.
    """
    # get_api_response returns a (response, failed) tuple, so the outcome has
    # to be taken from the flag; the tuple itself is never None.
    result, failed = get_api_response(image)
    if failed or not isinstance(result, dict) or 'predictions' not in result:
        print("Error occured while calling API...Please try again")
        print(result)  # Show the answer of the server to help diagnose the problem
        return
    for index, item in enumerate(result['predictions']):
        species, species_common, prob = take_info_from_prediction(item)
        # Only the first item has to emit the style sheet:
        display_classification_results(species, species_common, str(prob), index == 0)
        
def take_info_from_prediction(prediction):
    """Extract the fields of interest from a single prediction.

    Args:
        prediction: Mapping with the keys 'species', 'species_common' and
            'confidence'.

    Returns:
        A tuple (species, species_common, prob), where prob is the confidence
        rounded to two decimals.
    """
    return (
        prediction['species'],
        prediction['species_common'],
        round(prediction['confidence'], 2),
    )
        
def display_classification_results(species, species_common, progress, is_first_item):
    """Display one prediction as a link, a common name and a progress bar.

    Args:
        species: Scientific name; shown as a link to a Bing image search.
        species_common: Common name, shown between brackets.
        progress: Confidence as a string without the percent sign; it is used
            both as the bar width and as the bar label.
        is_first_item: When true, the style sheet is emitted before the item.
    """
    # HTML is not a builtin, so without this import the function fails with a
    # NameError. The import is local because it is only needed for displaying.
    from IPython.display import HTML, display

    html_string = ""
    if is_first_item:
        html_string = "<style>" \
                      ".progress-container {margin:0 auto; min-height: 25px;margin:0;width:100%; margin-top:10px}" \
                      ".progress-bar{background-color:#ffc107; padding:3px}" \
                      ".progress-text{color:black; margin-top:5px;} " \
                      ".species .species-common {" \
                      " color:black !important; font-family:'Helvetica Neue',Helvetica,Arial,sans-serif;" \
                      " font-size:14px;line-height:20px;}" \
                      "</style>"

    progress = progress + "%"
    style = "width:" + progress
    # The names come from the API response, so they are encoded for the
    # context they end up in: URL-encoded in the query, HTML-escaped in text.
    bing_search_link = 'https://bing.com/images/search?q=' + quote_plus(species)
    safe_species = html.escape(species)
    safe_species_common = html.escape(species_common)

    html_string += (
        "<a style='color:black;' class='species' href='" + bing_search_link + "' target='_blank'>" + safe_species + "</a>"
        "<span class='species-common'>  ( " + safe_species_common + " ) </span>"
        "<div class='progress progress-container'>"
        "<div class='progress-bar' style='" + style + "' >"
        "<span class='progress-text'>" + progress + "</span></div></div>"
    )

    display(HTML(html_string))


In [10]:
# Read the image once as bytes and close the file again. A file object is
# consumed by the first request, so a second request with the same handle
# would send an empty body (presumably the reason why doing both calls at once
# used to fail - TODO confirm that it was not a rate limit).
with open('eekhoorn2.jpg', 'rb') as image_file:
    image_data = image_file.read()
result = get_api_response(image_data)
print(result)
classify_and_display_results(image_data)

({'predictions': [{'class': 'Mammalia', 'class_common': 'Mammals', 'confidence': 22.90235012769699, 'family': 'Sciuridae', 'family_common': 'Squirrels', 'genus': 'Sciurus', 'genus_common': 'Tree Squirrels', 'kingdom': 'Animalia', 'kingdom_common': 'Animals', 'order': 'Rodentia', 'order_common': 'Rodents', 'phylum': 'Chordata', 'phylum_common': 'Chordates', 'species': 'Sciurus vulgaris', 'species_common': 'Eurasian Red Squirrel', 'subfamily': 'Sciurinae', 'subfamily_common': 'Typical and Flying Squirrels', 'subphylum': 'Vertebrata', 'subphylum_common': 'Vertebrates', 'tribe': 'Sciurini', 'tribe_common': 'Typical Squirrels'}, {'class': 'Mammalia', 'class_common': 'Mammals', 'confidence': 16.79779589176178, 'family': 'Sciuridae', 'family_common': 'Squirrels', 'genus': 'Sciurus', 'genus_common': 'Tree Squirrels', 'kingdom': 'Animalia', 'kingdom_common': 'Animals', 'order': 'Rodentia', 'order_common': 'Rodents', 'phylum': 'Chordata', 'phylum_common': 'Chordates', 'species': 'Sciurus aberti'

### Get an image from Waarneming.nl and get the classification for it

In [11]:
# Example:
image_url = "https://waarneming.nl/media/photo/004/240/4240808.jpg"

# Use a timeout so that the cell cannot hang forever, and stop when the
# download failed instead of classifying an error page:
image_r = rq.get(image_url, timeout=30)
image_r.raise_for_status()

# Keep a local copy of the photo; 'with' closes the file afterwards:
with open('temp_img.jpg', 'wb') as temp_img:
    temp_img.write(image_r.content)

# Send the downloaded bytes directly, so that no file handle is left open:
image_data = image_r.content
classify_and_display_results(image_data)


In [12]:
# Settings for retrieving the predictions of all images in the list:
waarneming_url = "https://waarneming.nl/fotonew/"
df = pd.read_csv("BirdPhotoIDs.txt", header=0)

# Reserve three empty columns (scientific name, common name, probability)
# for each of the five predictions, numbered 0 to 4:
header_list = [prefix + str(rank)
               for rank in range(5)
               for prefix in ('ScientificPred', 'CommonPred', 'ProbPred')]
for header in header_list:
    df[header] = ''
print(df.head())


           ID              Scientific ScientificPred0 CommonPred0 ProbPred0  \
0  9/32670539  Tachybaptus ruficollis                                         
1  8/32664908  Tachybaptus ruficollis                                         
2  0/32663960  Tachybaptus ruficollis                                         
3  8/32662888  Tachybaptus ruficollis                                         
4  3/32660353  Tachybaptus ruficollis                                         

  ScientificPred1 CommonPred1 ProbPred1 ScientificPred2 CommonPred2 ProbPred2  \
0                                                                               
1                                                                               
2                                                                               
3                                                                               
4                                                                               

  ScientificPred3 CommonPred3 ProbPred

In [18]:
# Now get predictions for each of the images.
# Rows for which the download or the classification failed are skipped and
# collected, so that one bad request does not abort the whole run.
failed_rows = []
for row_i in range(len(df.index)):
    # Assemble the hyperlink to the image:
    photo_id = df.loc[row_i, 'ID']
    image_url = waarneming_url + photo_id + '.jpg'

    error = None
    predictions = []
    try:
        # Request the image; fail on HTTP errors instead of classifying an error page:
        image_r = rq.get(image_url, timeout=30)
        image_r.raise_for_status()

        # Retrieve the AI4Earth prediction for the image. The bytes are sent
        # directly, so no temporary file (and no open file handle) is needed:
        result, failed = get_api_response(image_r.content)
        if failed:
            error = result
        else:
            predictions = result['predictions']
    except (rq.RequestException, ValueError, KeyError) as err:
        error = repr(err)

    if error is not None:
        print(row_i, ": skipped |", error)
        failed_rows.append(row_i)
        continue

    # Dissect the response:
    for pred_i, item in enumerate(predictions):
        species, species_common, prob = take_info_from_prediction(item)
        if pred_i == 0:
            print(row_i, ":", df.loc[row_i, 'Scientific'], "| Prediction:", species)
        df.at[row_i, 'ScientificPred'+str(pred_i)] = species
        df.at[row_i, 'CommonPred'+str(pred_i)] = species_common
        df.at[row_i, 'ProbPred'+str(pred_i)] = prob

if failed_rows:
    print("Rows without predictions:", failed_rows)


0 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
1 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
2 : Tachybaptus ruficollis | Prediction: Oxyura jamaicensis
3 : Tachybaptus ruficollis | Prediction: Podiceps grisegena
4 : Tachybaptus ruficollis | Prediction: Oxyura jamaicensis
5 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
6 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
7 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
8 : Tachybaptus ruficollis | Prediction: Podilymbus podiceps
9 : Tachybaptus ruficollis | Prediction: Podiceps grisegena
10 : Tachybaptus ruficollis | Prediction: Tachybaptus dominicus
11 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
12 : Tachybaptus ruficollis | Prediction: Podiceps nigricollis
13 : Tachybaptus ruficollis | Prediction: Podiceps auritus
14 : Tachybaptus ruficollis | Prediction: Tachybaptus ruficollis
15 : Tachybaptus ruficollis | Prediction: Tachybaptus dominicus

136 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
137 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
138 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
139 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
140 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
141 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
142 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
143 : Nycticorax nycticorax | Prediction: Nycticorax nycticorax
144 : Egretta garzetta | Prediction: Egretta garzetta
145 : Egretta garzetta | Prediction: Ardea alba
146 : Egretta garzetta | Prediction: Egretta garzetta
147 : Egretta garzetta | Prediction: Egretta garzetta
148 : Egretta garzetta | Prediction: Egretta garzetta
149 : Egretta garzetta | Prediction: Egretta garzetta
150 : Egretta garzetta | Prediction: Egretta garzetta
151 : Egretta garzetta | Prediction: Egretta garzetta
152 : Egretta garzetta | Prediction: Egretta garzetta
153 : Eg

KeyboardInterrupt: 

In [43]:
# Now get predictions for the remaining images.
START_ROW = 4272  # First row to process (presumably where the previous run stopped - TODO confirm)
SAVE_EVERY = 50  # Write the intermediate result to disk once per this many rows

# Rows for which the download or the classification failed are skipped and
# collected, so that one bad request does not abort the whole run.
failed_rows = []
for row_i in range(START_ROW, len(df.index)):
    # Assemble the hyperlink to the image:
    photo_id = df.loc[row_i, 'ID']
    image_url = waarneming_url + photo_id + '.jpg'

    error = None
    predictions = []
    try:
        # Request the image; fail on HTTP errors instead of classifying an error page:
        image_r = rq.get(image_url, timeout=30)
        image_r.raise_for_status()

        # Retrieve the AI4Earth prediction for the image. The bytes are sent
        # directly, so no temporary file (and no open file handle) is needed:
        result, failed = get_api_response(image_r.content)
        if failed:
            error = result
        else:
            predictions = result['predictions']
    except (rq.RequestException, ValueError, KeyError) as err:
        error = repr(err)

    if error is not None:
        print(row_i, ": skipped |", error)
        failed_rows.append(row_i)
    else:
        # Dissect the response:
        for pred_i, item in enumerate(predictions):
            species, species_common, prob = take_info_from_prediction(item)
            if pred_i == 0:
                print(row_i, ":", df.loc[row_i, 'Scientific'], "| Prediction:", species)
            df.at[row_i, 'ScientificPred'+str(pred_i)] = species
            df.at[row_i, 'CommonPred'+str(pred_i)] = species_common
            df.at[row_i, 'ProbPred'+str(pred_i)] = prob

    if row_i % SAVE_EVERY == 0:  # Save it every now and then:
        df.to_csv("PredictedSpecies.txt", index=False)

# Save it at the end:
df.to_csv("PredictedSpecies.txt", index=False)

if failed_rows:
    print("Rows without predictions:", failed_rows)


4272 : Acanthis flammea | Prediction: Acanthis flammea
4273 : Acanthis flammea | Prediction: Acanthis flammea
4274 : Acanthis flammea | Prediction: Setophaga coronata auduboni
4275 : Acanthis flammea | Prediction: Acanthis flammea
4276 : Acanthis flammea | Prediction: Dumetella carolinensis
4277 : Acanthis flammea | Prediction: Acanthis flammea
4278 : Acanthis flammea | Prediction: Acanthis flammea
4279 : Acanthis flammea | Prediction: Acanthis flammea
4280 : Acanthis flammea | Prediction: Acanthis flammea
4281 : Acanthis flammea | Prediction: Acanthis flammea
4282 : Acanthis flammea | Prediction: Spinus pinus
4283 : Acanthis flammea | Prediction: Acanthis flammea
4284 : Acanthis flammea | Prediction: Betula populifolia
4285 : Acanthis flammea | Prediction: Fringilla montifringilla
4286 : Acanthis flammea | Prediction: Acanthis flammea
4287 : Acanthis flammea | Prediction: Falco tinnunculus
4288 : Acanthis flammea | Prediction: Acanthis flammea
4289 : Acanthis flammea | Prediction: Phy

In [29]:
# Write the table with all predictions to disk (without the index column).
# NOTE: 1601 and 1602 went wrong, as did 3207 and 4271
# (presumably these are row numbers of failed requests - TODO confirm).
df.to_csv(path_or_buf="PredictedSpecies.txt", index=False)
