# Retrieving species IDs and scientific names from Waarneming.nl
## By Emma Vriezen | Created on: 2020-11-19 | Last edit on: 2020-12-21

In [1]:
# Imports:
import pandas as pd  # DataFrames
import requests as rq  # Send HTTP requests to servers
import re  # Regular expressions
import time # Sleep function to allow for waiting between requests


In [2]:
# Load the bird list; at this point it is only a list of 298 species names.
# NOTE(review): bd_path is an absolute, machine-specific path -- adjust it when
# running this notebook on another computer.
bd_path = r"C:\Users\emmav\Documents\AI Master year 1\TMM\Project\BirdInfo.txt"
bd = pd.read_csv(
    bd_path,
    encoding='utf-16',
    header=0,
)
bd.head()


Unnamed: 0,Species
0,Dodaars
1,Fuut
2,Roodhalsfuut
3,Geoorde Fuut
4,Aalscholver


### Example request to Waarneming.nl

In [3]:
# Send one example search to Waarneming.nl and show the raw HTML that comes
# back; the species number is extracted from this response in a later cell.
url = "https://waarneming.nl/species/search"
species = bd.loc[238,'Species']  # Row 238 serves as the worked example
# NOTE(review): species_group '1' is presumably the bird group -- confirm.
parameters = {'species_group': '1', 'q': species}
# The timeout keeps the cell from hanging forever when the server does not
# answer, and raise_for_status() turns an HTTP error into an exception instead
# of letting an error page be treated as a search result further down.
response = rq.get(url, params=parameters, timeout=30)
response.raise_for_status()
print(response.text)


<!DOCTYPE html>
<html lang="nl">
<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    
        <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
    

    <meta name="format-detection" content="telephone=no">
    <title>Zoek een soort
        - Waarneming.nl</title>

    

<link rel="apple-touch-icon" sizes="180x180" href="/static/favicon/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/static/favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/static/favicon/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/static/favicon/safari-pinned-tab.svg" color="#0066b1">
<link rel="shortcut icon" href="/static/favicon/favicon.ico">
<meta name="msapplication-TileColor" content="#0066b1">
<meta name="msapplication-config" content="/static/favicon/browserconfig.xml">
<meta name="theme-color" content="#ffffff">


  

In [14]:
# Extract the species ID and the scientific name from the example response.
#
# The pattern fragments are raw strings so that \W, \s and \d reach the regex
# engine exactly as written; in a normal string literal they are invalid
# escape sequences and make Python emit a warning. The species name is passed
# through re.escape() so that any regex metacharacter in a name is matched
# literally instead of being interpreted as part of the pattern.
species_pattern = re.escape(species)
regex = (r'1\W*</td>\W*<td\s[^"]*"'
         + species_pattern
         + r'">\W*<a\shref="/species/(\d*)/"><[^>]*>'
         + species_pattern
         + r'</span>\s-\s<[^>]*>([^<]*)</i>')
print(regex)
matches = re.findall(regex, response.text)
if not matches:
    # Same exception type as the bare [0] lookup would give, but with a message
    # that says which species could not be found in the response.
    raise IndexError("No search result matched species {!r}".format(species))
# Group 1 is the ID (a string of digits), group 2 the scientific name.
(species_nr, sc_name) = matches[0]
print(species_nr)
print(sc_name)


1\W*</td>\W*<td\s[^"]*"Griel">\W*<a\shref="/species/(\d*)/"><[^>]*>Griel</span>\s-\s<[^>]*>([^<]*)</i>
21
Burhinus oedicnemus


In [15]:
# Store the example's scientific name and species number in the dataframe.
# Assigning to columns that do not exist yet creates them for every row.
example_row = 238
bd.loc[example_row, 'Scientific'] = sc_name
bd.loc[example_row, 'ID'] = species_nr
print(bd.iloc[example_row])
bd.head()


Species                     Griel
Scientific    Burhinus oedicnemus
ID                             21
Name: 238, dtype: object


Unnamed: 0,Species,Scientific,ID
0,Dodaars,,
1,Fuut,,
2,Roodhalsfuut,,
3,Geoorde Fuut,,
4,Aalscholver,,


### Obtain IDs for all species

In [16]:
# Look up every species in the dataframe on Waarneming.nl and store the
# species ID and the scientific name that the search page reports for it.
url = "https://waarneming.nl/species/search"
request_delay = 4.95  # Seconds to pause after each request

# Species-independent pieces of the pattern, built once outside the loop. They
# are raw strings so that \W, \s and \d are passed to the regex engine as
# written instead of being (invalid) string escape sequences.
regex_head = r'1\W*</td>\W*<td\s[^"]*"'
regex_mid = r'">\W*<a\shref="/species/(\d*)/"><[^>]*>'
regex_tail = r'</span>\s-\s<[^>]*>([^<]*)</i>'

# Iterate over (index label, name) pairs so that the label used with .loc below
# is the row's real label rather than a positional counter.
for i, species in bd['Species'].items():
    parameters = {'species_group': '1', 'q': species}
    # Timeout: never hang forever on one request. raise_for_status(): stop on
    # an HTTP error instead of trying to parse an error page.
    response = rq.get(url, params=parameters, timeout=30)
    response.raise_for_status()
    # Apostrophes (e.g. "Cetti's Zanger") are searched for as the entity
    # &#x27;, presumably because the page HTML-escapes them. re.escape() then
    # makes every character of the name match literally.
    species_pattern = re.escape(species.replace("'", "&#x27;"))
    regex = regex_head + species_pattern + regex_mid + species_pattern + regex_tail
    matches = re.findall(regex, response.text)
    if not matches:
        # Fail with the same exception type as a bare [0] lookup, but say which
        # species (and row) was not found so the run can be diagnosed.
        raise IndexError(
            "No search result matched species {!r} (row {!r})".format(species, i)
        )
    # Group 1 is the ID (a string of digits), group 2 the scientific name.
    (species_nr, sc_name) = matches[0]
    bd.loc[i,'Scientific'] = sc_name
    bd.loc[i,'ID'] = species_nr
    print(bd.loc[i])
    time.sleep(request_delay)


Species                      Dodaars
Scientific    Tachybaptus ruficollis
ID                                 2
Name: 0, dtype: object
Species                     Fuut
Scientific    Podiceps cristatus
ID                            91
Name: 1, dtype: object
Species             Roodhalsfuut
Scientific    Podiceps grisegena
ID                           367
Name: 2, dtype: object
Species               Geoorde Fuut
Scientific    Podiceps nigricollis
ID                               3
Name: 3, dtype: object
Species               Aalscholver
Scientific    Phalacrocorax carbo
ID                             58
Name: 4, dtype: object
Species                 Roerdomp
Scientific    Botaurus stellaris
ID                             1
Name: 5, dtype: object
Species                  Woudaap
Scientific    Ixobrychus minutus
ID                             4
Name: 6, dtype: object
Species                        Kwak
Scientific    Nycticorax nycticorax
ID                                5
Name: 7, dtype: o

Species              Grutto
Scientific    Limosa limosa
ID                       26
Name: 70, dtype: object
Species                   Wulp
Scientific    Numenius arquata
ID                         203
Name: 71, dtype: object
Species             Tureluur
Scientific    Tringa totanus
ID                        27
Name: 72, dtype: object
Species               Oeverloper
Scientific    Actitis hypoleucos
ID                           154
Name: 73, dtype: object
Species                    Zwartkopmeeuw
Scientific    Ichthyaetus melanocephalus
ID                                   363
Name: 74, dtype: object
Species                 Dwergmeeuw
Scientific    Hydrocoloeus minutus
ID                              84
Name: 75, dtype: object
Species                         Kokmeeuw
Scientific    Chroicocephalus ridibundus
ID                                   138
Name: 76, dtype: object
Species        Stormmeeuw
Scientific    Larus canus
ID                    183
Name: 77, dtype: object
Species       Kl

Species       Cetti's Zanger
Scientific      Cettia cetti
ID                       237
Name: 139, dtype: object
Species               Graszanger
Scientific    Cisticola juncidis
ID                          1111
Name: 140, dtype: object
Species        Sprinkhaanzanger
Scientific    Locustella naevia
ID                          330
Name: 141, dtype: object
Species                          Snor
Scientific    Locustella luscinioides
ID                                 47
Name: 142, dtype: object
Species                       Rietzanger
Scientific    Acrocephalus schoenobaenus
ID                                    48
Name: 143, dtype: object
Species                Bosrietzanger
Scientific    Acrocephalus palustris
ID                                75
Name: 144, dtype: object
Species               Kleine Karekiet
Scientific    Acrocephalus scirpaceus
ID                                130
Name: 145, dtype: object
Species                  Grote Karekiet
Scientific    Acrocephalus arundinaceus
I

Species             Kuifduiker
Scientific    Podiceps auritus
ID                         286
Name: 206, dtype: object
Species       Noordse Stormvogel
Scientific    Fulmarus glacialis
ID                           298
Name: 207, dtype: object
Species       Grauwe Pijlstormvogel
Scientific           Ardenna grisea
ID                             1388
Name: 208, dtype: object
Species       Noordse Pijlstormvogel
Scientific         Puffinus puffinus
ID                               297
Name: 209, dtype: object
Species         Vale Pijlstormvogel
Scientific    Puffinus mauretanicus
ID                              369
Name: 210, dtype: object
Species              Stormvogeltje
Scientific    Hydrobates pelagicus
ID                             335
Name: 211, dtype: object
Species          Vaal Stormvogeltje
Scientific    Oceanodroma leucorhoa
ID                              343
Name: 212, dtype: object
Species         Jan-van-gent
Scientific    Morus bassanus
ID                       267
Name: 

Species              Witvleugelstern
Scientific    Chlidonias leucopterus
ID                               361
Name: 272, dtype: object
Species          Zeekoet
Scientific    Uria aalge
ID                   354
Name: 273, dtype: object
Species              Alk
Scientific    Alca torda
ID                   215
Name: 274, dtype: object
Species       Zwarte Zeekoet
Scientific    Cepphus grylle
ID                      1538
Name: 275, dtype: object
Species       Kleine Alk
Scientific     Alle alle
ID                   270
Name: 276, dtype: object
Species           Papegaaiduiker
Scientific    Fratercula arctica
ID                           304
Name: 277, dtype: object
Species            Strandleeuwerik
Scientific    Eremophila alpestris
ID                             336
Name: 278, dtype: object
Species          Grote Pieper
Scientific    Anthus richardi
ID                        757
Name: 279, dtype: object
Species        Roodkeelpieper
Scientific    Anthus cervinus
ID                     

In [17]:
# Write the table (species, scientific name, ID) to a CSV-formatted text file,
# leaving out the dataframe index:
bd.to_csv(path_or_buf="BirdIDs.txt", index=False)
