In [4]:
import requests
import json
import time

# The 32 departments of Colombia in alphabetical order, one row per initial
# letter. Each name is used as the `stateProvince` filter of a GBIF query.
provinces_colombia = [
    "Amazonas", "Antioquia", "Arauca", "Atlántico",
    "Bolívar", "Boyacá",
    "Caldas", "Caquetá", "Casanare", "Cauca",
    "Cesar", "Chocó", "Córdoba", "Cundinamarca",
    "Guainía", "Guaviare",
    "Huila",
    "La Guajira",
    "Magdalena", "Meta",
    "Nariño", "Norte de Santander",
    "Putumayo",
    "Quindío",
    "Risaralda",
    "San Andrés y Providencia", "Santander", "Sucre",
    "Tolima",
    "Valle del Cauca", "Vaupés", "Vichada",
]

# Number of records requested per page. The same value advances the offset,
# so the URL's `limit` and the paging step can never drift apart.
PAGE_SIZE = 300
# Seconds to wait on a single request. Without a timeout a stalled connection
# would block the whole harvest indefinitely; with it, the stall surfaces as a
# requests.Timeout and is handled like any other request failure below.
REQUEST_TIMEOUT = 60
# Pause between consecutive page requests, in seconds, to respect rate limits.
PAGE_DELAY = 20

# GBIF occurrence search filtered by country=CO and kingdomKey=5. The province
# name and the paging offset are appended for each request.
base_url = (
    "https://api.gbif.org/v1/occurrence/search"
    f"?country=CO&kingdomKey=5&limit={PAGE_SIZE}&stateProvince="
)

# Raw occurrence records of every province, accumulated in fetch order.
all_results = []

for province in provinces_colombia:
    print(f"Starting data fetch for {province}...")
    offset = 0

    # Records of the previous page, kept to detect the API returning the
    # same page twice (which would otherwise loop forever).
    previous_results = None

    while True:
        # Only the request and the JSON decoding can raise here, so the try
        # body is limited to those calls.
        try:
            response = requests.get(
                f"{base_url}{province}&offset={offset}",
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON: older versions
            # of requests raise it outside the RequestException hierarchy, and
            # letting it escape would abort the run before anything is saved.
            # NOTE(review): GBIF is believed to reject deep paging (offset +
            # limit beyond 100,000); if so, very large provinces end here with
            # an HTTP error rather than with endOfRecords -- confirm.
            print(f"Error fetching data for {province} at offset {offset}: {e}. Trying next province...")
            break

        # A payload without 'results' cannot be paged any further.
        if 'results' not in data:
            print(f"Error: 'results' key missing in data for {province} at offset {offset}.")
            break

        # If the results are the same as the previous fetch, stop paging.
        if previous_results == data['results']:
            print(f"Detected repeated records for {province}. Stopping fetch.")
            break

        all_results.extend(data['results'])
        print(f"Fetched {len(data['results'])} records for {province} at offset {offset}.")

        # Remember this page for the repeated-page check on the next iteration.
        previous_results = data['results']

        # The API sets endOfRecords on the last page of a result set.
        if data.get('endOfRecords', False):
            print(f"Completed data fetch for {province}.")
            break

        offset += PAGE_SIZE

        # Adding a delay to respect rate limits
        time.sleep(PAGE_DELAY)

# Write every collected occurrence record to disk as a single JSON array,
# then report where it went.
output_path = "all_data_colombia.json"
with open(output_path, "w") as json_file:
    json.dump(all_results, fp=json_file)

print(f"Data saved to {output_path}")

Starting data fetch for Amazonas...
Fetched 98 records for Amazonas at offset 0.
Completed data fetch for Amazonas.
Starting data fetch for Antioquia...
Fetched 300 records for Antioquia at offset 0.
Fetched 300 records for Antioquia at offset 300.
Fetched 300 records for Antioquia at offset 600.
Fetched 300 records for Antioquia at offset 900.
Fetched 300 records for Antioquia at offset 1200.
Fetched 300 records for Antioquia at offset 1500.
Fetched 300 records for Antioquia at offset 1800.
Fetched 300 records for Antioquia at offset 2100.
Fetched 300 records for Antioquia at offset 2400.
Fetched 300 records for Antioquia at offset 2700.
Fetched 300 records for Antioquia at offset 3000.
Fetched 300 records for Antioquia at offset 3300.
Fetched 300 records for Antioquia at offset 3600.
Fetched 300 records for Antioquia at offset 3900.
Fetched 300 records for Antioquia at offset 4200.
Fetched 300 records for Antioquia at offset 4500.
Fetched 300 records for Antioquia at offset 4800.
Fet