Imports

In [None]:
import requests
import math
import time
import pandas as pd
import numpy as np

from datetime import date
from dateutil.relativedelta import relativedelta

Universal Parameters


In [None]:
# iNaturalist place id used as the default search area for every fetch.
PLACE_ID = 6986  # South Africa

# Rolling five-year window ending today, stored as ISO-8601 date strings.
today = date.today()
D1 = (today - relativedelta(years=5)).isoformat()  # window start
D2 = today.isoformat()                             # window end



API fetch function:

In [None]:
def fetch_inat_observations(
    taxon_id: int,
    place_id: int = PLACE_ID,
    d1: str | None = None,
    d2: str | None = None,
    verifiable: bool = True,
    per_page: int = 200,
    max_records: int = 50000,
    sleep_sec: float = 0.2,
) -> pd.DataFrame:
    """Download iNaturalist observations for one taxon and place as a flat DataFrame.

    Sends paged GET requests to ``https://api.inaturalist.org/v1/observations``
    and flattens the collected ``results`` records with ``pd.json_normalize``.

    Parameters
    ----------
    taxon_id : value sent as the ``taxon_id`` query parameter.
    place_id : value sent as the ``place_id`` query parameter (defaults to PLACE_ID).
    d1, d2 : start / end of the date window; fall back to the module-level
        D1 / D2 when left as None.
    verifiable : sent as the string "true" or "false".
    per_page : page size, clamped to the range 1..200.
    max_records : upper bound on the number of rows returned.
    sleep_sec : pause between consecutive page requests.

    Returns
    -------
    pd.DataFrame
        One row per observation (at most ``max_records``); an empty DataFrame
        when the probe request reports zero results.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` on any non-success response.
    """
    # Fall back to the notebook-wide rolling window when no dates are given.
    if d1 is None:
        d1 = D1
    if d2 is None:
        d2 = D2
    per_page = min(max(1, per_page), 200)

    base = "https://api.inaturalist.org/v1/observations"
    common_params = {
        "taxon_id": taxon_id,
        "place_id": place_id,
        "verifiable": "true" if verifiable else "false",
        "d1": d1, "d2": d2,
    }

    rows = []
    # One session for the probe and all pages so the connection is reused.
    with requests.Session() as session:
        # Probe: a one-record request whose only purpose is to read total_results.
        probe = session.get(base, params={**common_params, "page": 1, "per_page": 1}, timeout=30)
        probe.raise_for_status()
        total = probe.json().get("total_results", 0)
        # Label the count according to the filter that was actually sent.
        label = "verifiable" if verifiable else "non-verifiable"
        print(f"[iNat] taxon_id={taxon_id}, place_id={place_id}, {d1}→{d2} | total {label}: {total}")
        if total == 0:
            return pd.DataFrame()

        # NOTE(review): the iNaturalist API reportedly rejects requests where
        # page * per_page exceeds 10,000; pulls larger than that would need
        # id_above-based paging instead — confirm against the API docs.
        pages = min(math.ceil(total / per_page), math.ceil(max_records / per_page))
        for page in range(1, pages + 1):
            r = session.get(base, params={**common_params, "page": page, "per_page": per_page}, timeout=60)
            r.raise_for_status()
            batch = r.json().get("results", [])
            if not batch:
                # Nothing more to read; avoid issuing further empty requests.
                break
            rows.extend(batch)
            if len(rows) >= max_records:
                break
            if page < pages:
                # Throttle between requests; no need to wait after the last page.
                time.sleep(sleep_sec)

    # The final page can overshoot the cap, so trim to honour max_records.
    rows = rows[:max(max_records, 0)]

    raw_df = pd.json_normalize(rows)
    print(f"Fetched rows: {len(raw_df)}")
    return raw_df


Cleaning Function:

In [None]:
# Columns retained (in this order) by clean_inat_minimal.
KEEP_COLS = [
    "taxon.id",
    "taxon.name",
    "taxon.preferred_common_name",
    "longitude",
    "latitude",
    "observed_on_details.date",
    "observed_on_details.hour",
    "place_ids",
]

def clean_inat_minimal(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Reduce a raw observations frame to the KEEP_COLS columns.

    ``latitude`` and ``longitude`` are parsed from the ``location`` column,
    which is split on the first comma as ``"lat,lon"``. Values that cannot be
    parsed as numbers become NaN. Any KEEP_COLS column absent from the input
    is added and filled with NaN.

    Parameters
    ----------
    raw_df : frame to clean; it is copied, not modified.

    Returns
    -------
    pd.DataFrame
        Columns exactly KEEP_COLS, with a fresh 0..n-1 index.
    """
    out = raw_df.copy()

    if "location" in out.columns:
        # When no value contains a comma (e.g. every location is missing) the
        # split yields a single column; reindex guarantees both parts exist.
        parts = (
            out["location"]
            .astype(str)
            .str.split(",", n=1, expand=True)
            .reindex(columns=[0, 1])
        )
        # astype(str) keeps the .str accessor usable on an all-NaN part.
        out["latitude"] = pd.to_numeric(parts[0].astype(str).str.strip(), errors="coerce")
        out["longitude"] = pd.to_numeric(parts[1].astype(str).str.strip(), errors="coerce")
    else:
        out["latitude"] = np.nan
        out["longitude"] = np.nan

    # Guarantee the full output schema even if the API omitted some fields.
    for c in KEEP_COLS:
        if c not in out.columns:
            out[c] = np.nan

    return out[KEEP_COLS].reset_index(drop=True)



Fetching data for each animal:

1. Lion:


In [None]:
# Lion: pull the raw observations for this taxon id.
TAXON_ID = 41964
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)


[iNat] taxon_id=41964, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 2495
Fetched rows: 2495
2495


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.journal_posts_count,user.activity_count,user.species_count,user.annotated_observations_count,user.universal_search_rank,user.roles,user.icon_url,user.preferences.prefers_project_addition_by,user.preferences.prefers_community_taxa,user.preferences.prefers_observation_fields_by
0,research,2025-10-13T09:44:22+02:00,obscured,[],16396bac-715a-4fd8-a986-a03b03f73c9d,321007782,0,True,Lion,False,...,0,437,370,0.0,436,[],https://static.inaturalist.org/attachments/use...,,,
1,research,2025-10-15T06:05:50+02:00,obscured,[],e02f0ee1-e1ec-4a21-9cec-d75951c0aa9d,321001872,0,True,Southern Lion,False,...,0,8277,1643,0.0,7106,[],,,,


In [None]:
# Reduce the raw lion observations to the KEEP_COLS columns.
lion_df = clean_inat_minimal(raw_df=raw_df)
print(lion_df.shape)
lion_df.head(n=5)

(2495, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,41964,Panthera leo,Lion,32.20007,-27.734227,2025-10-13,9,"[6986, 13313, 48462, 59647, 91708, 97392, 1081..."
1,557401,Panthera leo melanochaita,Southern Lion,31.966613,-24.480334,2025-10-15,6,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
2,557401,Panthera leo melanochaita,Southern Lion,31.900091,-24.572784,2025-10-15,5,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
3,557401,Panthera leo melanochaita,Southern Lion,31.579412,-24.875755,2025-08-15,7,"[6986, 7478, 48525, 57957, 59647, 91708, 97392..."
4,557401,Panthera leo melanochaita,Southern Lion,31.908049,-25.278137,2025-08-07,17,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."


2. Warthog:

In [None]:
# Warthog: pull the raw observations for this taxon id.
TAXON_ID = 42122
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=42122, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 2661
Fetched rows: 2661
2661


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.icon_url,taxon.establishment_means.id,taxon.establishment_means.place_id,taxon.establishment_means.user_id,taxon.establishment_means.occurrence_status_level,taxon.establishment_means.establishment_means,taxon.preferred_establishment_means,user.preferences.prefers_project_addition_by,user.preferences.prefers_observation_fields_by,user.preferences.prefers_community_taxa
0,research,2025-10-08T11:36:38+02:00,open,[],f45150d2-9411-4b44-a967-41e4fa952c9a,320891360,0,True,Southern Warthog,False,...,,,,,,,,,,
1,research,2025-10-08T09:18:15+02:00,open,[],53723da7-76b4-4dfa-8be0-8716c9992158,320890370,0,True,Southern Warthog,False,...,,,,,,,,,,


In [None]:
# Reduce the raw warthog observations to the KEEP_COLS columns.
warthog_df = clean_inat_minimal(raw_df=raw_df)
print(warthog_df.shape)
warthog_df.head(n=5)

(2661, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,531646,Phacochoerus africanus sundevallii,Southern Warthog,25.770331,-33.446211,2025-10-08,11,"[6986, 8872, 48322, 51115, 59647, 69022, 91708..."
1,531646,Phacochoerus africanus sundevallii,Southern Warthog,25.661428,-33.584344,2025-10-08,9,"[6986, 8872, 48351, 51115, 59647, 91708, 97392..."
2,531646,Phacochoerus africanus sundevallii,Southern Warthog,31.686009,-24.392211,2025-10-14,17,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
3,531646,Phacochoerus africanus sundevallii,Southern Warthog,31.89353,-25.358438,2025-10-01,10,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
4,531646,Phacochoerus africanus sundevallii,Southern Warthog,31.827906,-28.230942,2025-10-04,16,"[6986, 13313, 48440, 59647, 71275, 91708, 9739..."


3. Elephant:

In [None]:
# Elephant: pull the raw observations for this taxon id.
TAXON_ID = 43694
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=43694, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 4701
Fetched rows: 4701
4701


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.activity_count,user.species_count,user.annotated_observations_count,user.universal_search_rank,user.roles,user.icon_url,user.preferences.prefers_project_addition_by,user.preferences.prefers_observation_fields_by,user.preferences.prefers_community_taxa,observation_sounds
0,research,2025-10-14T16:52:52+02:00,obscured,[],b16d45c2-2829-4f73-bd54-d3b8e05103ab,320885819,0,True,African Savanna Elephant,False,...,866,397,92.0,619,[],https://static.inaturalist.org/attachments/use...,,,,
1,needs_id,2025-10-13T17:07:45+02:00,obscured,[],dff7d5b3-ebff-4a83-acd1-1a23f4b48996,320658052,0,False,,False,...,183,165,0.0,183,[],,,,,


In [None]:
# Reduce the raw elephant observations to the KEEP_COLS columns.
elephant_df = clean_inat_minimal(raw_df=raw_df)
print(elephant_df.shape)
elephant_df.head(n=5)

(4701, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,43694,Loxodonta africana,African Savanna Elephant,31.553999,-24.417992,2025-10-14,16,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
1,43694,Loxodonta africana,African Savanna Elephant,31.748145,-23.90933,2025-10-13,17,"[6986, 9074, 48488, 53299, 59647, 69020, 91708..."
2,43694,Loxodonta africana,African Savanna Elephant,31.631425,-25.1532,2025-10-05,10,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
3,43694,Loxodonta africana,African Savanna Elephant,31.855951,-28.25999,2025-10-04,19,"[6986, 13313, 59647, 71275, 91708, 97392, 1081..."
4,43694,Loxodonta africana,African Savanna Elephant,31.964403,-28.360771,2025-10-04,19,"[6986, 13313, 59647, 71275, 91708, 97392, 1081..."


4. Cheetah:

In [None]:
# Cheetah: pull the raw observations for this taxon id.
TAXON_ID = 41955
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=41955, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 708
Fetched rows: 708
708


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,taxon.conservation_status.iucn,taxon.establishment_means.id,taxon.establishment_means.place_id,taxon.establishment_means.user_id,taxon.establishment_means.occurrence_status_level,taxon.establishment_means.establishment_means,taxon.preferred_establishment_means,user.preferences.prefers_project_addition_by,user.preferences.prefers_community_taxa,user.preferences.prefers_observation_fields_by
0,research,2025-10-06T07:36:48+02:00,obscured,[],5c96b695-be0e-4c41-b0a7-895fc2a482cb,320862049,0,True,Southern and Eastern African Cheetah,False,...,,,,,,,,,,
1,research,,obscured,[],8958485d-ecb9-4b22-ac85-314f34a641a3,320127038,0,True,Southern Cheetah,False,...,,,,,,,,,,


In [None]:
# Reduce the raw cheetah observations to the KEEP_COLS columns.
cheetah_df = clean_inat_minimal(raw_df=raw_df)
print(cheetah_df.shape)
cheetah_df.head(n=5)

(708, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,606958,Acinonyx jubatus jubatus,Southern and Eastern African Cheetah,32.107006,-27.834913,2025-10-06,7,"[6986, 13313, 48462, 59647, 91708, 97392, 1081..."
1,606958,Acinonyx jubatus jubatus,Southern and Eastern African Cheetah,25.40162,-32.150473,2025-10-05,0,"[6986, 8872, 48332, 59647, 91708, 97392, 10810..."
2,606958,Acinonyx jubatus jubatus,Southern and Eastern African Cheetah,30.899824,-23.849435,2025-09-10,8,"[6986, 9074, 48488, 53299, 59647, 91708, 97392..."
3,606958,Acinonyx jubatus jubatus,Southern and Eastern African Cheetah,31.572864,-23.834812,2024-04-01,8,"[6986, 9074, 48488, 53299, 59647, 69020, 91708..."
4,606958,Acinonyx jubatus jubatus,Southern and Eastern African Cheetah,31.574401,-25.395065,2025-10-03,9,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."


5. Giraffe:





In [None]:
# Giraffe: pull the raw observations for this taxon id.
TAXON_ID = 1510571
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=1510571, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 3860
Fetched rows: 3860
3860


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.icon_url,taxon.establishment_means.id,taxon.establishment_means.place_id,taxon.establishment_means.user_id,taxon.establishment_means.occurrence_status_level,taxon.establishment_means.establishment_means,taxon.preferred_establishment_means,user.preferences.prefers_project_addition_by,user.preferences.prefers_observation_fields_by,user.preferences.prefers_community_taxa
0,research,2025-10-13T08:40:11+02:00,open,[],b64e4332-0d51-46a4-b047-9b859f71b44b,320993365,0,True,South African Giraffe,False,...,https://static.inaturalist.org/attachments/use...,,,,,,,,,
1,research,2025-10-14T15:51:57+02:00,open,[],242101d5-b5e7-4f58-8d8d-9160e2319661,320885317,0,True,South African Giraffe,False,...,https://static.inaturalist.org/attachments/use...,,,,,,,,,


In [None]:
# Reduce the raw giraffe observations to the KEEP_COLS columns.
giraffe_df = clean_inat_minimal(raw_df=raw_df)
print(giraffe_df.shape)
giraffe_df.head(n=5)

(3860, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,1510582,Giraffa giraffa giraffa,South African Giraffe,31.43662,-25.40287,2025-10-13,8,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
1,1510582,Giraffa giraffa giraffa,South African Giraffe,31.483228,-24.437263,2025-10-14,15,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
2,1510571,Giraffa giraffa,Southern Giraffe,22.147994,-34.02525,2025-10-04,13,"[6986, 6987, 48651, 59647, 91708, 97392, 10812..."
3,1510571,Giraffa giraffa,Southern Giraffe,31.630257,-23.822058,2025-10-13,17,"[6986, 9074, 48488, 53299, 59647, 69020, 91708..."
4,1510582,Giraffa giraffa giraffa,South African Giraffe,31.213092,-25.172888,2025-10-05,15,"[6986, 7478, 48524, 57957, 59647, 69020, 91708..."


6. Hippo:

In [None]:
# Hippo: pull the raw observations for this taxon id.
TAXON_ID = 42149
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=42149, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 2445
Fetched rows: 2445
2445


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.journal_posts_count,user.activity_count,user.species_count,user.annotated_observations_count,user.universal_search_rank,user.roles,user.icon_url,user.preferences.prefers_project_addition_by,user.preferences.prefers_community_taxa,user.preferences.prefers_observation_fields_by
0,research,2025-09-04T03:49:00+02:00,,[],9b124681-65e2-47f7-97a5-6ebc4afa6f1a,320805742,0,True,Hipopótamo,False,...,0,114,94,0.0,99,[],,,,
1,research,2025-07-16T15:14:00+02:00,,[],c2bfa6ba-19aa-43cd-a6fc-166a0b10edfd,320689614,0,True,Common Hippopotamus,False,...,0,111,72,0.0,76,[],,,,


In [None]:
# Reduce the raw hippo observations to the KEEP_COLS columns.
hippo_df = clean_inat_minimal(raw_df=raw_df)
print(hippo_df.shape)
hippo_df.head(n=5)

(2445, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,42149,Hippopotamus amphibius,Common Hippopotamus,31.915379,-25.11986,2025-09-04,3,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
1,42149,Hippopotamus amphibius,Common Hippopotamus,30.58529,-24.225022,2025-07-16,15,"[6986, 9074, 48477, 53299, 59647, 91708, 97392..."
2,42149,Hippopotamus amphibius,Common Hippopotamus,31.613224,-23.838688,2025-10-13,16,"[6986, 9074, 48488, 53299, 59647, 69020, 91708..."
3,42149,Hippopotamus amphibius,Common Hippopotamus,31.577587,-24.984697,2025-10-05,12,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
4,42149,Hippopotamus amphibius,Common Hippopotamus,31.410626,-23.530782,2025-09-11,16,"[6986, 9074, 48488, 53299, 59647, 69020, 91708..."


7. Plains Zebra:

In [None]:
# Plains zebra: pull the raw observations for this taxon id.
TAXON_ID = 43335
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=43335, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 4345
Fetched rows: 4345
4345


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.journal_posts_count,user.activity_count,user.species_count,user.annotated_observations_count,user.universal_search_rank,user.roles,user.icon_url,user.preferences.prefers_project_addition_by,user.preferences.prefers_observation_fields_by,user.preferences.prefers_community_taxa
0,needs_id,2025-10-13T16:08:47+02:00,,[],d1088e88-040e-42f5-9d01-829c96bf7d06,321005612,0,False,,False,...,0,437,370,0.0,436,[],https://static.inaturalist.org/attachments/use...,,,
1,research,2025-10-15T07:57:21+02:00,,[],dd1302bb-c36b-4167-a686-a33c13d5dde9,321001916,0,True,Plains Zebra,False,...,0,8277,1643,0.0,7106,[],,,,


In [None]:
# Reduce the raw plains zebra observations to the KEEP_COLS columns.
plains_zebra_df = clean_inat_minimal(raw_df=raw_df)
print(plains_zebra_df.shape)
plains_zebra_df.head(n=5)

(4345, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,43335,Equus quagga,Plains Zebra,32.33803,-27.832153,2025-10-13,16,"[6986, 13313, 48462, 59647, 91708, 97392, 1081..."
1,43335,Equus quagga,Plains Zebra,31.989558,-24.365383,2025-10-15,7,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
2,43335,Equus quagga,Plains Zebra,31.454359,-24.975959,2025-10-15,10,"[6986, 7478, 48482, 57957, 59647, 91708, 97392..."
3,43335,Equus quagga,Plains Zebra,25.775011,-33.446122,2025-10-08,11,"[6986, 8872, 48322, 51115, 59647, 69022, 91708..."
4,43335,Equus quagga,Plains Zebra,31.699237,-24.391707,2025-10-14,17,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."


8. Mountain Zebra:

In [None]:
# Mountain zebra: pull the raw observations for this taxon id.
TAXON_ID = 43330
raw_df = fetch_inat_observations(taxon_id=TAXON_ID)

# Row count, then a two-row preview of the raw frame.
print(raw_df.shape[0])
raw_df.head(n=2)

[iNat] taxon_id=43330, place_id=6986, 2020-10-15→2025-10-15 | total verifiable: 727
Fetched rows: 727
727


Unnamed: 0,quality_grade,time_observed_at,taxon_geoprivacy,annotations,uuid,id,cached_votes_total,identifications_most_agree,species_guess,identifications_most_disagree,...,user.identifications_count,user.journal_posts_count,user.activity_count,user.species_count,user.annotated_observations_count,user.universal_search_rank,user.roles,user.icon_url,user.preferences.prefers_project_addition_by,user.preferences.prefers_observation_fields_by
0,research,2025-10-14T08:44:21+02:00,open,[],fa216710-77ec-4ecc-a21d-bd9a9f7c8a7f,320986189,1,True,Mountain Zebra,False,...,131,0,2266,1175,355.0,2135,[],https://static.inaturalist.org/attachments/use...,,
1,research,2025-10-13T10:52:00+02:00,open,[],09ae463c-1e26-4231-b699-7d5cbe13e68a,320674519,0,True,Cape Mountain Zebra,False,...,1,0,36,30,0.0,35,[],,,


In [None]:
# Reduce the raw mountain zebra observations to the KEEP_COLS columns.
mountain_zebra_df = clean_inat_minimal(raw_df=raw_df)
print(mountain_zebra_df.shape)
mountain_zebra_df.head(n=5)

(727, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,43330,Equus zebra,Mountain Zebra,22.502972,-32.330037,2025-10-14,8,"[6986, 6987, 48630, 59647, 69023, 91708, 97392..."
1,132668,Equus zebra zebra,Cape Mountain Zebra,20.475947,-34.059425,2025-10-13,10,"[6986, 6987, 48663, 59647, 69028, 91708, 97392..."
2,132668,Equus zebra zebra,Cape Mountain Zebra,20.420317,-34.453768,2025-10-02,10,"[6986, 6987, 48632, 59647, 69351, 91708, 97392..."
3,132668,Equus zebra zebra,Cape Mountain Zebra,20.465855,-34.067412,2025-10-09,16,"[6986, 6987, 48663, 59647, 91708, 97392, 10813..."
4,132668,Equus zebra zebra,Cape Mountain Zebra,20.469004,-34.06438,2025-10-09,16,"[6986, 6987, 48663, 59647, 91708, 97392, 10813..."


Merge into one animals DataFrame

In [None]:
# Stack the per-species frames row-wise, renumber the index, and pin the
# column order to KEEP_COLS.
species_frames = [
    lion_df,
    warthog_df,
    elephant_df,
    cheetah_df,
    giraffe_df,
    hippo_df,
    plains_zebra_df,
    mountain_zebra_df,
]
animals_df = pd.concat(species_frames, ignore_index=True).reindex(columns=KEEP_COLS)

Quick look at the animals DataFrame

In [None]:
# Sanity-check the merged frame before it is written to disk.
print("Shape:", animals_df.shape)

# Preview of the leading rows
display(animals_df.head(10))

# Observation counts per common name (15 most frequent)
if "taxon.preferred_common_name" in animals_df.columns:
    name_counts = animals_df["taxon.preferred_common_name"].value_counts()
    print("\nCounts by common name (top 15):")
    display(name_counts.head(15))

# Descriptive statistics for the coordinates
coord_cols = ["longitude", "latitude"]
if all(col in animals_df.columns for col in coord_cols):
    print("\nLongitude/Latitude summary:")
    display(animals_df[coord_cols].describe())

# Earliest / latest observation date and how many dates failed to parse
if "observed_on_details.date" in animals_df.columns:
    _dates = pd.to_datetime(
        animals_df["observed_on_details.date"],
        errors="coerce",
        utc=True,
    )
    print("\nDate coverage:")
    print("  min:", _dates.min())
    print("  max:", _dates.max())
    print("  missing dates:", _dates.isna().sum())

# Share of missing values per column, in percent, largest first
missing_pct = animals_df.isna().mean().mul(100).round(1)
print("\nMissingness (% by column):")
display(missing_pct.sort_values(ascending=False))

# Light duplicate check: same taxon id, date, hour and coordinates.
# dupe_cols stays a module-level name because a later cell reuses it.
dupe_cols = [
    col
    for col in (
        "taxon.id",
        "observed_on_details.date",
        "observed_on_details.hour",
        "longitude",
        "latitude",
    )
    if col in animals_df.columns
]
if dupe_cols:
    dupes = animals_df.duplicated(subset=dupe_cols).sum()
    print(f"\nPotential duplicate rows on {dupe_cols}: {dupes}")

Shape: (21942, 8)


Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,41964,Panthera leo,Lion,32.20007,-27.734227,2025-10-13,9,"[6986, 13313, 48462, 59647, 91708, 97392, 1081..."
1,557401,Panthera leo melanochaita,Southern Lion,31.966613,-24.480334,2025-10-15,6,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
2,557401,Panthera leo melanochaita,Southern Lion,31.900091,-24.572784,2025-10-15,5,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
3,557401,Panthera leo melanochaita,Southern Lion,31.579412,-24.875755,2025-08-15,7,"[6986, 7478, 48525, 57957, 59647, 91708, 97392..."
4,557401,Panthera leo melanochaita,Southern Lion,31.908049,-25.278137,2025-08-07,17,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
5,557401,Panthera leo melanochaita,Southern Lion,31.598336,-25.169304,2025-10-07,9,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
6,557401,Panthera leo melanochaita,Southern Lion,31.757075,-24.859772,2025-10-06,17,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
7,557401,Panthera leo melanochaita,Southern Lion,31.740621,-24.972518,2025-10-06,8,"[6986, 7478, 57957, 59647, 69020, 91708, 97392..."
8,557401,Panthera leo melanochaita,Southern Lion,30.852415,-23.989544,2025-09-14,11,"[6986, 9074, 48488, 53299, 59647, 91708, 97392..."
9,557401,Panthera leo melanochaita,Southern Lion,30.930098,-23.970731,2025-09-14,11,"[6986, 9074, 48488, 53299, 59647, 91708, 97392..."



Counts by common name (top 15):


Unnamed: 0_level_0,count
taxon.preferred_common_name,Unnamed: 1_level_1
African Savanna Elephant,4701
South African Giraffe,3328
Common Hippopotamus,2445
Southern Lion,2424
Southern Warthog,2114
Plains Zebra,1968
Chapman's Zebra,1368
Burchell's Zebra,1009
Southern and Eastern African Cheetah,687
Cape Mountain Zebra,647



Longitude/Latitude summary:


Unnamed: 0,longitude,latitude
count,21942.0,21942.0
mean,29.310522,-26.892062
std,3.440205,3.410924
min,17.948736,-34.748293
25%,27.105536,-28.30241
50%,31.304011,-25.279289
75%,31.719606,-24.57357
max,32.924815,-22.039419



Date coverage:
  min: 2020-10-15 00:00:00+00:00
  max: 2025-10-15 00:00:00+00:00
  missing dates: 0

Missingness (% by column):


Unnamed: 0,0
taxon.id,0.0
taxon.name,0.0
taxon.preferred_common_name,0.0
longitude,0.0
latitude,0.0
observed_on_details.date,0.0
observed_on_details.hour,0.0
place_ids,0.0



Potential duplicate rows on ['taxon.id', 'observed_on_details.date', 'observed_on_details.hour', 'longitude', 'latitude']: 625


In [None]:
# Keep only the first row of each group that matches on dupe_cols.
animals_df = animals_df.drop_duplicates(subset=dupe_cols, keep="first")
print("Shape after dropping duplicates:", animals_df.shape)

Shape after dropping duplicates: (21317, 8)


In [None]:
# Display the de-duplicated frame as the cell's output.
animals_df

Unnamed: 0,taxon.id,taxon.name,taxon.preferred_common_name,longitude,latitude,observed_on_details.date,observed_on_details.hour,place_ids
0,41964,Panthera leo,Lion,32.200070,-27.734227,2025-10-13,9,"[6986, 13313, 48462, 59647, 91708, 97392, 1081..."
1,557401,Panthera leo melanochaita,Southern Lion,31.966613,-24.480334,2025-10-15,6,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
2,557401,Panthera leo melanochaita,Southern Lion,31.900091,-24.572784,2025-10-15,5,"[6986, 7478, 48525, 57957, 59647, 69020, 91708..."
3,557401,Panthera leo melanochaita,Southern Lion,31.579412,-24.875755,2025-08-15,7,"[6986, 7478, 48525, 57957, 59647, 91708, 97392..."
4,557401,Panthera leo melanochaita,Southern Lion,31.908049,-25.278137,2025-08-07,17,"[6986, 7478, 48532, 57957, 59647, 69020, 91708..."
...,...,...,...,...,...,...,...,...
21937,132668,Equus zebra zebra,Cape Mountain Zebra,22.510217,-32.332507,2020-11-13,18,"[6986, 6987, 48630, 59647, 69023, 91708, 97392..."
21938,132668,Equus zebra zebra,Cape Mountain Zebra,22.505989,-32.331872,2020-11-13,16,"[6986, 6987, 48630, 59647, 69023, 91708, 97392..."
21939,132668,Equus zebra zebra,Cape Mountain Zebra,18.436840,-34.250275,2020-11-22,9,"[6986, 6987, 48659, 52355, 59647, 71668, 91708..."
21940,132668,Equus zebra zebra,Cape Mountain Zebra,22.355007,-32.263832,2020-10-31,14,"[6986, 6987, 48630, 59647, 69023, 91708, 97392..."


Save to a CSV file

In [None]:
# Write the final table to disk without the index column, then report the row count.
n_saved = len(animals_df)
animals_df.to_csv("animals.csv", index=False)
print("Saved animals.csv with", n_saved, "rows.")

Saved animals.csv with 21317 rows.
