# Locations

Some basic statistics on Locations.

## History

2025-07-13 Initial version with statistics on location names

In [None]:
from django.db import connection
from django.db.models.functions import Length
from django.template.loader import render_to_string

from IPython.display import display, HTML

import matplotlib.pyplot as plt

from ebird.api.data.models import Checklist, Location

In [None]:
# Location type - hotspots versus private (personal) locations.

# One count per category; the unfiltered count is the overall total.
total = Location.objects.all().count()
hotspots = Location.objects.filter(hotspot=True).count()
private = Location.objects.filter(hotspot=False).count()

# Print the raw numbers before charting them.
for caption, number in (("Total", total), ("Hotspots", hotspots), ("Private", private)):
    print(caption, number)

labels = ('Hotspots', 'Private')
sizes = [hotspots, private]

# Pie chart of the two location types; each wedge shows a whole-number
# percentage. The trailing semicolon suppresses the notebook's echo of
# the value returned by ax.pie().
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.f%%');

In [None]:
# A breakdown of location types by checklist protocol.

# Protocol codes paired with the name used in the chart labels.
protocols = (("P20", "Incidental"), ("P21", "Stationary"), ("P22", "Travelling"))
# Value of the location's hotspot flag paired with its label suffix.
location_kinds = ((True, "hotspot"), (False, "private"))

labels = []
values = []

for code, protocol in protocols:
    for is_hotspot, kind in location_kinds:
        labels.append(f"{protocol} ({kind})")
        matching = Checklist.objects.filter(location__hotspot=is_hotspot, protocol_code=code)
        # DISTINCT ON location_id, so a location is counted once no matter
        # how many matching checklists it has.
        values.append(matching.distinct("location_id").count())

# Print the numbers as an aligned two-column listing.
row = "{:22}: {}"
for label, value in zip(labels, values):
    print(row.format(label, value))

# The trailing semicolon suppresses the notebook's echo of the value
# returned by ax.pie().
fig, ax = plt.subplots()
ax.pie(values, labels=labels, autopct='%1.f%%');

In [None]:
# The twenty locations whose `original` value is shortest.

# Annotate every location with the length of `original`, shortest first,
# then keep the first twenty rows.
by_length = Location.objects.annotate(length=Length('original')).order_by("length")
shortest = by_length[:20]

# Render the rows with the shared names-table template and show the HTML.
context = {"locations": shortest}
table = render_to_string("location_basics/names-table.html", context)
display(HTML(table))

In [None]:
# The twenty locations whose `original` value is longest.

# Same annotation as for the shortest names, but sorted in descending
# order of length before taking the first twenty rows.
with_length = Location.objects.annotate(length=Length('original'))
longest = with_length.order_by("-length")[:20]

# Render the rows with the shared names-table template and show the HTML.
table = render_to_string(
    "location_basics/names-table.html",
    {"locations": longest},
)
display(HTML(table))

In [None]:
# What is the distribution of the name lengths?
#
# Lengths are grouped into buckets five characters wide: 1-5, 6-10, etc.
# The query returns one (count, bucket index) row per bucket, where bucket
# index n covers lengths 5n + 1 to 5n + 5. Raw SQL is used so the grouping
# happens in the database.
with connection.cursor() as cursor:
    cursor.execute(
        "select count(*), ((length(original) - 1) / 5)::int as quantile from data_location group by quantile"
    )

    result = sorted(cursor.fetchall(), key=lambda t: t[1])

data: list[int] = []
centres: list[int] = []

for count, index in result:
    # First length covered by this bucket, e.g. 1, 6, 11, ...
    start = index * 5 + 1
    data.append(count)
    # Centre each bar on the middle of its bucket so that, with width=4,
    # it spans start..start+4 (1-5, 6-10, ...). Centring on the bucket's
    # upper bound would shift every bar two characters to the right.
    centres.append(start + 2)

fig, ax = plt.subplots()
fig.set_figwidth(15)
ax.bar(centres, data, width=4)
ax.set_xlabel("Name length ranges, e.g. 1-5, 6-10, etc.")
ax.set_ylabel("Number of locations")
ax.set_title('Distribution of location name lengths')
plt.show()

In [None]:
# Name lengths as a pie chart.

# Each label describes the filter at the same position in `values`. The
# first bucket uses length <= 20, so it is labelled "<=20": names of
# exactly twenty characters belong to it.
labels = ["<=20", "21-40", "41-60", "61-80", ">80"]

# NOTE(review): this cell measures the `name` field, whereas the cells
# listing the shortest/longest names and the length distribution measure
# `original` - confirm the difference is intended.
with_length = Location.objects.annotate(length=Length('name'))

# One COUNT query per bucket; the buckets do not overlap.
values = [
    with_length.filter(length__lte=20).count(),
    with_length.filter(length__gt=20, length__lte=40).count(),
    with_length.filter(length__gt=40, length__lte=60).count(),
    with_length.filter(length__gt=60, length__lte=80).count(),
    with_length.filter(length__gt=80).count(),
]

for label, value in zip(labels, values):
    print("{:>5}: {}".format(label, value))

# The trailing semicolon suppresses the notebook's echo of the value
# returned by ax.pie().
fig, ax = plt.subplots()
ax.pie(values, labels=labels, autopct='%1.f%%');