# Observer names
Some names, particularly those of tour guides, are long.
This notebook looks at ways to shorten them.

## History

2025-05-03 Initial version

In [None]:
from django.db.models.functions import Length
from django.template.loader import render_to_string

from IPython.display import display, HTML

from data.models import Observer

In [None]:
# The fifty observers with the longest names, longest first
longest = (
    Observer.objects
    .annotate(length=Length("name"))
    .order_by("-length")[:50]
)

table = render_to_string(
    "notebooks/observer-names.html",
    {"observers": longest},
)
display(HTML(table))

In [None]:
# Observers whose names contain "--", "|", a backslash or "/"
punctuated = Observer.objects.filter(
    name__regex=r"(--|\||\\|/)",
)

table = render_to_string(
    "notebooks/observer-names.html",
    {"observers": punctuated},
)
display(HTML(table))

In [None]:
# Observers whose names contain two consecutive spaces
spaces = Observer.objects.filter(
    name__contains="  ",
)

table = render_to_string(
    "notebooks/observer-names.html",
    {"observers": spaces},
)
display(HTML(table))

In [None]:
# Shorten names
import logging

logger = logging.getLogger(__name__)

# Normally a separator, "|" or "--", sits between the name of the
# observer and any commercial name (see remove_adverts). When the name
# consists only of the commercial name there is nothing to split on, so
# names containing any of the following keywords are logged by
# flag_keywords for review and manual correction. The match is a
# case-sensitive substring test.
keywords = ["Tours", "Guide", "www", ".com"]


def remove_adverts(name):
    """Return the part of ``name`` that precedes any advert separator.

    The separators "|" and "--" are checked in that order. When one is
    present, everything from its first occurrence onwards is discarded and
    surrounding whitespace is stripped from what remains. A name containing
    neither separator is returned unchanged, whitespace included.
    """
    for marker in ("|", "--"):
        head, found, _tail = name.partition(marker)
        if found:
            name = head.strip()
    return name


def remove_extra_spaces(name):
    """Collapse every run of two or more spaces in ``name`` to one space.

    A single ``str.replace`` pass only shortens longer runs ("a   b" would
    become "a  b"), so the replacement is repeated until no double space
    remains. Leading and trailing runs are collapsed like any other but
    not removed, and whitespace other than the space character is left
    untouched.
    """
    while "  " in name:
        name = name.replace("  ", " ")
    return name

    
def flag_keywords(name):
    """Log a warning when ``name`` contains any of the review keywords.

    Each entry of the module-level ``keywords`` list is tested as a
    case-sensitive substring of ``name``. The warning is emitted at most
    once per call, even when several keywords match, so the same name is
    not reported repeatedly.
    """
    if any(keyword in name for keyword in keywords):
        logger.warning("Review observer name: %s", name)
            

def generate_byname(name) -> str:
    """Return a shortened byname for ``name``, or "" when nothing changed.

    Adverts are removed first, then extra spaces. The shortened result is
    passed to ``flag_keywords`` so that names still containing a review
    keyword are logged.
    """
    shortened = remove_extra_spaces(remove_adverts(name))
    flag_keywords(shortened)

    if shortened == name:
        # Cleaning made no difference, so there is no byname to record.
        return ""
    return shortened


# Write each observer's name and its cleaned form to two files, one
# observer per line, so the results of the cleaning can be compared.
# Both files are opened in append mode, so re-running this cell adds to
# whatever they already contain.
with open("results/observers-original.txt", "a", encoding="utf-8") as fpo, \
        open("results/observers-cleaned.txt", "a", encoding="utf-8") as fpc:

    for observer in Observer.objects.all():
        # The byname is only set on the in-memory object; nothing is saved
        # to the database here.
        observer.byname = generate_byname(observer.name)

        fpo.write("%s\n" % observer.name)
        # generate_byname() returns "" when the name needed no cleaning, so
        # fall back to the name itself to keep the two files line-aligned.
        # NOTE(review): this previously wrote observer.original, which the
        # cleaning above never touches - confirm the intended output.
        fpc.write("%s\n" % (observer.byname or observer.name))

print("Observer bynames generated")