In [1]:
import sys
from collections import namedtuple

In [7]:
from typing import List

In [8]:
# One parsed record: the fields are filled positionally as (name, sex, count).
NameLine = namedtuple("NameLine", "name sex count")

In [20]:
def name_lines_for_year(year: int, data_dir: str = "./data") -> List[NameLine]:
    """Load every record from the names file of a single year.

    Reads ``{data_dir}/yob{year}.txt``. Each non-blank line is split on commas
    and its fields are passed positionally to ``NameLine`` (name, sex, count).

    Args:
        year: Year to load; must satisfy ``1880 <= year <= 2019``.
        data_dir: Directory containing the ``yob{year}.txt`` files. Defaults to
            ``./data``, the location that used to be hard-coded.

    Returns:
        One ``NameLine`` per non-blank line, in file order. All fields are the
        raw strings read from the file; ``count`` is not converted to ``int``.

    Raises:
        ValueError: If ``year`` is outside 1880..2019.
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        TypeError: If a non-blank line does not split into exactly three fields.
    """
    if not (1880 <= year <= 2019):
        raise ValueError("Year out of range.")

    name_lines: List[NameLine] = []
    # Explicit encoding so the result does not depend on the platform locale.
    # NOTE(review): assumes the data files are ASCII/UTF-8 text; confirm.
    with open(f"{data_dir}/yob{year}.txt", "r", encoding="utf-8") as f:
        # Iterate the file directly instead of materialising it with readlines().
        for raw_line in f:
            stripped = raw_line.strip()
            if not stripped:
                # A blank line (e.g. trailing newline at EOF) would otherwise
                # make NameLine(*[""]) fail with a TypeError.
                continue
            name_lines.append(NameLine(*stripped.split(",")))
    return name_lines

In [21]:
# Load the 1990 file as a sample so the parsed records can be inspected.
x = name_lines_for_year(1990)

In [28]:
# Display the first ten elements of x.
x[:10]

[NameLine(name='Jessica', sex='F', count='46481'),
 NameLine(name='Ashley', sex='F', count='45561'),
 NameLine(name='Brittany', sex='F', count='36538'),
 NameLine(name='Amanda', sex='F', count='34410'),
 NameLine(name='Samantha', sex='F', count='25865'),
 NameLine(name='Sarah', sex='F', count='25818'),
 NameLine(name='Stephanie', sex='F', count='24861'),
 NameLine(name='Jennifer', sex='F', count='22230'),
 NameLine(name='Elizabeth', sex='F', count='20748'),
 NameLine(name='Lauren', sex='F', count='20507')]

In [29]:
# The forty years 1980 through 2019 (inclusive), in ascending order.
# The "fourty" spelling is kept on purpose: other cells use this exact name.
last_fourty_years = [year for year in range(1980, 2020)]

In [31]:
# Flatten the per-year record lists into one list, keeping year order and,
# within each year, the order returned by name_lines_for_year.
name_lines_fourty_years = list(
    record
    for yr in last_fourty_years
    for record in name_lines_for_year(yr)
)

In [33]:
# Display the first ten elements of the combined list.
name_lines_fourty_years[:10]

[NameLine(name='Jennifer', sex='F', count='58379'),
 NameLine(name='Amanda', sex='F', count='35819'),
 NameLine(name='Jessica', sex='F', count='33923'),
 NameLine(name='Melissa', sex='F', count='31641'),
 NameLine(name='Sarah', sex='F', count='25756'),
 NameLine(name='Heather', sex='F', count='19974'),
 NameLine(name='Nicole', sex='F', count='19919'),
 NameLine(name='Amy', sex='F', count='19835'),
 NameLine(name='Elizabeth', sex='F', count='19526'),
 NameLine(name='Michelle', sex='F', count='19124')]

In [34]:
# Total number of records in the combined list.
len(name_lines_fourty_years)

1125487

In [35]:
# Count the records whose sex field is 'F'.
sum(1 for record in name_lines_fourty_years if record.sex == 'F')

668291

In [36]:
# Yearly counts per name, for records with sex "F" only:
#   {name: {year: count, year: count, ...}, ...}
# Counts are stored exactly as name_lines_for_year returns them.
female_names = {}
for year in last_fourty_years:
    for current in name_lines_for_year(year):
        if current.sex == "F":
            # Create the inner dict the first time a name is seen, then record this year.
            female_names.setdefault(current.name, {})[year] = current.count

In [41]:
# Display the first ten keys of female_names.
list(female_names.keys())[:10]

['Jennifer',
 'Amanda',
 'Jessica',
 'Melissa',
 'Sarah',
 'Heather',
 'Nicole',
 'Amy',
 'Elizabeth',
 'Michelle']

In [45]:
# Collapse each name's per-year counts into a single integer total.
female_totals = {
    name: sum(int(count) for count in counts_by_year.values())
    for name, counts_by_year in female_names.items()
}

In [46]:
# Display the first ten (name, total) pairs of female_totals.
list(female_totals.items())[:10]

[('Jennifer', 667626),
 ('Amanda', 618448),
 ('Jessica', 881437),
 ('Melissa', 354767),
 ('Sarah', 667888),
 ('Heather', 280406),
 ('Nicole', 423417),
 ('Amy', 250319),
 ('Elizabeth', 598873),
 ('Michelle', 301380)]

In [50]:
# (total, name) tuples in descending order: largest total first, with equal
# totals ordered by name descending (plain tuple comparison, reversed).
top_names = sorted(
    ((total, name) for name, total in female_totals.items()),
    reverse=True,
)

In [51]:
# Display the first ten elements of top_names.
top_names[:10]

[(881437, 'Jessica'),
 (824206, 'Ashley'),
 (710157, 'Emily'),
 (667888, 'Sarah'),
 (667626, 'Jennifer'),
 (618448, 'Amanda'),
 (598873, 'Elizabeth'),
 (536269, 'Samantha'),
 (444541, 'Emma'),
 (433746, 'Stephanie')]

In [52]:
# Write the names of the first 2000 entries of top_names (fewer if the list is
# shorter), one name per line, in list order. The totals are not written.
with open("top2000f-last40.txt", "w") as out_file:
    out_file.writelines(f"{name}\n" for _total, name in top_names[:2000])
        