In [2]:
import pandas as pd

In [25]:
# Read the forename and surname tables from the assets directory.
forenames, surnames = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../assets/common-forenames-by-country.csv",
        "../assets/common-surnames-by-country.csv",
    )
)

In [20]:
# Show Country / Gender / Romanized Name for rows whose Country is one of the
# listed codes.
forenames.loc[
    forenames["Country"].isin(["JP", "KR", "US", "CN", "TW", "GB"]),
    ["Country", "Gender", "Romanized Name"],
]

Unnamed: 0,Country,Gender,Romanized Name
745,GB,F,Olivia
746,GB,F,Amelia
747,GB,F,Isla
748,GB,F,Ava
749,GB,F,Ivy
...,...,...,...
2475,JP,F,Yua
2476,JP,F,Mei
2477,JP,F,Riko
2478,JP,F,Sakura


In [23]:
# Spot-check 100 random rows from every country NOT in the list below.
# random_state pins the draw so the displayed sample is identical on every
# Restart & Run All instead of changing with each execution.
forenames.loc[
    ~forenames["Country"].isin(["JP", "KR", "US", "CN", "TW", "GB"])
].sample(n=100, random_state=42)[["Country", "Gender", "Romanized Name"]]

Unnamed: 0,Country,Gender,Romanized Name
2331,ME,F,Ljiljana
1054,IM,F,Isabella
1617,NZ,F,Amelia
146,AU,F,Amelia
1784,PT,F,Leonor
...,...,...,...
1723,PK,F,Fozia
2122,ZA,F,Iminathi
154,AU,F,Willow
1772,PR,M,Liam


In [30]:
from dataclasses import dataclass
from typing import Literal
from random import sample, choice

@dataclass
class NamePair:
    """A generated first/last name together with the country code it was drawn for."""
    # Given name; NameGenerator fills this from the forename table's "Romanized Name" column.
    first_name: str
    # Family name; NameGenerator fills this from the surname table's "Romanized Name" column.
    last_name: str
    # Country code both names were selected for (e.g. 'TW').
    origin: str
    
# Group tag -> country codes the tag may resolve to. NameGenerator.generate_name
# picks one of the listed codes at random when it is given one of these tags.
TAG_TRANSL = {
    "Oriental": [
        "JP",
        "KR",
        "CN",
        "TW",
    ],
    "Western": [
        "US",
        "GB",
    ],
}

class NameGenerator:
    """Draws random first/last name pairs from per-country name tables.

    Both tables are read through their ``Country`` and ``Romanized Name``
    columns; the forename table is additionally filtered on ``Gender``, which
    is compared against the upper-cased ``gender`` argument.
    """

    def __init__(self, forenames: pd.DataFrame, surnames: pd.DataFrame):
        """Store references to the forename and surname tables (no copy is made)."""
        self.forenames = forenames
        self.surnames = surnames

    def generate_name(self, gender: Literal["m", "f"], tag: str) -> NamePair:
        """Return a random name for ``gender`` from the country selected by ``tag``.

        Args:
            gender: ``"m"`` or ``"f"``; upper-cased before being compared with
                the ``Gender`` column.
            tag: Either a key of ``TAG_TRANSL`` (one of its country codes is
                then chosen at random) or, failing that, a country code that
                is used as-is.

        Returns:
            A ``NamePair`` whose ``origin`` is the country code actually used.

        Raises:
            IndexError: If the tables hold no forename for that country and
                gender, or no surname for that country.
        """
        group_codes = TAG_TRANSL.get(tag)
        # Unknown tags fall through and are treated as a literal country code.
        origin = choice(group_codes) if group_codes else tag
        wanted_gender = gender.upper()

        forename_rows = self.forenames[
            (self.forenames.Country == origin)
            & (self.forenames.Gender == wanted_gender)
        ]
        surname_rows = self.surnames[self.surnames.Country == origin]

        # dropna(): a blank CSV cell is read as NaN and must never be handed
        # back as a name.
        first_name_candidates = forename_rows["Romanized Name"].dropna().tolist()
        last_name_candidates = surname_rows["Romanized Name"].dropna().tolist()

        # random.choice would fail with a bare "Cannot choose from an empty
        # sequence"; raise the same exception type but say what was missing.
        if not first_name_candidates:
            raise IndexError(
                f"no forenames available for country {origin!r} "
                f"and gender {wanted_gender!r}"
            )
        if not last_name_candidates:
            raise IndexError(f"no surnames available for country {origin!r}")

        return NamePair(
            first_name=choice(first_name_candidates),
            last_name=choice(last_name_candidates),
            origin=origin,
        )

In [31]:
# Build the generator from only the countries listed in TAG_TRANSL and only the
# columns NameGenerator reads.
gen = NameGenerator(
    forenames=forenames.loc[
        forenames["Country"].isin(["JP", "KR", "US", "CN", "TW", "GB"]),
        ["Country", "Gender", "Romanized Name"],
    ],
    surnames=surnames.loc[
        surnames["Country"].isin(["JP", "KR", "US", "CN", "TW", "GB"]),
        ["Country", "Romanized Name"],
    ],
)

In [37]:
# Smoke test: draw one male name from a randomly chosen country of the "Oriental" tag.
gen.generate_name(gender="m", tag="Oriental")

NamePair(first_name='Wen-Hsiung', last_name='Liu', origin='TW')

In [38]:
import pickle as pkl

In [39]:
# Persist the generator, including the filtered name tables it holds.
# NOTE: pickle records classes by module and name only, so whatever loads this
# file must be able to resolve NameGenerator under the same module path.
with open("../assets/name_generator.pkl", "wb") as pickle_file:
    pkl.dump(gen, pickle_file)