### The `known()` function receives a name and returns the gender and country in which that name is common

# Install Laiberry

In [1]:
!pip install Faker
!pip install names-dataset



In [2]:
from faker import Faker
from names_dataset import NameDataset
import numpy as np
import pandas as pd

# Load Data

In [3]:
en_fake = Faker()
fa_fake = Faker('fa')

In [4]:
nd = NameDataset()

In [5]:
def make_name():
    return np.random.choice(
        [en_fake.name(), fa_fake.name()],
         p=[0.9, 0.1]
    )

In [6]:
df = pd.DataFrame({
    'fullName' : [make_name() for _ in range(10)]
})

In [7]:
df.head(3)

Unnamed: 0,fullName
0,Natasha Montgomery
1,Larry Taylor MD
2,Dustin Keith


# Extract First name and Last name

In [8]:
df['firstName'] = df['fullName'].apply(
    lambda full_name: full_name.split()[0]
)

In [9]:
df['lastName'] = df['fullName'].apply(
    lambda full_name: full_name.split()[-1]
)

In [10]:
df.head(3)

Unnamed: 0,fullName,firstName,lastName
0,Natasha Montgomery,Natasha,Montgomery
1,Larry Taylor MD,Larry,MD
2,Dustin Keith,Dustin,Keith


# Predict Gender

In [11]:
def get_gender(name):
    info = nd.search(name)['first_name']
    if info is None:
        return None
    info = info.get('gender')
    return max(info, key=info.get)

In [12]:
df['gender'] = df['firstName'].apply(get_gender)

In [13]:
def get_gender_probability(name):
    info = nd.search(name)['first_name']
    if info is None:
        return None
    info = info['gender'].values()
    return max(info)

In [14]:
df['gender_probability'] = df['firstName'].apply(get_gender_probability)

In [15]:
df.head()

Unnamed: 0,fullName,firstName,lastName,gender,gender_probability
0,Natasha Montgomery,Natasha,Montgomery,Female,0.989
1,Larry Taylor MD,Larry,MD,Male,0.977
2,Dustin Keith,Dustin,Keith,Male,0.994
3,Rebecca Turner,Rebecca,Turner,Female,0.993
4,Mrs. Ann Roy MD,Mrs.,MD,,


# Predict Country

In [16]:
def get_country(name):
    if not isinstance(name, str):
        return None
    info = nd.search(name)['first_name']
    if info is None:
        return None
    return max(info['country'], key=info['country'].get)

In [17]:
df['country'] = df['firstName'].apply(get_country)

In [18]:
def get_country_probability(name):
    info = nd.search(name)['first_name']
    if info is None:
        return None
    info = info['country'].values()
    return max(info)

In [19]:
df['country_probability'] = df['firstName'].apply(get_country_probability)

In [20]:
df.head()

Unnamed: 0,fullName,firstName,lastName,gender,gender_probability,country,country_probability
0,Natasha Montgomery,Natasha,Montgomery,Female,0.989,United States,0.231
1,Larry Taylor MD,Larry,MD,Male,0.977,United States,0.756
2,Dustin Keith,Dustin,Keith,Male,0.994,United States,0.75
3,Rebecca Turner,Rebecca,Turner,Female,0.993,United States,0.327
4,Mrs. Ann Roy MD,Mrs.,MD,,,,
