In [1]:
!pip install Faker 
!pip install names-dataset



In [2]:
from faker import Faker

In [37]:
en_fake = Faker()
fa_fake = Faker('fa')

In [39]:
from names_dataset import NameDataset
nd = NameDataset()

In [46]:
nd.search('behzad')['first_name']['gender']

{'Female': 0.022, 'Male': 0.978}

# let's Generate some Fake Data!!

In [47]:
import numpy as np 
import pandas as pd 

In [105]:
def make_name():
    ''' 
    This function generate an Iranian name in 50% of the times and
    an English name in the other 50% of the times.
    '''
    if np.random.rand() > .5:
        return en_fake.name()
    return fa_fake.name()

In [106]:
name = [make_name() for _ in range(100)]

In [107]:
df = pd.DataFrame({
    'name' : name
})

In [108]:
df

Unnamed: 0,name
0,زينب تحسینی
1,Robert Palmer
2,جناب آقای آرمین تنزیلی
3,Christopher Jenkins
4,اميرمحمد اشتری
...,...
95,محمدجواد نعمتی
96,Christopher Moore
97,Kristina Higgins
98,سرکار خانم نيايش سماوات


# extracting first name and last name

In [117]:
full_name = fa_fake.name()
full_name

'الينا شاکری'

In [121]:
f_name = full_name.split()[0]
f_name

'الينا'

In [124]:
df['first_name'] = df['name'].apply(lambda full_name: full_name.split()[0])
df['last_name'] = df['name'].apply(lambda full_name: full_name.split()[-1])

In [127]:
df

Unnamed: 0,name,last_name,first_name
0,زينب تحسینی,تحسینی,زينب
1,Robert Palmer,Palmer,Robert
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب
3,Christopher Jenkins,Jenkins,Christopher
4,اميرمحمد اشتری,اشتری,اميرمحمد
...,...,...,...
95,محمدجواد نعمتی,نعمتی,محمدجواد
96,Christopher Moore,Moore,Christopher
97,Kristina Higgins,Higgins,Kristina
98,سرکار خانم نيايش سماوات,سماوات,سرکار


# Predict Gender

In [224]:
def name_to_gender(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return 
    return max(info['gender'], key=info['gender'].get)

In [225]:
name_to_gender('شقایق')

'Female'

In [226]:
name_to_gender('behzad')

'Male'

In [227]:
#for unknown names
name_to_gender('berpsogmrwpovf')

### let's bring this ability to our DataFrame

In [228]:
df['gender'] = df['first_name'].apply(lambda fn: name_to_gender(fn))

In [229]:
df

Unnamed: 0,name,last_name,first_name,gender
0,زينب تحسینی,تحسینی,زينب,Female
1,Robert Palmer,Palmer,Robert,Male
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب,Male
3,Christopher Jenkins,Jenkins,Christopher,Male
4,اميرمحمد اشتری,اشتری,اميرمحمد,Male
...,...,...,...,...
95,محمدجواد نعمتی,نعمتی,محمدجواد,Male
96,Christopher Moore,Moore,Christopher,Male
97,Kristina Higgins,Higgins,Kristina,Female
98,سرکار خانم نيايش سماوات,سماوات,سرکار,


# Gender probability

In [265]:
def geder_prob(first_name):
    '''
    cheking the Probability of estimation! 
    '''
    info = nd.search(first_name)['first_name']
    if info is None: 
        return
    return max(info['gender'].values())
    

In [273]:
geder_prob('ali')

0.942

In [274]:
geder_prob('نیلوفر')

0.866

### apply it to our DataFrame...

In [275]:
df['Gender Probability'] = df['first_name'].apply(lambda gp: geder_prob(gp))

In [276]:
df

Unnamed: 0,name,last_name,first_name,gender,Gender Probability
0,زينب تحسینی,تحسینی,زينب,Female,0.923
1,Robert Palmer,Palmer,Robert,Male,0.992
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب,Male,0.942
3,Christopher Jenkins,Jenkins,Christopher,Male,0.991
4,اميرمحمد اشتری,اشتری,اميرمحمد,Male,0.953
...,...,...,...,...,...
95,محمدجواد نعمتی,نعمتی,محمدجواد,Male,0.960
96,Christopher Moore,Moore,Christopher,Male,0.991
97,Kristina Higgins,Higgins,Kristina,Female,0.993
98,سرکار خانم نيايش سماوات,سماوات,سرکار,,


# country prediction

In [289]:
def name_to_country(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    return max(info['country'], key=info['country'].get)

In [290]:
name_to_country('nicolas')

'France'

In [291]:
name_to_country('Christopher')

'United States'

In [292]:
df['country'] = df['first_name'].apply(lambda co: name_to_country(co))

In [293]:
df

Unnamed: 0,name,last_name,first_name,gender,Gender Probability,country
0,زينب تحسینی,تحسینی,زينب,Female,0.923,Iraq
1,Robert Palmer,Palmer,Robert,Male,0.992,United States
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب,Male,0.942,Egypt
3,Christopher Jenkins,Jenkins,Christopher,Male,0.991,United States
4,اميرمحمد اشتری,اشتری,اميرمحمد,Male,0.953,Sudan
...,...,...,...,...,...,...
95,محمدجواد نعمتی,نعمتی,محمدجواد,Male,0.960,"Iran, Islamic Republic of"
96,Christopher Moore,Moore,Christopher,Male,0.991,United States
97,Kristina Higgins,Higgins,Kristina,Female,0.993,Russian Federation
98,سرکار خانم نيايش سماوات,سماوات,سرکار,,,


## Country Probability

In [310]:
def Country_Probability(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    return max(info['country'].values())

In [320]:
Country_Probability('behzad')

0.787

In [321]:
Country_Probability('behnaz')

0.672

In [322]:
df['Country Probability'] = df['first_name'].apply(lambda cp: Country_Probability(cp))

In [323]:
df

Unnamed: 0,name,last_name,first_name,gender,Gender Probability,country,Country Probability
0,زينب تحسینی,تحسینی,زينب,Female,0.923,Iraq,0.474
1,Robert Palmer,Palmer,Robert,Male,0.992,United States,0.424
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب,Male,0.942,Egypt,0.547
3,Christopher Jenkins,Jenkins,Christopher,Male,0.991,United States,0.444
4,اميرمحمد اشتری,اشتری,اميرمحمد,Male,0.953,Sudan,0.311
...,...,...,...,...,...,...,...
95,محمدجواد نعمتی,نعمتی,محمدجواد,Male,0.960,"Iran, Islamic Republic of",0.546
96,Christopher Moore,Moore,Christopher,Male,0.991,United States,0.444
97,Kristina Higgins,Higgins,Kristina,Female,0.993,Russian Federation,0.264
98,سرکار خانم نيايش سماوات,سماوات,سرکار,,,,


In [327]:
df.head(20)

Unnamed: 0,name,last_name,first_name,gender,Gender Probability,country,Country Probability
0,زينب تحسینی,تحسینی,زينب,Female,0.923,Iraq,0.474
1,Robert Palmer,Palmer,Robert,Male,0.992,United States,0.424
2,جناب آقای آرمین تنزیلی,تنزیلی,جناب,Male,0.942,Egypt,0.547
3,Christopher Jenkins,Jenkins,Christopher,Male,0.991,United States,0.444
4,اميرمحمد اشتری,اشتری,اميرمحمد,Male,0.953,Sudan,0.311
5,جناب آقای دکتر بنیامین هوشیار,هوشیار,جناب,Male,0.942,Egypt,0.547
6,پرهام اشتری,اشتری,پرهام,Male,0.94,"Iran, Islamic Republic of",0.953
7,اسما موحد,موحد,اسما,Female,0.871,Egypt,0.441
8,Michael Dawson,Dawson,Michael,Male,0.992,United States,0.407
9,Kevin Jones,Jones,Kevin,Male,0.992,United States,0.31


# it's done!!