In [1]:
import os
import numpy as np
import pandas as pd

# Load Data

## Load Uploaded Data to classify

In [41]:
# Choose file
filename = "german-500.csv"
path = os.path.join("..","raw_data","test",filename)

In [42]:
path

'../raw_data/test/german-500.csv'

In [47]:
df = pd.read_csv(path, sep=";",encoding='latin-1', names=["name","country","continent"])

In [49]:
df

Unnamed: 0,name,country,continent
0,name,country,continent
1,Marie,Austria,Europe
2,Sophie,Germany,Europe
3,Maximilian,Germany,Europe
4,Maria,Brazil,Americas
...,...,...,...
495,Dennis,,
496,Elijah,,
497,Emanuel,,
498,Ensar,,


In [50]:
df.shape

(500, 3)

## Load Name Database

In [51]:
def load_data():
    path = os.path.join("..","raw_data","wgnd_2_0_name-gender-code.csv")
    df = pd.read_csv(path)

    # Clean data
    # kick out where gender is not defined
    df = df[df["gender"] !="?"]

    # add country names and regions
    path_cc = os.path.join("..","raw_data","country-code","all-country-codes_continent_subregion.csv")
    cc_all = pd.read_csv(path_cc, sep=",")
    cc_short = cc_all[["alpha-2", "name", "region", "sub-region"]].copy()
    cc_short.rename(columns={"name":"country"}, inplace=True)
    df.rename(columns={"code":"alpha-2" }, inplace=True)

    # merge data and country info
    data = pd.merge(df, cc_short, how="left", on="alpha-2")
    continent_list = data["region"].unique()
    # clean merged data
    data = data[data["alpha-2"] != "??"] #filter out names where there is no country assigned
    # smal letters for country and region
    data["country"] = data["country"].apply(lambda x: x.lower())
    data["region"] = data["region"].apply(lambda x: x.lower())

    return data

In [52]:
data = load_data()

In [53]:
data

Unnamed: 0,name,alpha-2,gender,wgt,country,region,sub-region
0,"""baby""",AU,F,1.0,australia,oceania,Australia and New Zealand
1,'aisyah,AU,F,1.0,australia,oceania,Australia and New Zealand
2,'anela,CA,F,1.0,canada,americas,Northern America
3,'fiyinfoluwa,CA,F,1.0,canada,americas,Northern America
4,'olioni,AU,M,1.0,australia,oceania,Australia and New Zealand
...,...,...,...,...,...,...,...
4577585,힘찬,KR,M,1.0,"korea, republic of",asia,Eastern bu
4577586,凉,CN,F,0.5,china,asia,Eastern Asia
4577587,凉,CN,M,0.5,china,asia,Eastern Asia
4577588,凉峰,CN,M,1.0,china,asia,Eastern Asia


In [54]:
data.sample(5)

Unnamed: 0,name,alpha-2,gender,wgt,country,region,sub-region
3224777,vedandtham,IN,M,1.0,india,asia,Southern Asia
301289,baljibill,IN,M,1.0,india,asia,Southern Asia
1866643,meghshayamar,IN,M,1.0,india,asia,Southern Asia
1664140,li cha,CN,F,0.857143,china,asia,Eastern Asia
3936407,承陆,CN,M,1.0,china,asia,Eastern Asia


# Clean and convert Data

## Convert input to string and lower string

In [55]:
df["name"] = df["name"].astype(str)
df["country"] = df["country"].astype(str)
df["continent"] = df["continent"].astype(str)

In [59]:
df["name"] = df["name"].apply(lambda x: x.lower())
df["country"] = df["country"].apply(lambda x: x.lower())
df["continent"] = df["continent"].apply(lambda x: x.lower())

In [60]:
df

Unnamed: 0,name,country,continent
0,name,country,continent
1,marie,austria,europe
2,sophie,germany,europe
3,maximilian,germany,europe
4,maria,brazil,americas
...,...,...,...
495,dennis,,
496,elijah,,
497,emanuel,,
498,ensar,,


## Create country, continent  list

In [14]:
# Country list
country_list = data["country"].unique()

In [15]:
# Continent List
continents_list = data["region"].unique()

## Shorten Dataframe with input names

In [17]:
# Filter Large Data frame with only names

In [62]:
data = data[data['name'].isin(df["name"])]

In [63]:
data.shape

(23714, 7)

# Test for single name

In [20]:
%%time
# For name and country
df_test = data[(data["name"] == "paul") & (data["country"] == "germany")]
df_test

CPU times: user 15.1 ms, sys: 1.4 ms, total: 16.5 ms
Wall time: 14.1 ms


Unnamed: 0,name,alpha-2,gender,wgt,country,region,sub-region
2269089,paul,DE,M,1.0,germany,europe,Western Europe


In [21]:
%%time

df_test = data[(data["name"] == "paul")]
df_test

CPU times: user 6 ms, sys: 0 ns, total: 6 ms
Wall time: 5.33 ms


Unnamed: 0,name,alpha-2,gender,wgt,country,region,sub-region
2269067,paul,AG,M,1.000000,antigua and barbuda,americas,Latin America and the Caribbean
2269068,paul,AT,M,1.000000,austria,europe,Western Europe
2269069,paul,AU,F,0.000153,australia,oceania,Australia and New Zealand
2269070,paul,AU,M,0.999847,australia,oceania,Australia and New Zealand
2269071,paul,BB,M,1.000000,barbados,americas,Latin America and the Caribbean
...,...,...,...,...,...,...,...
2269152,paul,VU,M,1.000000,vanuatu,oceania,Melanesia
2269153,paul,WS,M,1.000000,samoa,oceania,Polynesia
2269154,paul,ZA,M,1.000000,south africa,africa,Sub-Saharan Africa
2269155,paul,ZM,M,1.000000,zambia,africa,Sub-Saharan Africa


# Main functions

In [22]:
def csv_share_male_female(result):
    """
    Takes a dataframe and calculates the probability of femal and male
    """
    male = 0
    female = 0
    male_p = 0
    female_p = 0
    # Make a list out of the grouped table results
    result_list = []
    try: result_list.append(["M", result["M"] ])
    except: pass
    try: result_list.append(["F", result["F"] ])
    except: pass

    # Calculate percentage results
    for res in result_list:
        if res[0] == "M":
            male = res[1]
        elif res[0] == "F":
            female = res[1]
        male_p = round(male*100/(male+female),2)
        female_p = round(female*100/(female+male),2)
    
    if male_p > female_p:
        gender = "m"
        return gender, male_p
    else:
        gender = "f"
        return gender, female_p


In [65]:
# above works

In [66]:
## Maybe slice the large dataframe with the names that are in the uploaded list befor running stuff
# Should speed it up

## Version with apply

In [23]:
# Add a colum for each checked item

In [24]:
# Check country
def check_country(country):
    return country.lower() in country_list

In [25]:
# Check continent
def check_continent(continent):
    return continent.lower() in continents_list

In [26]:
%%time
df["c_country"] = df["country"].apply(check_country)

CPU times: user 27.5 ms, sys: 1.81 ms, total: 29.3 ms
Wall time: 27.2 ms


In [27]:
%%time
df["c_continent"] = df["continent"].apply(check_continent)

CPU times: user 22.4 ms, sys: 445 µs, total: 22.8 ms
Wall time: 20.5 ms


In [28]:
# combine in one column

In [29]:

def check_all(row):
    res_list = []
    res_list.append(row['c_country'])
    res_list.append(row["c_continent"])
                     
    return res_list

In [30]:
df

Unnamed: 0,name,country,continent,c_country,c_continent
0,name,,,False,False
1,marie,,,False,False
2,sophie,,,False,False
3,maximilian,,,False,False
4,maria,,,False,False
...,...,...,...,...,...
4995,yona,,,False,False
4996,zuhal,,,False,False
4997,yonah,,,False,False
4998,zuri,,,False,False


In [31]:
%%time
df["check_all"] = df.apply(check_all, axis=1)

CPU times: user 39.1 ms, sys: 0 ns, total: 39.1 ms
Wall time: 35.7 ms


In [32]:
df

Unnamed: 0,name,country,continent,c_country,c_continent,check_all
0,name,,,False,False,"[False, False]"
1,marie,,,False,False,"[False, False]"
2,sophie,,,False,False,"[False, False]"
3,maximilian,,,False,False,"[False, False]"
4,maria,,,False,False,"[False, False]"
...,...,...,...,...,...,...
4995,yona,,,False,False,"[False, False]"
4996,zuhal,,,False,False,"[False, False]"
4997,yonah,,,False,False,"[False, False]"
4998,zuri,,,False,False,"[False, False]"


In [33]:
# try it with apply
def csv_predict_from_data(row):
    """
    Create a temporary dataframe with name and gender in differnt countries
    Adds rows with gender and percentage to the input dataframe 
    """

    if row["check_all"][0]:
        print("name and country")
        df_name = data[(data["name"] == row["name"] ) & (data["country"] == row["country"])].groupby("gender")["wgt"].sum()
    elif row["check_all"][1]:
        print("name and continent")
        df_name = data[(data["name"] == row["name"] ) & (data["region"] == row["continent"])].groupby("gender")["wgt"].sum()                                                           
    else:
        print("just name or nothing")
        df_name = data[data["name"] == row["name"].lower() ].groupby("gender")["wgt"].sum()


    if df_name.empty:
        return "No Data", "No Data"
    else:
        gender, perc = share_male_female(df_name)
        return gender, perc

In [34]:
%%time
result = df.apply(csv_predict_from_data, axis=1)

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or nothing
just name or 

In [35]:
def extend_result_df(df, result):
    for index, res in enumerate(result):
        df.loc[index, "gender"] = res[0]
        df.loc[index, "percentage"] = res[1]
    return df

In [36]:

df_new = extend_result_df(df, result)

In [37]:
df_new[["gender"]].value_counts()

gender 
f          2191
m          2174
No Data     635
dtype: int64

## Version with iterrow

In [20]:
# this below works but takes to longn- 1:30 for 500 -> 3min for 1K -> 30min for 10K

In [23]:
# takes to loong 
def iterrows_predict_from_data(df, data):
    """
    Create a temporary dataframe with name and gender in differnt countries
    Adds rows with gender and percentage to the input dataframe 
    """
    # Still takes to long - need to be converted into apply or applymap
    for index, row in df.iterrows():

        if df.loc[index, "name"] != "nan" and df.loc[index, "country"].lower() in country_list :
            print("name and country")
            df_name = data[(data["name"] == df.loc[index, "name"].lower()) & (data["country"] == df.loc[index, "country"])].groupby("gender")["wgt"].sum()
        elif df.loc[index, "name"] != "nan" and df.loc[index, "continent"] in continents_list:
            print("name and continent")
            df_name = data[(data["name"] == df.loc[index,"name"].lower()) & (data["region"] == df.loc[index, "continent"])].groupby("gender")["wgt"].sum()
        elif df.loc[index,"name"] != "nan":
            print("only name")
            df_name = data[data["name"] == df.loc[index, "name"].lower()].groupby("gender")["wgt"].sum()
        else:
            print("no data given")
            df.loc[index,"gender"] = "Name not found"

        if df_name.empty:
            df.loc[index, "gender"] = "No Data"
            df.loc[index, "percentage"] = "No Data"
        else:
            gender, perc = share_male_female(df_name)
            df.loc[index, "gender"] = gender
            df.loc[index, "percentage"] = perc
            
        print(index)

    return  df
    

In [24]:
# Calculate for uploaded Datafram


In [25]:
%%time
df_new = iterrows_predict_from_data(df, data)

only name
0
only name
1
only name
2
only name
3
only name
4
only name
5
only name
6
only name
7
only name
8
only name
9
only name
10
only name
11
only name
12
only name
13
only name
14
only name
15
only name
16
only name
17
only name
18
only name
19
only name
20
only name
21
only name
22
only name
23
only name
24
only name
25
only name
26
only name
27
only name
28
only name
29
only name
30
only name
31
only name
32
only name
33
only name
34
only name
35
only name
36
only name
37
only name
38
only name
39
only name
40
only name
41
only name
42
only name
43
only name
44
only name
45
only name
46
only name
47
only name
48
only name
49
only name
50
only name
51
only name
52
only name
53
only name
54
only name
55
only name
56
only name
57
only name
58
only name
59
only name
60
only name
61
only name
62
only name
63
only name
64
only name
65
only name
66
only name
67
only name
68
only name
69
only name
70
only name
71
only name
72
only name
73
only name
74
only name
75
only name
76
only name

636
only name
637
only name
638
only name
639
only name
640
only name
641
only name
642
only name
643
only name
644
only name
645
only name
646
only name
647
only name
648
only name
649
only name
650
only name
651
only name
652
only name
653
only name
654
only name
655
only name
656
only name
657
only name
658
only name
659
only name
660
only name
661
only name
662
only name
663
only name
664
only name
665
only name
666
only name
667
only name
668
only name
669
only name
670
only name
671
only name
672
only name
673
only name
674
only name
675
only name
676
only name
677
only name
678
only name
679
only name
680
only name
681
only name
682
only name
683
only name
684
only name
685
only name
686
only name
687
only name
688
only name
689
only name
690
only name
691
only name
692
only name
693
only name
694
only name
695
only name
696
only name
697
only name
698
only name
699
only name
700
only name
701
only name
702
only name
703
only name
704
only name
705
only name
706
only name
707
on

1327
only name
1328
only name
1329
only name
1330
only name
1331
only name
1332
only name
1333
only name
1334
only name
1335
only name
1336
only name
1337
only name
1338
only name
1339
only name
1340
only name
1341
only name
1342
only name
1343
only name
1344
only name
1345
only name
1346
only name
1347
only name
1348
only name
1349
only name
1350
only name
1351
only name
1352
only name
1353
only name
1354
only name
1355
only name
1356
only name
1357
only name
1358
only name
1359
only name
1360
only name
1361
only name
1362
only name
1363
only name
1364
only name
1365
only name
1366
only name
1367
only name
1368
only name
1369
only name
1370
only name
1371
only name
1372
only name
1373
only name
1374
only name
1375
only name
1376
only name
1377
only name
1378
only name
1379
only name
1380
only name
1381
only name
1382
only name
1383
only name
1384
only name
1385
only name
1386
only name
1387
only name
1388
only name
1389
only name
1390
only name
1391
only name
1392
only name
1393
only 

1902
only name
1903
only name
1904
only name
1905
only name
1906
only name
1907
only name
1908
only name
1909
only name
1910
only name
1911
only name
1912
only name
1913
only name
1914
only name
1915
only name
1916
only name
1917
only name
1918
only name
1919
only name
1920
only name
1921
only name
1922
only name
1923
only name
1924
only name
1925
only name
1926
only name
1927
only name
1928
only name
1929
only name
1930
only name
1931
only name
1932
only name
1933
only name
1934
only name
1935
only name
1936
only name
1937
only name
1938
only name
1939
only name
1940
only name
1941
only name
1942
only name
1943
only name
1944
only name
1945
only name
1946
only name
1947
only name
1948
only name
1949
only name
1950
only name
1951
only name
1952
only name
1953
only name
1954
only name
1955
only name
1956
only name
1957
only name
1958
only name
1959
only name
1960
only name
1961
only name
1962
only name
1963
only name
1964
only name
1965
only name
1966
only name
1967
only name
1968
only 

2450
only name
2451
only name
2452
only name
2453
only name
2454
only name
2455
only name
2456
only name
2457
only name
2458
only name
2459
only name
2460
only name
2461
only name
2462
only name
2463
only name
2464
only name
2465
only name
2466
only name
2467
only name
2468
only name
2469
only name
2470
only name
2471
only name
2472
only name
2473
only name
2474
only name
2475
only name
2476
only name
2477
only name
2478
only name
2479
only name
2480
only name
2481
only name
2482
only name
2483
only name
2484
only name
2485
only name
2486
only name
2487
only name
2488
only name
2489
only name
2490
only name
2491
only name
2492
only name
2493
only name
2494
only name
2495
only name
2496
only name
2497
only name
2498
only name
2499
only name
2500
only name
2501
only name
2502
only name
2503
only name
2504
only name
2505
only name
2506
only name
2507
only name
2508
only name
2509
only name
2510
only name
2511
only name
2512
only name
2513
only name
2514
only name
2515
only name
2516
only 

3036
only name
3037
only name
3038
only name
3039
only name
3040
only name
3041
only name
3042
only name
3043
only name
3044
only name
3045
only name
3046
only name
3047
only name
3048
only name
3049
only name
3050
only name
3051
only name
3052
only name
3053
only name
3054
only name
3055
only name
3056
only name
3057
only name
3058
only name
3059
only name
3060
only name
3061
only name
3062
only name
3063
only name
3064
only name
3065
only name
3066
only name
3067
only name
3068
only name
3069
only name
3070
only name
3071
only name
3072
only name
3073
only name
3074
only name
3075
only name
3076
only name
3077
only name
3078
only name
3079
only name
3080
only name
3081
only name
3082
only name
3083
only name
3084
only name
3085
only name
3086
only name
3087
only name
3088
only name
3089
only name
3090
only name
3091
only name
3092
only name
3093
only name
3094
only name
3095
only name
3096
only name
3097
only name
3098
only name
3099
only name
3100
only name
3101
only name
3102
only 

3625
only name
3626
only name
3627
only name
3628
only name
3629
only name
3630
only name
3631
only name
3632
only name
3633
only name
3634
only name
3635
only name
3636
only name
3637
only name
3638
only name
3639
only name
3640
only name
3641
only name
3642
only name
3643
only name
3644
only name
3645
only name
3646
only name
3647
only name
3648
only name
3649
only name
3650
only name
3651
only name
3652
only name
3653
only name
3654
only name
3655
only name
3656
only name
3657
only name
3658
only name
3659
only name
3660
only name
3661
only name
3662
only name
3663
only name
3664
only name
3665
only name
3666
only name
3667
only name
3668
only name
3669
only name
3670
only name
3671
only name
3672
only name
3673
only name
3674
only name
3675
only name
3676
only name
3677
only name
3678
only name
3679
only name
3680
only name
3681
only name
3682
only name
3683
only name
3684
only name
3685
only name
3686
only name
3687
only name
3688
only name
3689
only name
3690
only name
3691
only 

4229
only name
4230
only name
4231
only name
4232
only name
4233
only name
4234
only name
4235
only name
4236
only name
4237
only name
4238
only name
4239
only name
4240
only name
4241
only name
4242
only name
4243
only name
4244
only name
4245
only name
4246
only name
4247
only name
4248
only name
4249
only name
4250
only name
4251
only name
4252
only name
4253
only name
4254
only name
4255
only name
4256
only name
4257
only name
4258
only name
4259
only name
4260
only name
4261
only name
4262
only name
4263
only name
4264
only name
4265
only name
4266
only name
4267
only name
4268
only name
4269
only name
4270
only name
4271
only name
4272
only name
4273
only name
4274
only name
4275
only name
4276
only name
4277
only name
4278
only name
4279
only name
4280
only name
4281
only name
4282
only name
4283
only name
4284
only name
4285
only name
4286
only name
4287
only name
4288
only name
4289
only name
4290
only name
4291
only name
4292
only name
4293
only name
4294
only name
4295
only 

4831
only name
4832
only name
4833
only name
4834
only name
4835
only name
4836
only name
4837
only name
4838
only name
4839
only name
4840
only name
4841
only name
4842
only name
4843
only name
4844
only name
4845
only name
4846
only name
4847
only name
4848
only name
4849
only name
4850
only name
4851
only name
4852
only name
4853
only name
4854
only name
4855
only name
4856
only name
4857
only name
4858
only name
4859
only name
4860
only name
4861
only name
4862
only name
4863
only name
4864
only name
4865
only name
4866
only name
4867
only name
4868
only name
4869
only name
4870
only name
4871
only name
4872
only name
4873
only name
4874
only name
4875
only name
4876
only name
4877
only name
4878
only name
4879
only name
4880
only name
4881
only name
4882
only name
4883
only name
4884
only name
4885
only name
4886
only name
4887
only name
4888
only name
4889
only name
4890
only name
4891
only name
4892
only name
4893
only name
4894
only name
4895
only name
4896
only name
4897
only 

In [29]:
df_new["gender"].value_counts()

m          21489
f          21472
No Data     7627
Name: gender, dtype: int64

# Save Result


In [30]:
# Choose file
path_save = os.path.join("..","raw_data","test",f"genderized-{filename}")
path_save

'../raw_data/test/genderized-german-50k.csv'

In [31]:
df_new.to_csv(path_save, index=False)