# Unique Names
Finding all of the unique names, unique female names, unique male names, and unisex names for various analyses

In [1]:
import pandas as pd

In [2]:
# Read the SSA name table, using the first CSV column as the row index, and
# discard the column literally named 'index' in the same step.
data = pd.read_csv("SSANameData.txt", index_col=0).drop(columns='index')
# Preview the first few rows as the cell output.
data.head()

Unnamed: 0,name,sex,total_count,1880,1881,1882,1883,1884,1885,1886,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,mary,f,4139160,7065,6919,8148,8012,9217,9128,9889,...,2642,2526,2424,2369,2239,2229,2100,2116,2081,2196
1,anna,f,912251,2604,2698,3143,3306,3860,3994,4283,...,5149,4805,4574,4197,3845,3507,3078,2958,2931,2700
2,emma,f,763546,2003,2034,2303,2367,2587,2728,2764,...,20485,19548,19870,18818,17221,15714,15543,14498,13579,13485
3,elizabeth,f,1681878,1939,1852,2186,2255,2549,2582,2680,...,9766,9602,8999,8595,7920,7299,7261,7014,6600,6878
4,minnie,f,159224,1746,1653,2004,2035,2243,2178,2372,...,68,66,67,67,74,71,63,55,50,62


## Methods

In [3]:
import functions as fun

In [4]:
def count_nonzero_columns(year_columns):
    """Count how many values in ``year_columns`` are greater than zero.

    Parameters
    ----------
    year_columns : iterable of numbers
        Any iterable of numeric values (list, tuple, generator, pandas
        Series, ...). It is consumed once.

    Returns
    -------
    int
        The number of elements strictly greater than zero; ``0`` for an
        empty iterable.
    """
    # The previous version built a generator of lambda objects (no filtering
    # took place) and then called len() on it, which raises TypeError.
    return sum(1 for value in year_columns if value > 0)

## Sorting Names

In [5]:
# Distinct names that appear on at least one row of each sex.
f_names = set(data.loc[data['sex'] == 'f', 'name'].tolist())
m_names = set(data.loc[data['sex'] == 'm', 'name'].tolist())
# Names recorded under exactly one sex ...
unique_f = f_names - m_names
unique_m = m_names - f_names
# ... and names recorded under both.
unisex_names = f_names & m_names

In [6]:
def _summarize_exclusive_names(names):
    """Summarise the rows of ``data`` whose name is in ``names``.

    Parameters
    ----------
    names : collection of str
        Names to keep (e.g. ``unique_f`` or ``unique_m``).

    Returns
    -------
    pandas.DataFrame
        The matching rows with the per-year columns replaced by a single
        ``years`` column (number of year columns with a count above zero),
        sorted by ``total_count`` descending. The original row label is kept
        as an ``index`` column by ``reset_index()``.
    """
    subset = data[data['name'].isin(names)].copy()
    # Positions 3..147 are the per-year count columns (presumably 1880-2024,
    # going by the column header in the preview of `data`).
    year_columns = subset.columns[3:148]
    subset['years'] = (subset[year_columns] > 0).sum(axis=1)
    return (
        subset.drop(columns=year_columns)
        .sort_values(by='total_count', ascending=False)
        .reset_index()
    )


# Both frames go through the same helper. Previously most_masculine['years']
# was computed from most_feminine; the two frames share no row labels, so
# index alignment filled the masculine 'years' column with NaN.
most_feminine = _summarize_exclusive_names(unique_f)
most_masculine = _summarize_exclusive_names(unique_m)
most_feminine

Unnamed: 0,index,name,sex,total_count,years
0,553,delilah,f,62507,145
1,5821,athena,f,50614,115
2,4279,alina,f,43173,91
3,197,helena,f,39561,145
4,30544,ayla,f,34866,48
...,...,...,...,...,...
59910,28773,pamlyn,f,5,1
59911,89699,semayah,f,5,1
59912,89700,senaiya,f,5,1
59913,89701,sensi,f,5,1


In [8]:
# Rows (one per name/sex pair) for names that occur under both sexes.
unisex = data[data['name'].isin(unisex_names)].copy()
# Number of year columns (positions 3..147) with a count above zero, per row.
unisex['years'] = (unisex.iloc[:, 3:148] > 0).sum(axis=1)
# Put each row's total into the column for its own sex and 0 into the other.
unisex['female_sum'] = unisex['total_count'].where(unisex['sex'] == 'f').fillna(0)
unisex['male_sum'] = unisex['total_count'].where(unisex['sex'] == 'm').fillna(0)
# The per-year columns and the sex label are not needed once the sums exist.
unisex = unisex.drop(columns=[*unisex.columns[3:148], 'sex'])
# Broadcast the per-name totals onto every row of that name. Note that 'years'
# is summed across both sexes, so it can exceed the number of year columns.
unisex[['total_count', 'female_sum', 'male_sum', 'years']] = (
    unisex
    .groupby('name')[['total_count', 'female_sum', 'male_sum', 'years']]
    .transform('sum')
)
unisex['per_diff'] = unisex.apply(
    lambda row: fun.calculate_percent_diff(row['female_sum'], row['male_sum']),
    axis=1,
)
# Every row of a name now carries identical values; keep the first one only.
unisex = unisex.drop_duplicates(subset='name')
unisex = unisex.astype({'female_sum': 'int', 'male_sum': 'int'}).reset_index()


In [9]:
# Unisex names whose female/male usage is heavily lopsided. Judging by the
# displayed rows, per_diff looks like a symmetric percent difference
# (|f - m| / mean(f, m) * 100, so at most 200) - confirm against
# fun.calculate_percent_diff.
unisex.loc[unisex['per_diff'] > 150]

Unnamed: 0,index,name,total_count,years,female_sum,male_sum,per_diff
0,0,mary,4154332,277,4139160,15172,198.539163
1,1,anna,915007,283,912251,2756,198.795200
2,2,emma,765264,266,763546,1718,199.102009
3,3,elizabeth,1687089,286,1681878,5211,198.764499
4,4,minnie,160011,218,159224,787,198.032635
...,...,...,...,...,...,...,...
11688,112735,keyoir,52,4,46,6,153.846154
11694,113093,cyxx,48,4,5,43,158.333333
11706,113492,kailoh,49,4,5,44,159.183673
11712,113944,rumani,211,3,200,11,179.146919


In [None]:
## TODO
# Write names to file to save for later

## Biggest Rank Jumps
Finding the male and female names that jumped the most ranks in a single year

In [10]:
%%time
# Largest rank jump for every male-only name, ordered by the absolute value of
# the first element of each result, largest first.
results = {
    name: fun.biggest_rank_jump(name, 'm', 1880, 2025)
    for name in unique_m
}
sorted_results_m = dict(
    sorted(results.items(), key=lambda item: abs(item[1][0]), reverse=True)
)

# Same computation for the female-only names; `results` is rebuilt from scratch.
results = {
    name: fun.biggest_rank_jump(name, 'f', 1880, 2025)
    for name in unique_f
}
sorted_results_f = dict(
    sorted(results.items(), key=lambda item: abs(item[1][0]), reverse=True)
)

KeyboardInterrupt: 

In [None]:
## TODO
# Interpret results
# Write to file?