# Import Packages
Below, you'll find what you need in order to run this notebook successfully. I used Pandas and Matplotlib.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

# Read Data In
I pulled in baby name data from 2010 and 2019; the show premiered in April 2011, so I chose the year before as a comparison to demonstrate that some of the names might not have ever been
on anyone's radar, save people that had read the books. I chose 2019 because the series ended in May of that year, so by then, the names would have become a part of pop culture.

In [2]:
#2010 data
df_2010 = pd.read_csv('assets/yob_2010.csv')
#2019 data
df_2019 = pd.read_csv('assets/yob2019.csv')


# Combine the Two Dataframes and Clean Up
I combined the 2010 and 2019 data into one frame so I could clean up and analyze it more easily.

In [57]:
#I combined the two dataframes into one and merged the "Name" column because there were, for example, more than one instance of the name "Arya" in that column in both files.

#This one works, but then names don't show up anymore
#df_combined_years = pd.merge(df_2010, df_2019, how='inner')

#I think I'm almost there, but it's messing with the gender columns
df_combined_years = df_2010.join(df_2019.set_index('Name'), on='Name', lsuffix='Name')


#change NaN values to zeroes
df_combined_years['2010 Name Count'].fillna(0, inplace = True)
df_combined_years['2019 Name Count'].fillna(0, inplace = True)
df_combined_years


Unnamed: 0,Name,GenderName,2010 Name Count,Gender,2019 Name Count
0,Isabella,F,22925,F,13360.0
0,Isabella,F,22925,M,15.0
1,Sophia,F,20648,F,13769.0
1,Sophia,F,20648,M,16.0
2,Emma,F,17354,F,17176.0
...,...,...,...,...,...
34084,Zymaire,M,5,,0.0
34085,Zyonne,M,5,,0.0
34086,Zyquarius,M,5,,0.0
34087,Zyran,M,5,M,5.0


In [55]:
df_combined_years.loc[(df_combined_years['Name']=='Cersei')]


Unnamed: 0,Name,GenderName,2010 Name Count,Gender,2019 Name Count


# Analyze the Data
First, I used the .loc method to retrieve rows from the dataframe that contained specific _Game of Thrones_ character names. 

In [33]:
#See how many times specific character names show up in the dataframe to compare occurences in year 2010 vs. 2019 (HOW TO SHOW RESULTS EVEN IF ZERO)
df_characters = df_combined_years.loc[
                        (df_combined_years['Name']=='Cersei') |  
                        (df_combined_years['Name']=='Arya') | 
                        (df_combined_years['Name']=='Jon') | 
                        (df_combined_years['Name']=='Khaleesi') |
                        (df_combined_years['Name']=='Catelyn') |
                        (df_combined_years['Name']=='Bran') |
                        (df_combined_years['Name']=='Brienne') |
                        (df_combined_years['Name']=='Daenerys') |
                        (df_combined_years['Name']=='Jaime') |
                        (df_combined_years['Name']=='Joffrey') |
                        (df_combined_years['Name']=='Jorah') |
                        (df_combined_years['Name']=='Margaery') |
                        (df_combined_years['Name']=='Melisandre') |
                        (df_combined_years['Name']=='Robb') |
                        (df_combined_years['Name']=='Samwell') |
                        (df_combined_years['Name']=='Sansa') |
                        (df_combined_years['Name']=='Theon') |
                        (df_combined_years['Name']=='Tyrion')
                         ]
#Sort by Name so that male and female values show up paired together.
df_characters.sort_values('Name')

#How to show all results, even if zeroes
#pd.set_option('display.max_rows', None)
#print(df_characters)




Unnamed: 0,Name,Gender,2010 Name Count,2019 Name Count
940,Arya,F,273,3047
14823,Arya,M,88,49
22704,Bran,M,5,8
6228,Brienne,F,20,33
2122,Catelyn,F,93,15
2262,Jaime,F,85,48
13460,Jaime,M,814,518
13673,Jon,M,439,315
20075,Tyrion,M,11,57
