In [30]:
import pandas as pd

In [31]:
# Read the CSV file into a pd.DataFrame object named `baby`.
# Ending the cell with the bare name displays the DataFrame as the cell output.
baby = pd.read_csv('data/babynames20102022.csv')
baby

Unnamed: 0,Name,Sex,Count,Year
0,Olivia,F,16573,2022
1,Emma,F,14435,2022
2,Charlotte,F,12891,2022
3,Amelia,F,12333,2022
4,Sophia,F,12310,2022
...,...,...,...,...
426767,Zymaire,M,5,2010
426768,Zyonne,M,5,2010
426769,Zyquarius,M,5,2010
426770,Zyran,M,5,2010


In [32]:
# Slicing with .loc
# .loc selects rows and columns by their labels.
# Note that .loc is indexed with square brackets rather than called with parentheses.
baby.loc[1, 'Name']

'Emma'

In [33]:
# Slice several rows and columns at once.
# With .loc, both ends of a label slice are included in the result.
baby.loc[1:3, 'Name':'Count']

Unnamed: 0,Name,Sex,Count
1,Emma,F,14435
2,Charlotte,F,12891
3,Amelia,F,12333


In [34]:
# A bare colon on each axis selects every row and every column.
baby.loc[:, :]

Unnamed: 0,Name,Sex,Count,Year
0,Olivia,F,16573,2022
1,Emma,F,14435,2022
2,Charlotte,F,12891,2022
3,Amelia,F,12333,2022
4,Sophia,F,12310,2022
...,...,...,...,...
426767,Zymaire,M,5,2010
426768,Zyonne,M,5,2010
426769,Zyquarius,M,5,2010
426770,Zyran,M,5,2010


In [35]:
# DataFrame and Series
# A pd.DataFrame is two-dimensional: it has rows and columns.
# A pd.Series is one-dimensional: a single labeled sequence of values.
print('### This is a Series. ###')
count = baby.loc[0:5, 'Count']
print(count)
# Only the last expression of a cell is displayed automatically, so the class
# name has to be printed explicitly to confirm that `count` is a Series.
print(type(count).__name__)

# Pass a list of column labels to .loc to get a DataFrame back.
print('### This is a DataFrame. ###')
baby.loc[0:5, ['Name', 'Sex']]

### This is a Series. ###
0    16573
1    14435
2    12891
3    12333
4    12310
5    11662
Name: Count, dtype: int64
### This is a DataFrame. ###


Unnamed: 0,Name,Sex
0,Olivia,F
1,Emma,F
2,Charlotte,F
3,Amelia,F
4,Sophia,F
5,Isabella,F


In [36]:
# Shorthand for selecting columns without .loc:
# - single brackets with one label return a Series (printed first);
# - double brackets (a list of labels) return a DataFrame (displayed last).
print(baby['Name'])
baby[['Name']]

0            Olivia
1              Emma
2         Charlotte
3            Amelia
4            Sophia
            ...    
426767      Zymaire
426768       Zyonne
426769    Zyquarius
426770        Zyran
426771        Zzyzx
Name: Name, Length: 426772, dtype: object


Unnamed: 0,Name
0,Olivia
1,Emma
2,Charlotte
3,Amelia
4,Sophia
...,...
426767,Zymaire
426768,Zyonne
426769,Zyquarius
426770,Zyran


In [37]:
# .iloc selects by integer position instead of by label.
# The end of a positional slice is excluded: 0:3 gives rows 0-2 and 0:1 gives
# only the first column.
baby.iloc[0:3, 0:1]


Unnamed: 0,Name
0,Olivia
1,Emma
2,Charlotte


In [38]:
# Filtering rows
# Comparing a column with a value yields a boolean Series:
# one True/False entry per row, True where the name equals 'Emma'.
baby['Name'] == 'Emma'

0         False
1          True
2         False
3         False
4         False
          ...  
426767    False
426768    False
426769    False
426770    False
426771    False
Name: Name, Length: 426772, dtype: bool

In [43]:
# Pass the boolean Series to .loc as the row selector to keep only the rows
# where it is True; the colon keeps every column.
(baby
 .loc[baby['Name'] == 'Emma', :]
 .head(5))

Unnamed: 0,Name,Sex,Count,Year
1,Emma,F,14435,2022
23573,Emma,M,15,2022
31916,Emma,F,15510,2021
55646,Emma,M,14,2021
63601,Emma,F,15680,2020


In [42]:
# Shorthand: indexing the DataFrame directly with the boolean Series
# filters the rows the same way as .loc[mask, :].
(baby[baby['Name'] == 'Emma']
 .head(5))

Unnamed: 0,Name,Sex,Count,Year
1,Emma,F,14435,2022
23573,Emma,M,15,2022
31916,Emma,F,15510,2021
55646,Emma,M,14,2021
63601,Emma,F,15680,2020


In [41]:
# Wrapping a method chain in parentheses lets each step sit on its own line:
# filter to 'Emma', sort by Count from largest to smallest, keep the top two.
(
    baby[baby['Name'] == 'Emma']
    .sort_values('Count', ascending=False)
    .head(2)
)

Unnamed: 0,Name,Sex,Count,Year
324970,Emma,F,20964,2012
258346,Emma,F,20958,2014
