# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import glob

# Load all text files.  glob returns matches in arbitrary, OS-dependent
# order, so sort them to make the row order of the combined frame
# reproducible from run to run.
files = sorted(glob.glob('yob*.txt'))

# Fail early with an actionable message: without this guard pd.concat([])
# raises a bare "No objects to concatenate" that never mentions the files.
if not files:
    raise FileNotFoundError(
        "No files matching 'yob*.txt' were found in the current working directory"
    )

# Combine all files into a single DataFrame
data_list = []
for file in files:
    # Characters 3..6 of the matched name are parsed as the year
    # (i.e. names shaped like 'yobYYYY.txt').
    year = int(file[3:7])
    # names= is given without header=, so pandas treats the first line of
    # each file as data and labels the three columns as listed here.
    df = pd.read_csv(file, names=['Name', 'Sex', 'Count'])
    df['Year'] = year
    data_list.append(df)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
baby_names = pd.concat(data_list, ignore_index=True)

# Keep only the rows whose name is one of the names being compared.
names_of_interest = ['Sara', 'John']
is_wanted = baby_names['Name'].isin(names_of_interest)
filtered_data = baby_names[is_wanted]

# Total the remaining columns per (year, name, sex) combination, then turn
# the group keys back into ordinary columns.
grouped = filtered_data.groupby(['Year', 'Name', 'Sex'])
name_trends = grouped.sum().reset_index()

# Two side-by-side panels: one per sex, with one line per name of interest.
fig, axes = plt.subplots(ncols=2, figsize=(15, 6))

# Line colour used for each name; the same mapping serves both panels.
colors = {'Sara': 'orange', 'John': 'blue'}

# (sex code, panel title) for the left and right panel respectively.
# NOTE(review): each title names a single name, yet every panel draws all of
# names_of_interest for its sex -- confirm the titles are as intended.
panels = [('M', 'JOHN'), ('F', 'SARA')]

for ax, (sex, title) in zip(axes, panels):
    rows_for_sex = name_trends[name_trends['Sex'] == sex]
    for name in names_of_interest:
        name_data = rows_for_sex[rows_for_sex['Name'] == name]
        # Nothing to draw for a name/sex combination with no rows.
        if name_data.empty:
            continue
        ax.plot(name_data['Year'], name_data['Count'], label=name, color=colors[name])
    ax.set_title(title)
    ax.set_xlabel('Year')
    ax.set_ylabel('Count')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()
