In [19]:
import getpass
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Extract CSVs into DataFrames

In [21]:
# Read the Alaska baby-name CSV into a DataFrame.
alaska_names_file = "Resources/AlaskaNames.csv"
alaska_names_df = pd.read_csv(alaska_names_file)

# Preview a random selection of rows. The fixed seed keeps the displayed
# sample identical every time the notebook is re-run from a fresh kernel.
alaska_names_df.sample(15, random_state=42)


Unnamed: 0,Id,Name,Year,Gender,State,Count
1132864,1132865,Lisa,1963,F,AK,66
400228,400229,David,1997,M,AK,52
385643,385644,Jordyn,1992,F,AK,5
145799,145800,Cheryl,1946,F,AK,9
82224,82225,Nicolas,1999,M,AK,5
1346201,1346202,Aveline,2015,F,AK,5
1161672,1161673,Catherine,1962,F,AK,19
732311,732312,Tanya,1988,F,AK,5
3251,3252,Jean,1962,F,AK,9
934617,934618,Brittany,1987,F,AK,56


In [22]:
# Read the national baby-name CSV into a DataFrame and show its first five rows.
national_names_files = "Resources/NationalNames.csv"
national_names_df = pd.read_csv(filepath_or_buffer=national_names_files)
national_names_df.head(n=5)

Unnamed: 0,Id,Name,Year,Gender,Count
0,1,Mary,1880,F,7065
1,2,Anna,1880,F,2604
2,3,Emma,1880,F,2003
3,4,Elizabeth,1880,F,1939
4,5,Minnie,1880,F,1746


### Transform Alaska Names DataFrame

In [23]:
# Columns of the raw Alaska frame that are carried into the transformed frame.
alaska_columns = ["Name", "Year", "Gender", "Count"]

# Mapping from the CSV headers to the snake_case column names used downstream.
alaska_column_names = {
    "Name": "baby_name",
    "Year": "birth_year",
    "Gender": "gender",
    "Count": "total",
}

# Build the transformed frame in a single chain instead of mutating it with
# inplace=True, so re-running this cell always starts from alaska_names_df and
# yields the same result.
# NOTE(review): drop_duplicates(subset="baby_name") keeps only the first row
# encountered for each name, so all other year/gender rows for that name are
# discarded - confirm this is the intended de-duplication key.
alaska_transformed = (
    alaska_names_df[alaska_columns]
    .rename(columns=alaska_column_names)
    .drop_duplicates(subset="baby_name")
    .set_index("baby_name")
)

# Seeded so the preview is reproducible across runs.
alaska_transformed.sample(10, random_state=42)



Unnamed: 0_level_0,birth_year,gender,total
baby_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Patrice,1953,F,5
Kris,1975,M,5
Rowan,2006,F,6
Raven,1992,F,9
August,1954,M,5
Kiara,1996,F,5
Sheena,1982,F,8
Kasen,2011,M,5
Nicolas,1979,M,5
Destiny,1990,F,5


### Transform National Names DataFrame

In [24]:
# Columns of the raw national frame that are carried into the transformed frame.
national_columns = ["Name", "Year", "Gender", "Count"]

# Mapping from the CSV headers to the snake_case column names used downstream.
national_column_names = {
    "Name": "baby_name",
    "Year": "birth_year",
    "Gender": "gender",
    "Count": "total",
}

# Build the transformed frame in a single chain instead of mutating it with
# inplace=True, so re-running this cell always starts from national_names_df
# and yields the same result.
# NOTE(review): drop_duplicates(subset="baby_name") keeps only the first row
# encountered for each name, so all other year/gender rows for that name are
# discarded - confirm this is the intended de-duplication key.
national_transformed = (
    national_names_df[national_columns]
    .rename(columns=national_column_names)
    .drop_duplicates(subset="baby_name")
    .set_index("baby_name")
)

# Seeded so the preview is reproducible across runs.
national_transformed.sample(10, random_state=42)

Unnamed: 0_level_0,birth_year,gender,total
baby_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Marcie,1897,F,5
Beny,1922,M,5
Bradin,1989,M,6
Farhiya,2000,F,5
Kentyn,2012,M,6
Amzee,2014,F,5
Emarii,2011,M,5
Aliyahna,2008,F,5
Joannie,1928,F,5
Derica,1971,F,5


### Create Database Connection

In [None]:
# Assemble the PostgreSQL connection settings from the environment so that no
# password is stored in the notebook. The non-secret settings fall back to the
# values this notebook previously hardcoded; the password is prompted for when
# PGPASSWORD is not set.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "babynames_db")

# quote_plus escapes characters such as "@", ":" or "/" in the user name and
# password that would otherwise be misread as URL delimiters.
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f"postgresql://{connection_string}")

### Confirm Engine

In [None]:
 # Confirm tables
# List the tables visible through the engine's connection.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported replacement and returns the same list.
inspect(engine).get_table_names()

### Load DataFrames into Database

In [11]:
 # Append the transformed Alaska rows to the alaska_names table.
 # baby_name is the DataFrame index (set in the transform step), so it is only
 # written when index=True; with index=False the table would receive
 # birth_year, gender and total but no names.
 # NOTE(review): if_exists='append' adds the rows again on every re-run -
 # confirm the target table is emptied or constrained accordingly.
 alaska_transformed.to_sql(name='alaska_names', con=engine, if_exists='append', index=True)

NameError: name 'alaska_transformed' is not defined

In [None]:
# Append the transformed national rows to the national_names table.
# baby_name is the DataFrame index (set in the transform step), so it is only
# written when index=True; with index=False the table would receive
# birth_year, gender and total but no names.
# NOTE(review): if_exists='append' adds the rows again on every re-run -
# confirm the target table is emptied or constrained accordingly.
national_transformed.to_sql(name='national_names', con=engine, if_exists='append', index=True)