In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract CSV into a DataFrame

In [2]:
# Relative path to the raw country-profile CSV.
csv_file = "Resources/country_profile_variables.csv"

# Parse the CSV with pandas defaults, then preview the first five rows.
country_data_df = pd.read_csv(filepath_or_buffer=csv_file)
country_data_df.head(5)

Unnamed: 0,country,Region,Surface area (km2),Population in thousands (2017),"Population density (per km2, 2017)","Sex ratio (m per 100 f, 2017)",GDP: Gross domestic product (million current US$),"GDP growth rate (annual %, const. 2005 prices)",GDP per capita (current US$),Economy: Agriculture (% of GVA),...,Mobile-cellular subscriptions (per 100 inhabitants).1,Individuals using the Internet (per 100 inhabitants),Threatened species (number),Forested area (% of land area),CO2 emission estimates (million tons/tons per capita),"Energy production, primary (Petajoules)",Energy supply per capita (Gigajoules),"Pop. using improved drinking water (urban/rural, %)","Pop. using improved sanitation facilities (urban/rural, %)",Net Official Development Assist. received (% of GNI)
0,Afghanistan,SouthernAsia,652864,35530,54.4,106.3,20270,-2.4,623.2,23.3,...,8.3,42,2.1,9.8/0.3,63,5,78.2/47.0,45.1/27.0,21.43,-99
1,Albania,SouthernEurope,28748,2930,106.9,101.9,11541,2.6,3984.2,22.4,...,63.3,130,28.2,5.7/2.0,84,36,94.9/95.2,95.5/90.2,2.96,-99
2,Algeria,NorthernAfrica,2381741,41318,17.3,102.0,164779,3.8,4154.1,12.2,...,38.2,135,0.8,145.4/3.7,5900,55,84.3/81.8,89.8/82.2,0.05,-99
3,American Samoa,Polynesia,199,56,278.2,103.6,-99,-99.0,-99.0,-99.0,...,-99.0,92,87.9,-99,-99,-99,100.0/100.0,62.5/62.5,-99.0,-99
4,Andorra,SouthernEurope,468,77,163.8,102.3,2812,0.8,39896.4,0.5,...,96.9,13,34.0,0.5/6.4,1,119,100.0/100.0,100.0/100.0,-99.0,-99


In [3]:
# Show every column header of the raw frame as a plain Python list
country_data_df.columns.tolist()

['country',
 'Region',
 'Surface area (km2)',
 'Population in thousands (2017)',
 'Population density (per km2, 2017)',
 'Sex ratio (m per 100 f, 2017)',
 'GDP: Gross domestic product (million current US$)',
 'GDP growth rate (annual %, const. 2005 prices)',
 'GDP per capita (current US$)',
 'Economy: Agriculture (% of GVA)',
 'Economy: Industry (% of GVA)',
 'Economy: Services and other activity (% of GVA)',
 'Employment: Agriculture (% of employed)',
 'Employment: Industry (% of employed)',
 'Employment: Services (% of employed)',
 'Unemployment (% of labour force)',
 'Labour force participation (female/male pop. %)',
 'Agricultural production index (2004-2006=100)',
 'Food production index (2004-2006=100)',
 'International trade: Exports (million US$)',
 'International trade: Imports (million US$)',
 'International trade: Balance (million US$)',
 'Balance of payments, current account (million US$)',
 'Population growth rate (average annual %)',
 'Urban population (% of total populat

### Transform country DataFrame

In [4]:
# Source columns that are kept for the database load.
country_data_cols = ['country', 'Region', 'Population in thousands (2017)', 'GDP: Gross domestic product (million current US$)', 'Urban population (% of total population)']

# Map the verbose source headers onto short snake_case names.
# "country" already has its final name, so it needs no entry.
column_renames = {
    "Region": "region",
    "Population in thousands (2017)": "population_inthousands",
    "GDP: Gross domestic product (million current US$)": "gdp_inmillions",
    "Urban population (% of total population)": "urbanpop_pcttotalpop",
}

# Build the transformed frame in one chain, without mutating anything in place:
#   1. select the columns of interest (list-indexing returns a new frame),
#   2. rename them,
#   3. drop repeated countries so the index below is unique (first row wins),
#   4. use the country name as the index.
# NOTE(review): the preview shows -99 in gdp_inmillions (American Samoa), which
# looks like a missing-value sentinel in the source data -- confirm before
# analysing these values.
country_data_transformed = (
    country_data_df[country_data_cols]
    .rename(columns=column_renames)
    .drop_duplicates(subset="country")
    .set_index("country")
)

country_data_transformed.head()

Unnamed: 0_level_0,region,population_inthousands,gdp_inmillions,urbanpop_pcttotalpop
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,SouthernAsia,35530,20270,26.7
Albania,SouthernEurope,2930,11541,57.4
Algeria,NorthernAfrica,41318,164779,70.7
American Samoa,Polynesia,56,-99,87.2
Andorra,SouthernEurope,77,2812,85.1


### Create database connection

In [None]:
# Connection template: replace <insert password> with the MySQL root password before running.
# The target database is country_db, which holds the countrydata table this notebook loads.
connection_string = "root:<insert password>@127.0.0.1/country_db"
engine = create_engine(f'mysql://{connection_string}')

In [5]:
# Never keep the database password in the notebook source: read it from the
# MYSQL_PASSWORD environment variable, or prompt for it when the variable is unset.
# NOTE: a password that was previously committed in this cell should be rotated.
db_password = os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password for root: ")

# quote_plus escapes characters such as '@' or '/' that would otherwise break the URL.
connection_string = f"root:{quote_plus(db_password)}@127.0.0.1/country_db"
engine = create_engine(f'mysql://{connection_string}')

In [6]:
# Confirm tables
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names.
inspect(engine).get_table_names()

['countrydata']

### Load DataFrames into database

In [7]:
# Append the transformed rows to the `countrydata` table.
# index=True writes the DataFrame index as a column alongside the data.
# Re-running this cell will attempt to insert the same rows a second time.
country_data_transformed.to_sql(
    name='countrydata',
    con=engine,
    if_exists='append',
    index=True,
)