In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

### Store CSV into DataFrame

In [4]:
# Extract: read the 2017 cost-of-living CSV (path is relative to the notebook).
csv_file = "Resources/cost-of-living-2017.csv"
costofliving_data_df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows to confirm the file was parsed as expected.
costofliving_data_df.head(n=5)

Unnamed: 0,City,State,Country,Cost of Living Plus Rent Index,CLI,Rent Index,Groceries Index,Restaurant Price Index,Local Purchasing Power Index,Leverage Model 1,Leverage Model 2
0,Zurich,,Switzerland,108.77,149.53,66.76,163.71,140.58,126.3,0.042957,0.041121
1,Hamilton,,Bermuda,133.2,148.13,117.8,145.2,152.54,93.42,0.077443,0.041137
2,Zug,,Switzerland,105.54,142.54,67.39,148.36,143.18,105.62,0.034928,0.034619
3,Geneva,,Switzerland,106.73,142.12,70.25,147.13,138.96,115.2,0.030242,0.030232
4,Basel,,Switzerland,97.47,142.02,51.54,149.54,131.72,113.13,0.038061,0.032146


### Create new data with select columns

In [16]:
# Transform: keep only the country label and the three price indices, then
# index the result by country.
#
# Only the "Country" header actually changes (to lower-case "country", the same
# spelling the country-profile data uses for its country column); the three
# index columns keep their original headers.
#
# The resulting index is not unique: the source has one row per city, so a
# country such as Switzerland appears several times.
new_costofliving_data_df = (
    costofliving_data_df[
        [
            'Country',
            'Cost of Living Plus Rent Index',
            'Groceries Index',
            'Restaurant Price Index',
        ]
    ]
    .copy()
    .rename(columns={"Country": "country"})
    .set_index("country")
)

new_costofliving_data_df.head()





Unnamed: 0_level_0,Cost of Living Plus Rent Index,Groceries Index,Restaurant Price Index
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Switzerland,108.77,163.71,140.58
Bermuda,133.2,145.2,152.54
Switzerland,105.54,148.36,143.18
Switzerland,106.73,147.13,138.96
Switzerland,97.47,149.54,131.72


In [7]:
# Extract: read the country-profile CSV (path is relative to the notebook).
# `csv_file` is deliberately re-bound here, replacing the cost-of-living path.
csv_file = "Resources/country_profile_variables.csv"
country_data_df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows to confirm the file was parsed as expected.
country_data_df.head(n=5)


Unnamed: 0,country,Region,Surface area (km2),Population in thousands (2017),"Population density (per km2, 2017)","Sex ratio (m per 100 f, 2017)",GDP: Gross domestic product (million current US$),"GDP growth rate (annual %, const. 2005 prices)",GDP per capita (current US$),Economy: Agriculture (% of GVA),...,Mobile-cellular subscriptions (per 100 inhabitants).1,Individuals using the Internet (per 100 inhabitants),Threatened species (number),Forested area (% of land area),CO2 emission estimates (million tons/tons per capita),"Energy production, primary (Petajoules)",Energy supply per capita (Gigajoules),"Pop. using improved drinking water (urban/rural, %)","Pop. using improved sanitation facilities (urban/rural, %)",Net Official Development Assist. received (% of GNI)
0,Afghanistan,SouthernAsia,652864,35530,54.4,106.3,20270,-2.4,623.2,23.3,...,8.3,42,2.1,9.8/0.3,63,5,78.2/47.0,45.1/27.0,21.43,-99
1,Albania,SouthernEurope,28748,2930,106.9,101.9,11541,2.6,3984.2,22.4,...,63.3,130,28.2,5.7/2.0,84,36,94.9/95.2,95.5/90.2,2.96,-99
2,Algeria,NorthernAfrica,2381741,41318,17.3,102.0,164779,3.8,4154.1,12.2,...,38.2,135,0.8,145.4/3.7,5900,55,84.3/81.8,89.8/82.2,0.05,-99
3,American Samoa,Polynesia,199,56,278.2,103.6,-99,-99.0,-99.0,-99.0,...,-99.0,92,87.9,-99,-99,-99,100.0/100.0,62.5/62.5,-99.0,-99
4,Andorra,SouthernEurope,468,77,163.8,102.3,2812,0.8,39896.4,0.5,...,96.9,13,34.0,0.5/6.4,1,119,100.0/100.0,100.0/100.0,-99.0,-99


In [8]:
# Transform: narrow the country-profile data to the columns of interest.
country_data_cols = [
    'country',
    'Region',
    'Population in thousands (2017)',
    'GDP: Gross domestic product (million current US$)',
    'Urban population (% of total population)',
]

# Short snake_case replacements for the verbose source headers
# ("country" is already in its final form, so it needs no entry).
column_renames = {
    "Region": "region",
    "Population in thousands (2017)": "population_inthousands",
    "GDP: Gross domestic product (million current US$)": "gdp_inmillions",
    "Urban population (% of total population)": "urbanpop_pcttotalpop",
}

# Select, rename and index by country in one pass. No rows are dropped or
# de-duplicated here.
# NOTE(review): values of -99 show up in the preview (e.g. gdp_inmillions for
# American Samoa) -- presumably a missing-value sentinel; confirm before
# treating these columns as real measurements.
country_data_transformed = (
    country_data_df[country_data_cols]
    .copy()
    .rename(columns=column_renames)
    .set_index("country")
)

country_data_transformed.head()

Unnamed: 0_level_0,region,population_inthousands,gdp_inmillions,urbanpop_pcttotalpop
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,SouthernAsia,35530,20270,26.7
Albania,SouthernEurope,2930,11541,57.4
Algeria,NorthernAfrica,41318,164779,70.7
American Samoa,Polynesia,56,-99,87.2
Andorra,SouthernEurope,77,2812,85.1


### Connect to local database

In [6]:
# Build the SQLAlchemy engine for the `customer_db` MySQL database on 127.0.0.1.
#
# The user name and password are read from the MYSQL_USER / MYSQL_PASSWORD
# environment variables so that real credentials never have to be typed into
# (and saved with) the notebook. If a variable is not set, the original
# placeholder text is used, in which case the connection will be rejected as
# soon as the engine is first used.
db_user = os.environ.get("MYSQL_USER", "<inser user name>")
db_password = os.environ.get("MYSQL_PASSWORD", "<insert password>")

rds_connection_string = f"{db_user}:{db_password}@127.0.0.1/customer_db"
engine = create_engine(f'mysql://{rds_connection_string}')

### Check for tables

In [7]:
# List the tables that already exist in the connected database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names on both old and new versions.
inspect(engine).get_table_names()

['customer_location', 'customer_name']

### Use pandas to load csv converted DataFrame into database

In [8]:
# Load: append the rows of `new_customer_data_df` to the `customer_name` table.
# The DataFrame index is not written (index=False).
# NOTE(review): `new_customer_data_df` is not assigned in any cell visible in
# this notebook -- confirm where it comes from before running on a fresh kernel.
new_customer_data_df.to_sql(
    name='customer_name',
    con=engine,
    if_exists='append',
    index=False,
)

### Use pandas to load json converted DataFrame into database

In [9]:
# Load: append the rows of `new_customer_location_df` to the
# `customer_location` table. The DataFrame index is not written (index=False).
# NOTE(review): `new_customer_location_df` is not assigned in any cell visible
# in this notebook -- confirm where it comes from before running on a fresh kernel.
new_customer_location_df.to_sql(
    name='customer_location',
    con=engine,
    if_exists='append',
    index=False,
)

### Confirm data has been added by querying the customer_name table
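* NOTE: can also check using a MySQL client such as MySQL Workbench (the engine connects through the `mysql://` dialect, so pgAdmin, a PostgreSQL tool, does not apply)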

In [10]:
# Read the customer_name table back from the database and preview the first
# rows to confirm the load.
customer_name_rows = pd.read_sql_query('select * from customer_name', con=engine)
customer_name_rows.head()

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [11]:
# Read the customer_location table back from the database and preview the
# first rows to confirm the load.
customer_location_rows = pd.read_sql_query('select * from customer_location', con=engine)
customer_location_rows.head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
