In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Store CSV into DataFrame

In [4]:
# The malaria report carries an .xls extension but is parsed here as delimited
# text, decoded as Latin-1.
csv_file = "./Resources/Malaria_deaths_report.xls"
raw_malaria_df = pd.read_csv(
    filepath_or_buffer=csv_file,
    encoding='latin_1',
)
# Preview the first five rows of the untouched frame.
raw_malaria_df.head(n=5)

Unnamed: 0,"Annex 3 ? J. Reported malaria deaths, 2010?2018",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19
0,,,,,,,,,,,,,,,,,,,,
1,WHO region\r\nCountry/area,2000.0,2001.0,2002,2003,2004,2005,2006,2007,2008.0,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0
2,AFRICAN,,,,,,,,,,,,,,,,,,,
3,Algeria,2.0,1.0,?,?,?,?,?,?,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Angola,9510.0,9473.0,14434,38598,12459,13768,10220,9812,9465.0,10530.0,8114.0,6909.0,5736.0,7300.0,5714.0,7832.0,15997.0,13967.0,11814.0


In [5]:
# Daily COVID-19 report, read with pandas' default CSV settings.
csv_file = "./Resources/04-22-2020.csv"
raw_covid_df = pd.read_csv(filepath_or_buffer=csv_file)
# Preview the first five rows of the untouched frame.
raw_covid_df.head(n=5)

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
0,45001.0,Abbeville,South Carolina,US,2020-04-22 23:30:53,34.223334,-82.461707,22,0,0,22,"Abbeville, South Carolina, US"
1,22001.0,Acadia,Louisiana,US,2020-04-22 23:30:53,30.295065,-92.414197,117,7,0,110,"Acadia, Louisiana, US"
2,51001.0,Accomack,Virginia,US,2020-04-22 23:30:53,37.767072,-75.632346,59,1,0,58,"Accomack, Virginia, US"
3,16001.0,Ada,Idaho,US,2020-04-22 23:30:53,43.452658,-116.241552,622,12,0,610,"Ada, Idaho, US"
4,19001.0,Adair,Iowa,US,2020-04-22 23:30:53,41.330756,-94.471059,1,0,0,1,"Adair, Iowa, US"


### Create new DataFrames with selected columns

In [49]:
# Keep rows from position 2 onward and only the first and last columns
# (position -1 is the final column), then discard any row that has a
# missing value in either of them.
malaria_df = (
    raw_malaria_df
    .iloc[2:, [0, -1]]
    .dropna()
)
# Replace the positional column labels with descriptive names.
malaria_df.columns = ['Country', 'Deaths']
malaria_df.head()

Unnamed: 0,Country,Deaths
3,Algeria,0
4,Angola,11814
5,Benin,2138
6,Botswana,9
7,Burkina Faso,4294


In [46]:
# Sum the 'Deaths' column per 'Country_Region', then turn the grouping key
# back into an ordinary column.
covid_df = (
    raw_covid_df
    .pivot_table(values='Deaths', index='Country_Region', aggfunc='sum')
    .reset_index()
)

### Connect to local database

In [38]:
# Read the database credentials from the environment so the password is never
# stored in the notebook. The user name falls back to the local default.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD")
if db_password is None:
    raise RuntimeError(
        "Set the PGPASSWORD environment variable before running this cell."
    )

# quote_plus escapes characters such as '@' or '/' that would otherwise
# corrupt the connection URL if they appear in the credentials.
rds_connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}@localhost:5432/death_db"
)
engine = create_engine(f'postgresql://{rds_connection_string}')
# Display the engine; its repr masks the password.
engine

Engine(postgresql://postgres:***@localhost:5432/death_db)

### Check for tables

In [40]:
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the list of table names.
inspect(engine).get_table_names()

['malaria', 'covid19']

### Use pandas to load the malaria DataFrame (built from the CSV) into the database

In [50]:
# Write the malaria frame to the 'malaria' table, replacing the table if it
# already exists and leaving the DataFrame index out of the stored columns.
malaria_df.to_sql(
    'malaria',
    engine,
    index=False,
    if_exists='replace',
)

### Use pandas to load the COVID-19 DataFrame (built from the CSV) into the database

In [51]:
# Write the per-country COVID-19 totals to the 'covid19' table, replacing the
# table if it already exists and leaving the DataFrame index out.
covid_df.to_sql(
    'covid19',
    engine,
    index=False,
    if_exists='replace',
)

### Confirm data has been added by querying the malaria table
* NOTE: can also check using pgAdmin

In [52]:
# Read the malaria table back through the same engine and preview the first
# rows to confirm the load.
pd.read_sql_query(
    sql='select * from malaria',
    con=engine,
).head()

Unnamed: 0,Country,Deaths
0,Algeria,0
1,Angola,11814
2,Benin,2138
3,Botswana,9
4,Burkina Faso,4294


### Confirm data has been added by querying the covid19 table

In [53]:
# Read the covid19 table back through the same engine and preview the first
# rows to confirm the load.
pd.read_sql_query(
    sql='select * from covid19',
    con=engine,
).head()

Unnamed: 0,Country_Region,Deaths
0,Afghanistan,40
1,Albania,27
2,Algeria,402
3,Andorra,37
4,Angola,2
