#### Create DB and table

In [4]:
import aws_util
import pandas as pd
db_name = 'covid19_db'
conn, cur = aws_util.conn_db(db_name)

In [6]:
cur.execute('GRANT rds_superuser TO cyan8388;')

In [14]:
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                   'B' : ['one', 'one', 'two', 'three',
                          'two', 'two', 'one', 'three']})

In [25]:
len(df)//2

4

In [22]:
df_test = pd.read_sql("SELECT distinct(location, vaccine) FROM vac WHERE vaccine like '%Oxford%'", conn)

In [23]:
df_test

Unnamed: 0,row
0,"(Afghanistan,Oxford/AstraZeneca)"
1,"(Albania,""Oxford/AstraZeneca, Pfizer/BioNTech,..."
2,"(Andorra,""Pfizer/BioNTech, Oxford/AstraZeneca"")"
3,"(Angola,Oxford/AstraZeneca)"
4,"(Anguilla,Oxford/AstraZeneca)"
...,...
179,"(Uzbekistan,Oxford/AstraZeneca)"
180,"(Vietnam,Oxford/AstraZeneca)"
181,"(Wales,""Moderna, Oxford/AstraZeneca, Pfizer/Bi..."
182,"(Wales,""Oxford/AstraZeneca, Pfizer/BioNTech"")"


In [7]:
df_test = pd.read_sql('SELECT * from daily_case LIMIT 10', conn)

In [12]:
print(df_test.shape[0])
print(len(df_test))

0
0


In [2]:
import create_table

In [2]:
# create_table.create_database()

--- Creating database -----
connecting to AWS RDS Postgres default_db
Dropping database if exists...
Creating database covid19_db...
Connecting to AWS RDS database covid19_db...
--- Finish creating database ---


In [5]:
create_table.create_tables(conn, cur)

--- Creating tables in db ---
--- Finish creating tables ---


In [8]:
# Get all the available tables
cur.execute('''
SELECT table_name
  FROM information_schema.tables
 WHERE table_schema='public'
   AND table_type='BASE TABLE';''')

In [9]:
cur.fetchall()

[('daily_case',), ('vac',), ('country_loc',), ('dim_time',), ('csv_record',)]

### Load a csv file from S3 to df

In [2]:
file_name = 'daily_case_data/01-01-2021.csv'

In [6]:
df = pd.read_aws_csv(file_name)

### Perform ETL

In [1]:
import etl

In [7]:
# file_all = aws_util.list_files('daily_case_data/old_format/')

In [3]:
print(file_all)

[]


##### Daily cases

In [None]:
# If only want to update with the latest files that have not been processed before
etl.process_case_data()

In [2]:
# If want to download files from specific date(s)
file_date = ['2021-01-01', '2021-05-28'] # Specify the start and end dates of files to download
etl.process_case_data(file_date=file_date)

----- Start processing daily case data -----
Determining which daily case csv files to process based on csv_record table...
Loading and processing daily case data...
05-20-2021.csv
05-21-2021.csv
05-22-2021.csv
05-23-2021.csv
05-24-2021.csv
05-25-2021.csv


  time_data = (t.dt.strftime('%Y-%m-%d %H:%M:%S'), t.dt.strftime('%Y-%m-%d'), t.dt.hour.values, t.dt.day.values, t.dt.weekofyear.values, t.dt.month.values, t.dt.year.values, t.dt.weekday.values)


Loading and processing old daily case data...
Bulk inserting processed daily case data into daily case table...
  FIPS Admin2 Province_State Country_Region date_string          Last_Update  \
0                               Afghanistan  2021-05-21  2021-05-21 04:20:43   
1                                   Albania  2021-05-21  2021-05-21 04:20:43   
2                                   Algeria  2021-05-21  2021-05-21 04:20:43   
3                                   Andorra  2021-05-21  2021-05-21 04:20:43   
4                                    Angola  2021-05-21  2021-05-21 04:20:43   

  Confirmed Deaths Recovered Active  
0     64575   2772     55687   6116  
1    132118   2440    127869   1809  
2    126156   3401     87902  34853  
3     13569    127     13234    208  
4     31661    704     26483   4474  
Start bulk inserting...
Successfully inserted chunk0 of 1000 rows
Start bulk inserting...
Successfully inserted chunk1 of 1000 rows
Start bulk inserting...
Successfully inserted c

('Success', 'Success')

In [10]:
# If want to process all files
# etl.process_case_data(process_all=True)

##### Vaccination

In [5]:
etl.process_vaccine_data()

----- Start processing vaccination data -----
Loading and processing vaccination data...
Afghanistan.csv
Albania.csv
Algeria.csv
Andorra.csv
Angola.csv
Anguilla.csv
Antigua and Barbuda.csv
Argentina.csv
Armenia.csv
Aruba.csv
Australia.csv
Austria.csv
Azerbaijan.csv
Bahamas.csv
Bahrain.csv
Bangladesh.csv
Barbados.csv
Belarus.csv
Belgium.csv
Belize.csv
Benin.csv
Bermuda.csv
Bhutan.csv
Bolivia.csv
Bonaire Sint Eustatius and Saba.csv
Bosnia and Herzegovina.csv
Botswana.csv
Brazil.csv
Brunei.csv
Bulgaria.csv
Cambodia.csv
Cameroon.csv
Canada.csv
Cape Verde.csv
Cayman Islands.csv
Central African Republic.csv
Chile.csv
China.csv
Colombia.csv
Comoros.csv
Congo.csv
Costa Rica.csv
Cote d'Ivoire.csv
Croatia.csv
Cuba.csv
Curacao.csv
Cyprus.csv
Czechia.csv
Democratic Republic of Congo.csv
Denmark.csv
Djibouti.csv
Dominica.csv
Dominican Republic.csv
Ecuador.csv
Egypt.csv
El Salvador.csv
England.csv
Equatorial Guinea.csv
Estonia.csv
Eswatini.csv
Ethiopia.csv
Faeroe Islands.csv
Falkland Islands.csv
Fij

In [2]:
# list_files = aws_util.list_files('daily_case_data/')

In [5]:
# df_test = pd.read_sql("select * from vac where location = 'Israel'", conn)
df_test = pd.read_sql("""select * from daily_case where Country_Region = 'France' 
                      and date_string >= '2021-05-22'
                      and province_state = ''
                      order by date_string""", conn)

In [6]:
df_test.tail(20)

Unnamed: 0,fips,admin2,province_state,country_region,date_string,last_update,confirmed,deaths,recovered,active
0,,,,France,2021-05-22,2021-05-22 04:20:45,5526555,107645,324444,5092326
1,,,,France,2021-05-23,2021-05-23 04:20:51,5538938,107732,324444,5103831
2,,,,France,2021-05-24,2021-05-24 04:20:53,5547914,107789,324444,5112446
3,,,,France,2021-05-25,2021-05-25 04:21:08,5550143,107851,327886,5114406
4,,,,France,2021-05-26,2021-05-26 04:20:35,5552977,108071,327636,5117270
5,,,,France,2021-05-27,2021-05-27 04:20:59,5564217,108201,328943,5127073
6,,,,France,2021-05-28,2021-05-28 04:20:36,5578150,108343,329920,5139887
7,,,,France,2021-05-29,2021-05-29 04:20:41,5588981,108466,331006,5149509
