In [43]:
import os
import pandas as pd
import numpy as np
import psycopg2
import postgres_creds as cred
# Since changes were made in cred and our ipynb can't see new changes, we use Importlib to reload the module
import importlib
importlib.reload(cred)

<module 'postgres_creds' from '/Users/chewynguyen/Desktop/csv_postgres_connector/postgres_creds.py'>

In [33]:
# Import csv into dataframe
url = "https://raw.githubusercontent.com/datasets/covid-19/main/data/countries-aggregated.csv"
df = pd.read_csv(url)
table_name = url.replace('https://raw.githubusercontent.com/datasets/covid-19/main/data/countries-aggregated.csv','countries_aggregated')

In [34]:
# Replacing pd datatypes with sql datatypes
replacements = {
        'timedelta64[ns]': 'varchar(255)',
        'object': 'varchar(255)',
        'float64': 'float',
        'int64': 'int',
        'datetime64': 'timestamp'}
print(df.dtypes)
replaced_dtypes = df.dtypes.replace(replacements)
# Places column name next to DB object type, used inside SQL create table statement
column_dtype = ", ".join("{} {}".format(n, d) for (n, d) in zip(df.columns, replaced_dtypes))
print(column_dtype)

Date         object
Country      object
Confirmed     int64
Recovered     int64
Deaths        int64
dtype: object
Date varchar(255), Country varchar(255), Confirmed int, Recovered int, Deaths int


In [35]:
conn = psycopg2.connect(
    host = cred.host,
    user = cred.user,
    password = cred.password,
    database = cred.database)

cursor = conn.cursor()

Testing Strategy

1. create df_10 dataframe
2. drop table, create table in DB with replaced object type, insert statement queries
3. create insert into function
4. Show table in DB funtion

In [36]:
# 1. create df_10 dataframe
df_10 = df.head(10)
df_10.to_csv('countries_test10.csv')

# 2. create queries
drop_table = 'DROP TABLE IF EXISTS ' + table_name
create_table = 'CREATE TABLE ' + table_name + " (" + column_dtype + ")"
insert_into_table = 'INSERT INTO countries_aggregated (Date,Country,Confirmed,Recovered,Deaths) VALUES (%s,%s,%s,%s,%s)'
select_table = 'SELECT * FROM ' + table_name

cursor.execute(drop_table)
cursor.execute(create_table)
# 3. create insert into function
for index, row in df.iterrows():
    cursor.execute(insert_into_table,row)
conn.commit()

# 4. Show table in DB
cursor.execute(select_table)
for each in cursor:
    print(each)

('2020-01-22', 'Afghanistan', 0, 0, 0)
('2020-01-23', 'Afghanistan', 0, 0, 0)
('2020-01-24', 'Afghanistan', 0, 0, 0)
('2020-01-25', 'Afghanistan', 0, 0, 0)
('2020-01-26', 'Afghanistan', 0, 0, 0)
('2020-01-27', 'Afghanistan', 0, 0, 0)
('2020-01-28', 'Afghanistan', 0, 0, 0)
('2020-01-29', 'Afghanistan', 0, 0, 0)
('2020-01-30', 'Afghanistan', 0, 0, 0)
('2020-01-31', 'Afghanistan', 0, 0, 0)
('2020-02-01', 'Afghanistan', 0, 0, 0)
('2020-02-02', 'Afghanistan', 0, 0, 0)
('2020-02-03', 'Afghanistan', 0, 0, 0)
('2020-02-04', 'Afghanistan', 0, 0, 0)
('2020-02-05', 'Afghanistan', 0, 0, 0)
('2020-02-06', 'Afghanistan', 0, 0, 0)
('2020-02-07', 'Afghanistan', 0, 0, 0)
('2020-02-08', 'Afghanistan', 0, 0, 0)
('2020-02-09', 'Afghanistan', 0, 0, 0)
('2020-02-10', 'Afghanistan', 0, 0, 0)
('2020-02-11', 'Afghanistan', 0, 0, 0)
('2020-02-12', 'Afghanistan', 0, 0, 0)
('2020-02-13', 'Afghanistan', 0, 0, 0)
('2020-02-14', 'Afghanistan', 0, 0, 0)
('2020-02-15', 'Afghanistan', 0, 0, 0)
('2020-02-16', 'Afghanist

In [37]:
cursor.close()

Next Goal:

1. Automate code: csv's can be imported without manually changing code
2. Can upload multiple csv's at the same time

Next Steps:

1. Create new folder in current directory, if it exists, pass
2. Check cwd, if csv in cwd, move csv to new folder
3. Look inside new folder and clean the csv names and their column names

In [60]:
csv_files = []
for file in os.listdir(os.getcwd()):
    if '.csv' in file:
        csv_files.append(file)
        
print(csv_files)

['countries_test10.csv']


In [69]:
# 1. Create new folder in current directory

new_directory = "imported_csv"
try:
    os.mkdir(new_directory)
except:
    pass


In [74]:
# Check cwd, if csv in cwd, move csv to new folder
mv = "mv '{0}' {1}".format(file,new_directory)
os.system(mv)
print(mv)

mv 'csv_to_postgres.ipynb' imported_csv
