In [458]:
# Import Libraries
# Conda Install Libraries
# Pip Install Libraries

import pandas as pd
import numpy as np
from psycopg2.pool import ThreadedConnectionPool
import psycopg2

In [459]:
!ls

Customer Contracts$.csv  Makefile                 customer_contracts.csv
Customer Demo.csv        csv-db-automate.ipynb    requirements.txt
Customer Engagements.csv [1m[36mcsv-db-automation[m[m


In [460]:
df  = pd.read_csv('Customer Contracts$.csv')
df.head()

Unnamed: 0,customer_name,start_date,end_date,contract_amount_m,invoice_sent,paid
0,Nike,01-02-2019,12-20-2020,2.98,Yes,Yes
1,Reebox,06-20-2017,,3.9,No,No
2,Adidas,12-07-2015,6-20-2018,4.82,Yes,Yes
3,Google,05-25-2014,03-20-2017,5.74,Yes,No
4,Amazon,11-10-2012,12-20-2015,6.66,No,Yes


In [461]:
# Clean table names
# Lower case letters
# Remove all white spaces
# Replace -, /, \\, $ with _

file = 'Customer Contracts$'
clean_tbl_name = file.lower().replace(" ","_").replace("?",""). \
    replace("-","_").replace(r"/","_").replace("\\","_") \
    .replace("%","").replace(")","").replace(r"(","").replace("$","")

clean_tbl_name

'customer_contracts'

In [462]:
# Clean header names
# Lower case letters
# Remove all white spaces
# Replace -, /, \\, $ with _
df.columns = [x.lower().replace(" ","_").replace("?",""). \
    replace("-","_").replace(r"/","_").replace("\\","_") \
    .replace("%","").replace(")","").replace(r"(","").replace("$","") for x in df.columns]

df.columns

Index(['customer_name', 'start_date', 'end_date', 'contract_amount_m',
       'invoice_sent', 'paid'],
      dtype='object')

In [463]:
"""
create table customer_contracts
(
   customer_name       varchar,
   start_date          varchar,
   end_date            varchar,
   contract_amount_m   float,
   invoice_sent        varchar,
   paid                varchar
);
"""

'\ncreate table customer_contracts\n(\n   customer_name       varchar,\n   start_date          varchar,\n   end_date            varchar,\n   contract_amount_m   float,\n   invoice_sent        varchar,\n   paid                varchar\n);\n'

In [464]:
df.dtypes

customer_name         object
start_date            object
end_date              object
contract_amount_m    float64
invoice_sent          object
paid                  object
dtype: object

In [465]:
replacements = {
    'object' : 'varchar',
    'float64' : 'float',
    'int64' : 'int',
    'datetime64' : 'timestamp',
    'timedelta64[ns]' : 'varchar'
}

replacements

{'object': 'varchar',
 'float64': 'float',
 'int64': 'int',
 'datetime64': 'timestamp',
 'timedelta64[ns]': 'varchar'}

In [466]:
col_str = ", ".join("{} {}".format(n, d) for (n, d) in zip(df.columns, df.dtypes.replace(replacements)))
col_str

'customer_name varchar, start_date varchar, end_date varchar, contract_amount_m float, invoice_sent varchar, paid varchar'

In [467]:
df.columns

Index(['customer_name', 'start_date', 'end_date', 'contract_amount_m',
       'invoice_sent', 'paid'],
      dtype='object')

In [468]:
# Enter the values for your database
# dsn_dbname = "pyautomation"
# dsn_host = "192.168.1.12"
# dsn_port = "5432"
# dsn_user = "root"
# dsn_pass = "secret"

In [469]:
# Open a database connection
# conn_string = "host="+dsn_host+" port="+dsn_port+" dbname="+dsn_dbname+" user="+dsn_user+" pass="+dsn_pass
#
# conn = psycopg2.connect(conn_string)

In [470]:
# conn_string = ThreadedConnectionPool(minconn=1, maxconn=5, "host='localhost' dbname='pyautomation' user='root' password='secret' port='5432' options='-c search_path=dbo,public'")
# print ("Connecting to database\n ->%s" % (conn_string))
#
# conn = psycopg2.connect(conn_string)
# cursor = conn.cursor()
# print('opened database successfully')

In [471]:
conn_string = "host='localhost' dbname='pyautomation' user='root' password='secret' port='5432' options='-c search_path=dbo,public'"

print ("Connecting to database\n ->%s" % (conn_string))

conn = psycopg2.connect(conn_string)
cursor = conn.cursor()
print("opened database successfully")


Connecting to database
 ->host='localhost' dbname='pyautomation' user='root' password='secret' port='5432' options='-c search_path=dbo,public'
opened database successfully


In [472]:
clean_tbl_name

'customer_contracts'

In [473]:
# Drop tables with same name
cursor.execute("drop table if exists customer_contracts;")

In [474]:
# Call columns again
col_str

'customer_name varchar, start_date varchar, end_date varchar, contract_amount_m float, invoice_sent varchar, paid varchar'

In [475]:
# Create table
cursor.execute("create table customer_contracts \
               (customer_name      varchar, \
               start_date          varchar, \
               end_date            varchar, \
               contract_amount_m   float, \
               invoice_sent        varchar, \
               paid                varchar)")

In [476]:
cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' ")

for table in cursor.fetchall():
    print(table)

('customer_contracts',)


In [477]:
# Insert Values to table

# Save df to csv
df.to_csv('customer_contracts.csv', header=df.columns, index=False, encoding='utf-8')

# Open the csv file, save it as an object
my_file = open('customer_contracts.csv')
print("File opened in memory")

File opened in memory


In [478]:
# Upload to db
SQL_STATEMENT = """
COPY customer_contracts FROM STDIN WITH
    CSV
    HEADER
    DELIMITER AS ','
"""

cursor.copy_expert(sql=SQL_STATEMENT, file=my_file)
print("File copied to db")

File copied to db


In [479]:
cursor.execute("grant select on table customer_contracts to public")
conn.commit()

cursor.close()
print("Table customer_contracts to imported to db completed")

Table customer_contracts to imported to db completed
