In [1]:
import configparser
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
config = configparser.ConfigParser()
config.read('clustertab.config')

['clustertab.config']

In [3]:
db = config['POSTGRES']['PG_DB']
user = config['POSTGRES']['PG_UNAME']
passwd = config['POSTGRES']['PG_PASS']
port = config['POSTGRES']['PG_PORT']
host = config['POSTGRES']['PG_HOST']

In [4]:
db

'retail_db'

In [5]:
credentials = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,db)

#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn = psycopg2.connect(host=host,dbname=db,user=user,password=passwd,port=port)
except Exception as e:
    print(e)
    
conn.set_session(autocommit=True)

try:
    cur = conn.cursor()
    
except:
    print(e)

In [6]:
credentials

'postgresql://postgres:1234@172.17.0.2:5432/retail_db'

In [24]:
#Helper functions to work with the database
def schemaGen(dataframe, schemaName):
    localSchema = pd.io.sql.get_schema(dataframe,schemaName)
    localSchema = localSchema.replace('TEXT','VARCHAR(255)').replace('INTEGER','NUMERIC').replace('\n','').replace('"',"")
    return "".join(localSchema)

#Using pandas read_sql for getting schema
def getSchema(tableName, credentials):
    schema = pd.read_sql("""SELECT table_catalog, table_name, 
                column_name, data_type, 
                ordinal_position, column_default, character_maximum_length,
                is_nullable FROM information_schema.columns where table_name='{}'""".format(tableName),con=credentials)
    return schema

#Issue is in using pd.read_sql to write data to the database. so using psycopg2
def queryTable(query):
    try:
        schema = cur.execute(query)
        return 
    except Exception as e:
        print(e)
        
#This doesn't return anything

#Using the pd.read_sql for getting data from db
def queryBase(query):
    requiredTable = pd.read_sql(query,con=credentials)
    return requiredTable

#This returns the dataframe

**Data Definition** is kept seperately from the **DQL (Data Query)** and **DML (Data Manipulation)** Languages, or clauses.

DDL tasks are 

- Creating Tables : Objects inside the database

- Creating Indexes for Performance increase

- Creating Constraint (NOT NULL, CHECK, PRIMARY KEY, UNIQUE etc)

Another important command is ALTER. Helps in Adding Columns, Dropping and Changing datatypes

Less known operation is adding comments on Table and Column levels.

In [8]:
queryBase("""CREATE TABLE users ( 
    user_id SERIAL PRIMARY KEY,
    user_first_name VARCHAR(30) NOT NULL,
    user_last_name VARCHAR(30) NOT NULL,
    user_email_id VARCHAR(50) NOT NULL,
    user_email_validated BOOLEAN DEFAULT FALSE,
    user_password VARCHAR(200),
    user_role VARCHAR(1) NOT NULL DEFAULT 'U', --U and A
    is_active BOOLEAN DEFAULT FALSE,
    created_dt DATE DEFAULT CURRENT_DATE,
    last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);""")

ResourceClosedError: This result object does not return rows. It has been closed automatically.

In [14]:
#dropping the above table to create the same table differently

queryTable("""DROP TABLE users""")

In [15]:
queryTable("""CREATE TABLE users ( 
    user_id INT,
    user_first_name VARCHAR(30) NOT NULL,
    user_last_name VARCHAR(30) NOT NULL,
    user_email_id VARCHAR(50) NOT NULL,
    user_email_validated BOOLEAN,
    user_password VARCHAR(200),
    user_role VARCHAR(1),
    is_active BOOLEAN,
    created_dt DATE DEFAULT CURRENT_DATE
);""")

In [9]:
queryTable("""COMMENT ON TABLE users IS 'The table stores the user data'""")

In [17]:
queryBase("""SELECT table_catalog, table_name, 
                column_name, data_type, 
                ordinal_position, column_default, charact er_maximum_length,
                is_nullable FROM information_schema.columns WHERE table_name = 'users'
            ORDER BY ordinal_position""")

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,user_id,integer,1,,,YES
1,retail_db,users,user_first_name,character varying,2,,30.0,NO
2,retail_db,users,user_last_name,character varying,3,,30.0,NO
3,retail_db,users,user_email_id,character varying,4,,50.0,NO
4,retail_db,users,user_email_validated,boolean,5,,,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_role,character varying,7,,1.0,YES
7,retail_db,users,is_active,boolean,8,,,YES
8,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES


In [18]:
queryTable("""DROP SEQUENCE IF EXISTS users_user_id_seq""")

In [19]:
queryTable("""CREATE SEQUENCE users_user_id_seq""")

In [25]:
getSchema("users",credentials)

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES
1,retail_db,users,user_email_validated,boolean,5,,,YES
2,retail_db,users,is_active,boolean,8,,,YES
3,retail_db,users,user_id,integer,1,,,YES
4,retail_db,users,user_role,character varying,7,,1.0,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_first_name,character varying,2,,30.0,NO
7,retail_db,users,user_last_name,character varying,3,,30.0,NO
8,retail_db,users,user_email_id,character varying,4,,50.0,NO


In [26]:
queryTable("""ALTER TABLE users ALTER COLUMN user_id SET DEFAULT nextval('users_user_id_seq')""")

In [27]:
getSchema("users",credentials)

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES
1,retail_db,users,user_email_validated,boolean,5,,,YES
2,retail_db,users,is_active,boolean,8,,,YES
3,retail_db,users,user_id,integer,1,nextval('users_user_id_seq'::regclass),,YES
4,retail_db,users,user_role,character varying,7,,1.0,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_first_name,character varying,2,,30.0,NO
7,retail_db,users,user_last_name,character varying,3,,30.0,NO
8,retail_db,users,user_email_id,character varying,4,,50.0,NO


In [28]:
queryTable("""ALTER TABLE users ALTER COLUMN user_email_validated SET DEFAULT FALSE""")

In [29]:
queryTable("""ALTER TABLE users ALTER COLUMN is_active SET DEFAULT FALSE""")

In [30]:
getSchema("users",credentials)

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES
1,retail_db,users,user_email_validated,boolean,5,false,,YES
2,retail_db,users,is_active,boolean,8,false,,YES
3,retail_db,users,user_id,integer,1,nextval('users_user_id_seq'::regclass),,YES
4,retail_db,users,user_role,character varying,7,,1.0,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_first_name,character varying,2,,30.0,NO
7,retail_db,users,user_last_name,character varying,3,,30.0,NO
8,retail_db,users,user_email_id,character varying,4,,50.0,NO


In [31]:
queryTable("""ALTER TABLE users ALTER COLUMN user_role SET DATA TYPE CHAR(1)""")

In [32]:
queryTable("""ALTER TABLE users ALTER COLUMN user_role SET DEFAULT 'U'""")

In [33]:
getSchema("users",credentials)

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES
1,retail_db,users,user_email_validated,boolean,5,false,,YES
2,retail_db,users,is_active,boolean,8,false,,YES
3,retail_db,users,user_id,integer,1,nextval('users_user_id_seq'::regclass),,YES
4,retail_db,users,user_role,character,7,'U'::bpchar,1.0,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_first_name,character varying,2,,30.0,NO
7,retail_db,users,user_last_name,character varying,3,,30.0,NO
8,retail_db,users,user_email_id,character varying,4,,50.0,NO


In [34]:
queryTable("""ALTER TABLE users ADD COLUMN last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP""")

In [35]:
getSchema('users',credentials)

Unnamed: 0,table_catalog,table_name,column_name,data_type,ordinal_position,column_default,character_maximum_length,is_nullable
0,retail_db,users,user_id,integer,1,nextval('users_user_id_seq'::regclass),,YES
1,retail_db,users,user_email_validated,boolean,5,false,,YES
2,retail_db,users,is_active,boolean,8,false,,YES
3,retail_db,users,created_dt,date,9,CURRENT_DATE,,YES
4,retail_db,users,last_updated_ts,timestamp without time zone,10,CURRENT_TIMESTAMP,,YES
5,retail_db,users,user_password,character varying,6,,200.0,YES
6,retail_db,users,user_first_name,character varying,2,,30.0,NO
7,retail_db,users,user_last_name,character varying,3,,30.0,NO
8,retail_db,users,user_email_id,character varying,4,,50.0,NO
9,retail_db,users,user_role,character,7,'U'::bpchar,1.0,YES


There are multiple constraints that can be enforced on the columns like PRIMARY KEY, UNIQUE, NOT NULL. The Foreign Key and Check Constraints have to understood throughly. One thing I came to know is that there can multiple column with UNIQUE constraints, and **Foreign Key can be defined agains the column that is UNIQUE**

In [38]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
                FROM information_schema.table_constraints
                WHERE table_name = 'users'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,users,CHECK,2200_16444_2_not_null
1,retail_db,users,CHECK,2200_16444_3_not_null
2,retail_db,users,CHECK,2200_16444_4_not_null


In [40]:
#dropping the above table to create the same table differently

queryTable("""DROP TABLE IF EXISTS users """)

In [41]:
queryTable("""CREATE TABLE users ( 
    user_id INT,
    user_first_name VARCHAR(30) NOT NULL,
    user_last_name VARCHAR(30) NOT NULL,
    user_email_id VARCHAR(50) NOT NULL,
    user_email_validated BOOLEAN,
    user_password VARCHAR(200),
    user_role VARCHAR(1),
    is_active BOOLEAN,
    created_dt DATE DEFAULT CURRENT_DATE
);""")

In [42]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'users'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,users,CHECK,2200_16457_2_not_null
1,retail_db,users,CHECK,2200_16457_3_not_null
2,retail_db,users,CHECK,2200_16457_4_not_null


In [43]:
queryTable("""ALTER TABLE users ADD PRIMARY KEY (user_id) """)

In [44]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'users'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,users,PRIMARY KEY,users_pkey
1,retail_db,users,CHECK,2200_16457_1_not_null
2,retail_db,users,CHECK,2200_16457_2_not_null
3,retail_db,users,CHECK,2200_16457_3_not_null
4,retail_db,users,CHECK,2200_16457_4_not_null


In [47]:
queryTable("""ALTER TABLE users DROP CONSTRAINT PRIMARY KEY (user_id) """)

syntax error at or near "PRIMARY"
LINE 1: ALTER TABLE users DROP CONSTRAINT PRIMARY KEY (user_id) 
                                          ^



In [48]:
queryTable("""ALTER TABLE users ADD UNIQUE (user_email_id) """)

In [51]:
queryTable("""ALTER TABLE users
            ALTER COLUMN user_email_validated SET NOT NULL,
            ALTER COLUMN user_role SET NOT NULL""")

In [52]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'users'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,users,PRIMARY KEY,users_pkey
1,retail_db,users,UNIQUE,users_user_email_id_key
2,retail_db,users,CHECK,2200_16457_1_not_null
3,retail_db,users,CHECK,2200_16457_2_not_null
4,retail_db,users,CHECK,2200_16457_3_not_null
5,retail_db,users,CHECK,2200_16457_4_not_null
6,retail_db,users,CHECK,2200_16457_5_not_null
7,retail_db,users,CHECK,2200_16457_7_not_null


In [53]:
#practice table
queryTable("""CREATE TABLE user_logins(
                user_login_id SERIAL PRIMARY KEY,
                user_id INT,
                user_login_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                user_ip_addr VARCHAR(20))""")

In [55]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'user_logins'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,user_logins,PRIMARY KEY,user_logins_pkey
1,retail_db,user_logins,CHECK,2200_16466_1_not_null


In [57]:
queryTable("""ALTER TABLE user_logins ADD FOREIGN KEY (user_id) REFERENCES users(user_id)""")

In [58]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'user_logins'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,user_logins,PRIMARY KEY,user_logins_pkey
1,retail_db,user_logins,FOREIGN KEY,user_logins_user_id_fkey
2,retail_db,user_logins,CHECK,2200_16466_1_not_null


In [59]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_ schema.table_constraints
            WHERE table_name = 'users'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,users,PRIMARY KEY,users_pkey
1,retail_db,users,UNIQUE,users_user_email_id_key
2,retail_db,users,CHECK,2200_16457_1_not_null
3,retail_db,users,CHECK,2200_16457_2_not_null
4,retail_db,users,CHECK,2200_16457_3_not_null
5,retail_db,users,CHECK,2200_16457_4_not_null
6,retail_db,users,CHECK,2200_16457_5_not_null
7,retail_db,users,CHECK,2200_16457_7_not_null


In [60]:
queryTable("""CREATE INDEX order_items_oid_idx ON order_items(order_item_order_id)""")

In [62]:
queryBase("""SELECT table_catalog, table_name, constraint_type, constraint_name
            FROM information_schema.table_constraints
            WHERE table_name = 'order_items'""")

Unnamed: 0,table_catalog,table_name,constraint_type,constraint_name
0,retail_db,order_items,PRIMARY KEY,order_items_pkey
1,retail_db,order_items,CHECK,2200_16416_1_not_null
2,retail_db,order_items,CHECK,2200_16416_2_not_null
3,retail_db,order_items,CHECK,2200_16416_3_not_null
4,retail_db,order_items,CHECK,2200_16416_4_not_null
5,retail_db,order_items,CHECK,2200_16416_5_not_null
6,retail_db,order_items,CHECK,2200_16416_6_not_null


In [63]:
queryBase("""SELECT *
            FROM pg_catalog.pg_indexes
            WHERE schemaname = 'public'
            AND tablename = 'users'""")

Unnamed: 0,schemaname,tablename,indexname,tablespace,indexdef
0,public,users,users_pkey,,CREATE UNIQUE INDEX users_pkey ON public.users...
1,public,users,users_user_email_id_key,,CREATE UNIQUE INDEX users_user_email_id_key ON...


In [65]:
queryTable("""DROP TABLE IF EXISTS users cascade""")

In [66]:
queryTable("""CREATE TABLE users ( 
    user_id INT,
    user_first_name VARCHAR(30) NOT NULL,
    user_last_name VARCHAR(30) NOT NULL,
    user_email_id VARCHAR(50) NOT NULL,
    user_email_validated BOOLEAN,
    user_password VARCHAR(200),
    user_role VARCHAR(1),
    is_active BOOLEAN,
    created_dt DATE DEFAULT CURRENT_DATE
);""")

In [67]:
queryBase("""SELECT *
                FROM pg_catalog.pg_indexes
                WHERE schemaname = 'public'
                AND tablename = 'users'""")

Unnamed: 0,schemaname,tablename,indexname,tablespace,indexdef


In [68]:
queryTable("""ALTER TABLE users
            ALTER COLUMN user_id SET DEFAULT nextval('users_user_id_seq'),
            ADD PRIMARY KEY (user_id)""")

In [69]:
queryBase("""SELECT *
                FROM pg_catalog.pg_indexes
                WHERE schemaname = 'public'
                AND tablename = 'users'""")

Unnamed: 0,schemaname,tablename,indexname,tablespace,indexdef
0,public,users,users_pkey,,CREATE UNIQUE INDEX users_pkey ON public.users...


In [71]:
queryBase("""SELECT tc.table_catalog,
    tc.table_name, 
    tc.constraint_name,
    pi.indexname
FROM information_schema.table_constraints tc JOIN pg_catalog.pg_indexes pi
    ON tc.constraint_name = pi.indexname
WHERE tc.table_schema = 'public'
    AND tc.table_name = 'users'
    AND tc.constraint_type = 'PRIMARY KEY'""")

Unnamed: 0,table_catalog,table_name,constraint_name,indexname
0,retail_db,users,users_pkey,users_pkey


In [72]:
queryTable("""ALTER TABLE users ADD UNIQUE (user_email_id) """)

In [74]:
queryBase("""SELECT *
                FROM pg_catalog.pg_indexes 
                WHERE schemaname = 'public'
                AND tablename = 'users'""")

Unnamed: 0,schemaname,tablename,indexname,tablespace,indexdef
0,public,users,users_pkey,,CREATE UNIQUE INDEX users_pkey ON public.users...
1,public,users,users_user_email_id_key,,CREATE UNIQUE INDEX users_user_email_id_key ON...


Surrogate Keys are columns that are created by the data engineer when the given table doesn't have any relevant keys 

In [75]:
queryTable("""DROP SEQUENCE IF EXISTS test_seq""")

In [81]:
queryTable("""CREATE SEQUENCE test_seq
                START WITH 101
                MINVALUE 101
                MAXVALUE 1000
                INCREMENT BY 100""")

relation "test_seq" already exists



In [82]:
queryTable("""SELECT currval('test_seq')""")

In [83]:
queryTable("""SELECT nextval('test_seq')""")

In [84]:
queryBase("""SELECT currval('test_seq')""")

OperationalError: (psycopg2.errors.ObjectNotInPrerequisiteState) currval of sequence "test_seq" is not yet defined in this session

[SQL: SELECT currval('test_seq')]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [86]:
queryTable("""
    DROP TABLE IF EXISTS users;
    CREATE TABLE users ( 
    user_id SERIAL PRIMARY KEY,
    user_first_name VARCHAR(30) NOT NULL,
    user_last_name VARCHAR(30) NOT NULL,
    user_email_id VARCHAR(50) NOT NULL,
    user_email_validated BOOLEAN,
    user_password VARCHAR(200),
    user_role VARCHAR(1),
    is_active BOOLEAN,
    created_dt DATE DEFAULT CURRENT_DATE
);""")

In [87]:
queryBase("""SELECT *
                FROM pg_catalog.pg_indexes 
                WHERE schemaname = 'public'
                AND tablename = 'users'""")

Unnamed: 0,schemaname,tablename,indexname,tablespace,indexdef
0,public,users,users_pkey,,CREATE UNIQUE INDEX users_pkey ON public.users...


In [89]:
queryBase("""SELECT *
                FROM information_schema.sequences""")

Unnamed: 0,sequence_catalog,sequence_schema,sequence_name,data_type,numeric_precision,numeric_precision_radix,numeric_scale,start_value,minimum_value,maximum_value,increment,cycle_option
0,retail_db,public,users_user_id_seq,bigint,64,2,0,1,1,9223372036854775807,1,NO
1,retail_db,public,user_logins_user_login_id_seq,integer,32,2,0,1,1,2147483647,1,NO
2,retail_db,public,test_seq,bigint,64,2,0,101,101,1000,100,NO
3,retail_db,public,users_user_id_seq1,integer,32,2,0,1,1,2147483647,1,NO
