In [1]:
import configparser
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [3]:
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty list means no settings were loaded. Fail here
# with a clear message instead of a later KeyError on config['POSTGRES'].
loaded_files = config.read('calter.config')
if not loaded_files:
    raise FileNotFoundError("Could not read configuration file 'calter.config'")
# Last expression of the cell: display which files were read.
loaded_files

['calter.config']

In [4]:
# Pull the connection settings out of the [POSTGRES] section in one pass.
# The option names are listed in the same order as the target variables.
db, user, passwd, port, host = (
    config['POSTGRES'][option]
    for option in ('PG_DB', 'PG_UNAME', 'PG_PASS', 'PG_PORT', 'PG_HOST')
)

In [5]:
# Display the database name read from the config as a quick sanity check.
db

'postgres'

In [6]:
# Build the connection URL that pd.read_sql receives as `con`.
# User name and password are percent-encoded so characters such as '@', ':'
# or '/' inside them cannot break the URL structure.
from urllib.parse import quote

credentials = "postgresql://{}:{}@{}:{}/{}".format(
    quote(user, safe=''), quote(passwd, safe=''), host, port, db
)

#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn = psycopg2.connect(host=host,dbname=db,user=user,password=passwd,port=port)
except Exception as e:
    # Show the driver's message, then stop: continuing would only fail later
    # with a NameError on the undefined `conn`.
    print(e)
    raise

# Autocommit: every statement sent through `cur` is committed immediately.
conn.set_session(autocommit=True)

try:
    cur = conn.cursor()
except Exception as e:
    # The exception must be bound here; the `e` of the earlier handler does
    # not exist outside that handler.
    print(e)
    raise

In [7]:
# Show the connection target with the password masked, so the secret is not
# written into the notebook's saved output.
"postgresql://{}:***@{}:{}/{}".format(user, host, port, db)

'postgresql://postgres:***@localhost:5432/postgres'

In [8]:
#Helper functions to work with the database
def schemaGen(dataframe, schemaName):
    """Return a single-line CREATE TABLE statement derived from a DataFrame.

    The DDL produced by ``pd.io.sql.get_schema`` is post-processed with plain
    text substitutions: TEXT -> VARCHAR(255), INTEGER -> NUMERIC, and all
    newlines and double quotes are removed.

    Parameters
    ----------
    dataframe : DataFrame whose columns define the table.
    schemaName : name passed to ``get_schema`` as the table name.

    Returns
    -------
    str
        The rewritten DDL text.
    """
    ddl = pd.io.sql.get_schema(dataframe, schemaName)
    # Applied in order; each substitution is a literal text replacement.
    substitutions = (
        ('TEXT', 'VARCHAR(255)'),
        ('INTEGER', 'NUMERIC'),
        ('\n', ''),
        ('"', ""),
    )
    for old, new in substitutions:
        ddl = ddl.replace(old, new)
    return ddl

#Using pandas read_sql for getting schema
def getSchema(tableName, credentials):
    """Return the information_schema.columns rows for one table.

    The table name is sent as a bound parameter instead of being formatted
    into the SQL text, so names containing quotes cannot break or alter the
    statement.

    Parameters
    ----------
    tableName : table name to look up (matched against ``table_name``).
    credentials : connection passed to ``pd.read_sql`` as ``con``.

    Returns
    -------
    DataFrame
        Whatever ``pd.read_sql`` returns for the query.
    """
    # %(name)s is the psycopg2 placeholder style; pandas forwards `params` to
    # the driver. NOTE(review): assumes `credentials` resolves to a psycopg2
    # backed connection, as in this notebook.
    query = "SELECT * FROM information_schema.columns where table_name=%(table_name)s"
    return pd.read_sql(query, con=credentials, params={"table_name": tableName})

#Issue is in using pd.read_sql to write data to the database. so using psycopg2
def queryTable(query):
    """Run a statement on the module-level cursor ``cur``.

    Nothing is fetched; the function always returns None. Any exception
    raised by ``cur.execute`` is printed rather than propagated.
    """
    try:
        cur.execute(query)
    except Exception as err:
        print(err)
    return None
        
#This doesn't return anything

#Using the pd.read_sql for getting data from db
def queryBase(query):
    """Run ``query`` through ``pd.read_sql`` using the module-level
    ``credentials`` as the connection and return the result."""
    return pd.read_sql(query, con=credentials)

#This returns the dataframe

In [9]:
#Count the routines in information_schema.routines whose name matches the
#regex 'str' (i.e. contains "str"); the recorded run below returned 39.
queryBase("""SELECT COUNT(1) FROM information_schema.routines 
                WHERE routine_name ~ 'str'""")

Unnamed: 0,count
0,39


In [11]:
queryBase("""SELECT * FROM orders LIMIT 5""") # preview the first five rows of orders

Unnamed: 0,order_id,order_date,order_customer_id,order_status
0,1,2013-07-25,11599,CLOSED
1,2,2013-07-25,256,PENDING_PAYMENT
2,3,2013-07-25,12111,COMPLETE
3,4,2013-07-25,8827,CLOSED
4,5,2013-07-25,11318,COMPLETE


In [12]:
# Drop the users table if it already exists so it can be recreated from scratch.
queryTable("""DROP TABLE IF EXISTS users""")

In [13]:
# Create the users table. user_id is SERIAL, so inserts that omit it get an
# automatically assigned value.
queryTable("""CREATE TABLE users(
    user_id SERIAL PRIMARY KEY,
    user_first_name VARCHAR(30),
    user_last_name VARCHAR(30),
    user_email_id VARCHAR(50),
    user_gender VARCHAR(1),
    user_unique_id VARCHAR(15),
    user_phone_no VARCHAR(20),
    user_dob DATE,
    created_ts TIMESTAMP
)""")

In [14]:
# Load 25 sample rows into users. Some rows carry NULL for user_gender,
# user_unique_id or user_phone_no, and 'O''Shiels' uses the doubled single
# quote to embed an apostrophe in a SQL string literal.
queryTable("""insert into users (
    user_first_name, user_last_name, user_email_id, user_gender, 
    user_unique_id, user_phone_no, user_dob, created_ts
) VALUES
    ('Giuseppe', 'Bode', 'gbode0@imgur.com', 'M', '88833-8759', 
     '+86 (764) 443-1967', '1973-05-31', '2018-04-15 12:13:38'),
    ('Lexy', 'Gisbey', 'lgisbey1@mail.ru', 'F', '262501-029', 
     '+86 (751) 160-3742', '2003-05-31', '2020-12-29 06:44:09'),
    ('Karel', 'Claringbold', 'kclaringbold2@yale.edu', 'F', '391-33-2823', 
     '+62 (445) 471-2682', '1985-11-28', '2018-11-19 00:04:08'),
    ('Marv', 'Tanswill', 'mtanswill3@dedecms.com', 'F', '1195413-80', 
     '+62 (497) 736-6802', '1998-05-24', '2018-11-19 16:29:43'),
    ('Gertie', 'Espinoza', 'gespinoza4@nationalgeographic.com', 'M', '471-24-6869', 
     '+249 (687) 506-2960', '1997-10-30', '2020-01-25 21:31:10'),
    ('Saleem', 'Danneil', 'sdanneil5@guardian.co.uk', 'F', '192374-933', 
     '+63 (810) 321-0331', '1992-03-08', '2020-11-07 19:01:14'),
    ('Rickert', 'O''Shiels', 'roshiels6@wikispaces.com', 'M', '749-27-47-52', 
     '+86 (184) 759-3933', '1972-11-01', '2018-03-20 10:53:24'),
    ('Cybil', 'Lissimore', 'clissimore7@pinterest.com', 'M', '461-75-4198', 
     '+54 (613) 939-6976', '1978-03-03', '2019-12-09 14:08:30'),
    ('Melita', 'Rimington', 'mrimington8@mozilla.org', 'F', '892-36-676-2', 
     '+48 (322) 829-8638', '1995-12-15', '2018-04-03 04:21:33'),
    ('Benetta', 'Nana', 'bnana9@google.com', 'M', '197-54-1646', 
     '+420 (934) 611-0020', '1971-12-07', '2018-10-17 21:02:51'),
    ('Gregorius', 'Gullane', 'ggullanea@prnewswire.com', 'F', '232-55-52-58', 
     '+62 (780) 859-1578', '1973-09-18', '2020-01-14 23:38:53'),
    ('Una', 'Glayzer', 'uglayzerb@pinterest.com', 'M', '898-84-336-6', 
     '+380 (840) 437-3981', '1983-05-26', '2019-09-17 03:24:21'),
    ('Jamie', 'Vosper', 'jvosperc@umich.edu', 'M', '247-95-68-44', 
     '+81 (205) 723-1942', '1972-03-18', '2020-07-23 16:39:33'),
    ('Calley', 'Tilson', 'ctilsond@issuu.com', 'F', '415-48-894-3', 
     '+229 (698) 777-4904', '1987-06-12', '2020-06-05 12:10:50'),
    ('Peadar', 'Gregorowicz', 'pgregorowicze@omniture.com', 'M', '403-39-5-869', 
     '+7 (267) 853-3262', '1996-09-21', '2018-05-29 23:51:31'),
    ('Jeanie', 'Webling', 'jweblingf@booking.com', 'F', '399-83-05-03', 
     '+351 (684) 413-0550', '1994-12-27', '2018-02-09 01:31:11'),
    ('Yankee', 'Jelf', 'yjelfg@wufoo.com', 'F', '607-99-0411', 
     '+1 (864) 112-7432', '1988-11-13', '2019-09-16 16:09:12'),
    ('Blair', 'Aumerle', 'baumerleh@toplist.cz', 'F', '430-01-578-5', 
     '+7 (393) 232-1860', '1979-11-09', '2018-10-28 19:25:35'),
    ('Pavlov', 'Steljes', 'psteljesi@macromedia.com', 'F', '571-09-6181', 
     '+598 (877) 881-3236', '1991-06-24', '2020-09-18 05:34:31'),
    ('Darn', 'Hadeke', 'dhadekej@last.fm', 'M', '478-32-02-87', 
     '+370 (347) 110-4270', '1984-09-04', '2018-02-10 12:56:00'),
    ('Wendell', 'Spanton', 'wspantonk@de.vu', 'F', null, 
     '+84 (301) 762-1316', '1973-07-24', '2018-01-30 01:20:11'),
    ('Carlo', 'Yearby', 'cyearbyl@comcast.net', 'F', null, 
     '+55 (288) 623-4067', '1974-11-11', '2018-06-24 03:18:40'),
    ('Sheila', 'Evitts', 'sevittsm@webmd.com', null, '830-40-5287',
     null, '1977-03-01', '2020-07-20 09:59:41'),
    ('Sianna', 'Lowdham', 'slowdhamn@stanford.edu', null, '778-0845', 
     null, '1985-12-23', '2018-06-29 02:42:49'),
    ('Phylys', 'Aslie', 'paslieo@qq.com', 'M', '368-44-4478', 
     '+86 (765) 152-8654', '1984-03-22', '2019-10-01 01:34:28')""")

In [15]:
# Spot-check the insert by reading back two rows of users.
queryBase("""SELECT * FROM users LIMIT 2""")

Unnamed: 0,user_id,user_first_name,user_last_name,user_email_id,user_gender,user_unique_id,user_phone_no,user_dob,created_ts
0,1,Giuseppe,Bode,gbode0@imgur.com,M,88833-8759,+86 (764) 443-1967,1973-05-31,2018-04-15 12:13:38
1,2,Lexy,Gisbey,lgisbey1@mail.ru,F,262501-029,+86 (751) 160-3742,2003-05-31,2020-12-29 06:44:09


In [19]:
#Exercise 4: count users per gender, expanding 'M'/'F' to 'Male'/'Female' and
#labelling NULL genders as 'Not Specified'. The first column has no alias, so
#it comes back named "coalesce".
queryBase("""SELECT COALESCE(REPLACE(REPLACE(user_gender,'M','Male'),'F','Female'),'Not Specified'), COUNT(1) AS user_count
            FROM users
            GROUP BY user_gender""")

Unnamed: 0,coalesce,user_count
0,Not Specified,2
1,Male,10
2,Female,13


In [42]:
# Classify user_unique_id by its length once dashes are removed: more than 9
# characters -> 'more than 9', anything else -> 'less'. NULL ids are coalesced
# to '' and therefore fall into 'less'.
queryBase("""SELECT CASE WHEN LENGTH(COALESCE(REPLACE(user_unique_id,'-',''),''))>9
                THEN 'more than 9'
                ELSE 'less'
                END AS length_try
            FROM users
            LIMIT 5""")

Unnamed: 0,length_try
0,less
1,less
2,less
3,less
4,less


In [46]:
#Idea of nesting each function one inside the other followed by using CASE, WHEN, THEN, ELSE 
#WHEN THEN clause can be used multiple times 
#The id is compared after stripping dashes (NULL is treated as ''):
#  - 9 or more characters  -> its last four characters
#  - empty / NULL          -> 'Not Specified'
#  - anything shorter      -> 'Invalid_id'
#RIGHT(..., 4) is used so ids longer than 9 characters still yield exactly
#four characters; the three branches cover every possible length.
queryBase("""SELECT user_id, user_unique_id, 
                CASE WHEN LENGTH(COALESCE(REPLACE(user_unique_id,'-',''),''))>=9
                THEN RIGHT(REPLACE(user_unique_id,'-',''), 4)
                WHEN LENGTH(COALESCE(REPLACE(user_unique_id,'-',''),''))=0
                THEN 'Not Specified'
                ELSE 'Invalid_id'
                END AS user_unique_id_last4
            FROM users
            LIMIT 5""")

Unnamed: 0,user_id,user_unique_id,user_unique_id_last4
0,1,88833-8759,8759
1,2,262501-029,1029
2,3,391-33-2823,2823
3,4,1195413-80,1380
4,5,471-24-6869,6869


In [72]:
# Count users per phone country code. The code is the text before the first
# '(' once the '+' has been removed; rows with a NULL or empty phone number are
# filtered out. The ORDER BY casts the code to INT so the sort is numeric
# rather than alphabetical.
queryBase("""SELECT SPLIT_PART(REPLACE(COALESCE(user_phone_no,''),'+',''),'(',1) AS country_code, COUNT(1) AS user_count 
                FROM users
                WHERE COALESCE(user_phone_no,'') != ''
                GROUP BY SPLIT_PART(REPLACE(COALESCE(user_phone_no,''),'+',''),'(',1)
                ORDER BY SPLIT_PART(REPLACE(COALESCE(user_phone_no,''),'+',''),'(',1)::INT""")

Unnamed: 0,country_code,user_count
0,1,1
1,7,2
2,48,1
3,54,1
4,55,1
5,62,3
6,63,1
7,81,1
8,84,1
9,86,4


In [79]:
# Preview the first five rows of order_items.
queryBase("""SELECT * FROM order_items LIMIT 5""")

Unnamed: 0,order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price
0,1,1,957,1,299.98,299.98
1,2,2,1073,1,199.99,199.99
2,3,2,502,5,250.0,50.0
3,4,2,403,1,129.99,129.99
4,5,4,897,2,49.98,24.99


In [118]:
# Scratch check of CEIL applied to a literal product cast to numeric.
queryBase("""SELECT CEIL((49.98875779 * 5)::numeric)""")

Unnamed: 0,ceil
0,250.0


In [101]:
# List order items whose quantity * product price differs from the stored
# subtotal. Because the WHERE clause keeps only mismatching rows, the CASE
# column is 'not equal' for every row returned.
# NOTE(review): the recorded rows show values that print identically
# (e.g. 249.9 vs 249.9) yet compare as different - presumably floating-point
# rounding in the stored columns; confirm the column types before relying on
# exact equality here.
queryBase("""SELECT order_item_quantity, order_item_subtotal,
                order_item_product_price, order_item_quantity * order_item_product_price AS product_value, 
                CASE WHEN order_item_quantity * order_item_product_price = order_item_subtotal
                THEN 'equal'
                ELSE 'not equal'
                END AS order_subtotal
            FROM order_items
            WHERE order_item_quantity * order_item_product_price <> order_item_subtotal
            LIMIT 5""")

Unnamed: 0,order_item_quantity,order_item_subtotal,order_item_product_price,product_value,order_subtotal
0,5,249.9,49.98,249.9,not equal
1,5,249.9,49.98,249.9,not equal
2,5,199.95,39.99,199.95,not equal
3,5,249.9,49.98,249.9,not equal
4,5,249.9,49.98,249.9,not equal


In [135]:
# Count January 2014 orders split into 'weekend' and 'weekday'. The month
# filter compares order_date formatted as 'yyyy-MM' against '2014-01'.
# TO_CHAR(order_date,'d') yields the day-of-week number as text, and the query
# treats '1' and '7' as the weekend.
# NOTE(review): this relies on PostgreSQL numbering Sunday as 1 and Saturday
# as 7 for the D pattern - confirm against the TO_CHAR documentation.
queryBase("""SELECT COUNT(1), CASE WHEN TO_CHAR(order_date,'d') IN ('1','7')
                    THEN 'weekend'
                    ELSE 'weekday'
                    END AS day_type
            FROM orders
            WHERE TO_CHAR(order_date,'yyyy-MM')='2014-01'
            GROUP BY day_type""")

Unnamed: 0,count,day_type
0,4403,weekday
1,1505,weekend
