In [1]:
import configparser
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
config = configparser.ConfigParser()
config.read('calter.config')

['calter.config']

In [3]:
db = config['POSTGRES']['PG_DB']
retail_db = config['POSTGRES']['PG_ret']
hr_db = config['POSTGRES']['PG_hr']
user = config['POSTGRES']['PG_UNAME']
passwd = config['POSTGRES']['PG_PASS']
port = config['POSTGRES']['PG_PORT']
host = config['POSTGRES']['PG_HOST']

In [4]:
hr_db

'hr_db'

In [6]:
credentials = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,db)

In [7]:
credentials_ret = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,retail_db)
credentials_hr = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,hr_db)

In [59]:
#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn = psycopg2.connect(host=host,dbname=db,user=user,password=passwd,port=port)
except Exception as e:
    print(e)
    
conn.set_session(autocommit=True)

try:
    cur = conn.cursor()
    
except:
    print(e)

In [9]:
#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn_hr = psycopg2.connect(host=host,dbname=hr_db,user=user,password=passwd,port=port)
except Exception as e:
    print(e)
    
conn_hr.set_session(autocommit=True)

try:
    cur = conn_hr.cursor()
    
except:
    print(e)

In [10]:
credentials_hr

'postgresql://postgres:1234@localhost:5432/hr_db'

In [11]:
#Helper functions to work with the database
def schemaGen(dataframe, schemaName):
    localSchema = pd.io.sql.get_schema(dataframe,schemaName)
    localSchema = localSchema.replace('TEXT','VARCHAR(255)').replace('INTEGER','NUMERIC').replace('\n','').replace('"',"")
    return "".join(localSchema)

#Using pandas read_sql for getting schema
def getSchema(tableName, credentials):
    schema = pd.read_sql("""SELECT * FROM information_schema.columns where table_name='{}'""".format(tableName),con=credentials)
    return schema

#Issue is in using pd.read_sql to write data to the database. so using psycopg2
def queryTable(query):
    try:
        schema = cur.execute(query)
        return 
    except Exception as e:
        print(e)
        
#This doesn't return anything

#Using the pd.read_sql for getting data from db
def queryBase(query):
    requiredTable = pd.read_sql(query,con=credentials)
    return requiredTable

def queryBasehr(query):
    requiredTable = pd.read_sql(query,con=credentials_hr)
    return requiredTable

#This returns the dataframe

In [23]:
queryBasehr("""SELECT * FROM employees LIMIT 5""")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,,,90
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,,100.0,90
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,,100.0,90
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,,102.0,60
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,,103.0,60


In [24]:
queryBasehr("""SELECT * FROM departments LIMIT 5""")

Unnamed: 0,department_id,department_name,manager_id,location_id
0,120,Treasury,,1700
1,130,Corporate Tax,,1700
2,140,Control And Credit,,1700
3,150,Shareholder Services,,1700
4,160,Benefits,,1700


In [28]:
queryBasehr("""SELECT e.employee_id, e.department_id, e.salary,
                d.department_name
               FROM employees e 
               LEFT OUTER JOIN departments d
               ON e.department_id = d.department_id
               LIMIT 2""")

Unnamed: 0,employee_id,department_id,salary,department_name
0,100,90,24000.0,Executive
1,101,90,17000.0,Executive


In [53]:
queryBasehr("""SELECT e.department_id,
                ae.avg_dept_salary,
                e.salary, e.employee_id, d.department_name
               FROM employees e 
               LEFT OUTER JOIN (
                   SELECT department_id, 
                   ROUND(AVG(salary)::numeric,2) AS avg_dept_salary
                   FROM employees
                   GROUP BY department_id
                   ) ae
               ON e.department_id = ae.department_id
               JOIN departments d
               ON ae.department_id = d.department_id
               WHERE e.salary > avg_dept_salary
               ORDER BY d.department_id,e.salary DESC""")

Unnamed: 0,department_id,avg_dept_salary,salary,employee_id,department_name
0,20,9500.0,13000.0,201,Marketing
1,30,4150.0,11000.0,114,Purchasing
2,50,3475.56,8200.0,121,Shipping
3,50,3475.56,8000.0,120,Shipping
4,50,3475.56,7900.0,122,Shipping
5,50,3475.56,6500.0,123,Shipping
6,50,3475.56,5800.0,124,Shipping
7,50,3475.56,4200.0,184,Shipping
8,50,3475.56,4100.0,185,Shipping
9,50,3475.56,4000.0,192,Shipping


In [59]:
#Using windowing function

queryBasehr("""SELECT e.employee_id, e.department_id, e.salary,
    AVG(e.salary) OVER (
        PARTITION BY e.department_id
    ) AS department_salary_expense
FROM employees e 
ORDER BY e.department_id, e.salary DESC""")

Unnamed: 0,employee_id,department_id,salary,department_salary_expense
0,200,10.0,4400.0,4400.0
1,201,20.0,13000.0,9500.0
2,202,20.0,6000.0,9500.0
3,114,30.0,11000.0,4150.0
4,115,30.0,3100.0,4150.0
...,...,...,...,...
102,111,100.0,7700.0,8600.0
103,113,100.0,6900.0,8600.0
104,205,110.0,12000.0,10150.0
105,206,110.0,8300.0,10150.0


In [63]:
queryBasehr("""SELECT t.employee_id, t.department_id, t.salary,
            d.department_name,
            ROUND(SUM(t.salary) OVER(
            PARTITION BY t.department_id
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),2)
            AS cumulative_dept_salary
            FROM employees t
            LEFT OUTER JOIN departments d 
            ON t.department_id = d.department_id
            WHERE d.department_name IN ('Finance','IT')""")

Unnamed: 0,employee_id,department_id,salary,department_name,cumulative_dept_salary
0,103,60,9000.0,IT,9000.0
1,104,60,6000.0,IT,15000.0
2,105,60,4800.0,IT,19800.0
3,106,60,4800.0,IT,24600.0
4,107,60,4200.0,IT,28800.0
5,108,100,12000.0,Finance,12000.0
6,109,100,9000.0,Finance,21000.0
7,110,100,8200.0,Finance,29200.0
8,111,100,7700.0,Finance,36900.0
9,112,100,7800.0,Finance,44700.0


In [29]:
queryBase("""SELECT t.*,
    round(sum(t.revenue) OVER (
        ORDER BY order_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ), 2) AS moving_3day_revenue
FROM daily_revenue t
ORDER BY order_date
LIMIT 20""")

Unnamed: 0,order_date,revenue,moving_3day_revenue
0,2013-07-25,31547.23,31547.23
1,2013-07-26,54713.23,86260.46
2,2013-07-27,48411.48,134671.94
3,2013-07-28,35672.03,138796.74
4,2013-07-29,54579.7,138663.21
5,2013-07-30,49329.29,139581.02
6,2013-07-31,59212.49,163121.48
7,2013-08-01,49160.08,157701.86
8,2013-08-02,50688.58,159061.15
9,2013-08-03,43416.74,143265.4


In [16]:
queryBasehr("""SELECT e.employee_id, e.department_id,
                d.department_name, e.salary,
                dense_rank() OVER(
                PARTITION BY e.department_id
                ORDER BY salary DESC
                ) AS d_rank
                FROM employees e 
                JOIN departments d 
                ON e.department_id = d.department_id
                LIMIT 5""")

Unnamed: 0,employee_id,department_id,department_name,salary,d_rank
0,200,10,Administration,4400.0,1
1,201,20,Marketing,13000.0,1
2,202,20,Marketing,6000.0,2
3,114,30,Purchasing,11000.0,1
4,115,30,Purchasing,3100.0,2


In [19]:
queryBase("""SELECT * FROM order_items LIMIT 5""")

Unnamed: 0,order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price
0,1,1,957,1,299.98,299.98
1,2,2,1073,1,199.99,199.99
2,3,2,502,5,250.0,50.0
3,4,2,403,1,129.99,129.99
4,5,4,897,2,49.98,24.99


In [23]:
queryBase("""SELECT o.order_status, pdt.product_name,
            oi.order_item_subtotal 
            FROM orders o
            JOIN order_items oi
            ON o.order_id = oi.order_item_order_id
            JOIN products pdt
            ON oi.order_item_product_id = pdt.product_id
            LIMIT 5""")

Unnamed: 0,order_status,product_name,order_item_subtotal
0,CLOSED,Diamondback Women's Serene Classic Comfort Bi,299.98
1,PENDING_PAYMENT,Pelican Sunstream 100 Kayak,199.99
2,PENDING_PAYMENT,Nike Men's Dri-FIT Victory Golf Polo,250.0
3,PENDING_PAYMENT,Nike Men's CJ Elite 2 TD Football Cleat,129.99
4,CLOSED,Team Golf New England Patriots Putter Grip,49.98


In [50]:
queryBase("""SELECT o.order_status,pdt.product_id, pdt.product_name,
            o.order_date,
            SUM(oi.order_item_subtotal) OVER(
            PARTITION BY oi.order_item_product_id
            ) AS revenue,
            rank() OVER(
            PARTITION BY oi.order_item_product_id
            ) AS product_rank
            FROM orders o
            JOIN order_items oi
            ON o.order_id = oi.order_item_order_id
            JOIN products pdt
            ON oi.order_item_product_id = pdt.product_id
            WHERE o.order_status IN ('CLOSED','COMPLETE')
            LIMIT 5""")

Unnamed: 0,order_status,product_id,product_name,order_date,revenue,product_rank
0,CLOSED,19,Nike Men's Fingertrap Max Training Shoe,2013-08-04,2999.76,1
1,CLOSED,19,Nike Men's Fingertrap Max Training Shoe,2013-08-30,2999.76,1
2,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2013-09-22,2999.76,1
3,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2013-10-13,2999.76,1
4,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2013-10-14,2999.76,1


In [57]:
queryBase("""SELECT sup.product_id, sup.product_name,
            SUM(sup.revenue) AS revenue
            FROM (
            SELECT o.order_status,pdt.product_id, pdt.product_name,
            o.order_date,
            SUM(oi.order_item_subtotal) OVER(
                PARTITION BY oi.order_item_product_id
            ) AS revenue
            FROM orders o
                JOIN order_items oi
                ON o.order_id = oi.order_item_order_id
                JOIN products pdt
                ON oi.order_item_product_id = pdt.product_id
            WHERE o.order_status IN ('CLOSED','COMPLETE') AND
            TO_CHAR(o.order_date,'yyyy-MM')='2014-01'
            )sup
            GROUP BY sup.product_name, sup.product_id
            ORDER BY revenue DESC
            LIMIT 15""")

Unnamed: 0,product_id,product_name,revenue
0,1004,Field & Stream Sportsman 16 Gun Fire Safe,157243700.0
1,365,Perfect Fitness Perfect Rip Deck,132540400.0
2,403,Nike Men's CJ Elite 2 TD Football Cleat,89119060.0
3,502,Nike Men's Dri-FIT Victory Golf Polo,88740400.0
4,957,Diamondback Women's Serene Classic Comfort Bi,73205920.0
5,1014,O'Brien Men's Neoprene Life Vest,67909780.0
6,1073,Pelican Sunstream 100 Kayak,64068000.0
7,191,Nike Men's Free 5.0+ Running Shoe,59661330.0
8,627,Under Armour Girls' Toddler Spine Surge Runni,13061730.0
9,565,adidas Youth Germany Black/Red Away Match Soc,61600.0


In [63]:
queryTable("""DROP TABLE IF EXISTS jansales""")

In [64]:
queryTable("""CREATE TABLE janSales
                AS
                SELECT o.order_status,pdt.product_id, 
                    pdt.product_name,pdt.product_category_id,
                    o.order_date,
                    SUM(oi.order_item_subtotal) OVER(
                        PARTITION BY oi.order_item_product_id
                    ) AS revenue
                    FROM orders o
                        JOIN order_items oi
                        ON o.order_id = oi.order_item_order_id
                        JOIN products pdt
                        ON oi.order_item_product_id = pdt.product_id
                    WHERE o.order_status IN ('CLOSED','COMPLETE') AND
                    TO_CHAR(o.order_date,'yyyy-MM')='2014-01'""")

In [65]:
queryBase("""SELECT * FROM jansales LIMIT 5""")

Unnamed: 0,order_status,product_id,product_name,product_category_id,order_date,revenue
0,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2,2014-01-16,374.97
1,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2,2014-01-22,374.97
2,COMPLETE,19,Nike Men's Fingertrap Max Training Shoe,2,2014-01-22,374.97
3,COMPLETE,24,Elevation Training Mask 2.0,2,2014-01-27,719.91
4,COMPLETE,24,Elevation Training Mask 2.0,2,2014-01-11,719.91


In [66]:
queryBase("""SELECT categ.category_id, categ.category_name, 
            j.product_id, j.product_name, j.revenue
            FROM jansales j
            JOIN categories categ
            ON j.product_category_id = categ.category_id
            LIMIT 5;""")

Unnamed: 0,category_id,category_name,product_id,product_name,revenue
0,2,Soccer,19,Nike Men's Fingertrap Max Training Shoe,374.97
1,2,Soccer,19,Nike Men's Fingertrap Max Training Shoe,374.97
2,2,Soccer,19,Nike Men's Fingertrap Max Training Shoe,374.97
3,2,Soccer,24,Elevation Training Mask 2.0,719.91
4,2,Soccer,24,Elevation Training Mask 2.0,719.91


In [69]:
queryBase("""SELECT categ.category_id, categ.category_name, 
            j.product_id, j.product_name, SUM(j.revenue) as revenue
            FROM jansales j
            JOIN categories categ
            ON j.product_category_id = categ.category_id
            GROUP BY categ.category_id, categ.category_name,
            j.product_id, j.product_name
            ORDER BY category_id, revenue DESC
            LIMIT 5;""")

Unnamed: 0,category_id,category_name,product_id,product_name,revenue
0,2,Soccer,24,Elevation Training Mask 2.0,2879.64
1,2,Soccer,19,Nike Men's Fingertrap Max Training Shoe,1124.91
2,3,Baseball & Softball,44,adidas Men's F10 Messi TRX FG Soccer Cleat,25915.68
3,3,Baseball & Softball,37,adidas Kids' F5 Messi FG Soccer Cleat,4408.74
4,3,Baseball & Softball,35,adidas Brazuca 2014 Official Match Ball,639.96


In [75]:
queryBase("""SELECT category_id,category_name, 
            product_id,product_name,revenue,
            rank() OVER(
                PARTITION BY category_id
                ORDER BY revenue
            ) AS product_rank
            FROM(
                SELECT categ.category_id, categ.category_name, 
                j.product_id, j.product_name, SUM(j.revenue) as revenue
                FROM jansales j
                JOIN categories categ
                ON j.product_category_id = categ.category_id
                GROUP BY categ.category_id, categ.category_name,
                j.product_id, j.product_name
                ORDER BY category_id, revenue DESC
            ) s""")

Unnamed: 0,category_id,category_name,product_id,product_name,revenue,product_rank
0,2,Soccer,19,Nike Men's Fingertrap Max Training Shoe,1.124910e+03,1
1,2,Soccer,24,Elevation Training Mask 2.0,2.879640e+03,2
2,3,Baseball & Softball,35,adidas Brazuca 2014 Official Match Ball,6.399600e+02,1
3,3,Baseball & Softball,37,adidas Kids' F5 Messi FG Soccer Cleat,4.408740e+03,2
4,3,Baseball & Softball,44,adidas Men's F10 Messi TRX FG Soccer Cleat,2.591568e+04,3
...,...,...,...,...,...,...
91,44,Hunting & Shooting,977,ENO Atlas Hammock Straps,1.481506e+04,3
92,45,Fishing,1004,Field & Stream Sportsman 16 Gun Fire Safe,1.572437e+08,1
93,46,Indoor/Outdoor Games,1014,O'Brien Men's Neoprene Life Vest,6.790978e+07,1
94,48,Water Sports,1059,Pelican Maverick 100X Kayak,3.499900e+02,1


In [None]:
(
)