In [1]:
import configparser
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [7]:
config = configparser.ConfigParser()
config.read('calter.config')

['calter.config']

In [8]:
db = config['POSTGRES']['PG_DB']
retail_db = config['POSTGRES']['PG_ret']
hr_db = config['POSTGRES']['PG_hr']
user = config['POSTGRES']['PG_UNAME']
passwd = config['POSTGRES']['PG_PASS']
port = config['POSTGRES']['PG_PORT']
host = config['POSTGRES']['PG_HOST']

In [10]:
hr_db

'hr_db'

In [11]:
    credentials = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,db)

In [12]:
credentials_ret = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,retail_db)
credentials_hr = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,hr_db)

In [13]:
#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn = psycopg2.connect(host=host,dbname=db,user=user,password=passwd,port=port)
except Exception as e:
    print(e)
    
conn.set_session(autocommit=True)

try:
    cur = conn.cursor()
    
except:
    print(e)

In [15]:
#using psycopg2 to test connection since there are no tables
import psycopg2
try:
    conn_hr = psycopg2.connect(host=host,dbname=hr_db,user=user,password=passwd,port=port)
except Exception as e:
    print(e)
    
conn_hr.set_session(autocommit=True)

try:
    cur = conn_hr.cursor()
    
except:
    print(e)

In [17]:
credentials_hr

'postgresql://postgres:1234@localhost:5432/hr_db'

In [20]:
#Helper functions to work with the database
def schemaGen(dataframe, schemaName):
    localSchema = pd.io.sql.get_schema(dataframe,schemaName)
    localSchema = localSchema.replace('TEXT','VARCHAR(255)').replace('INTEGER','NUMERIC').replace('\n','').replace('"',"")
    return "".join(localSchema)

#Using pandas read_sql for getting schema
def getSchema(tableName, credentials):
    schema = pd.read_sql("""SELECT * FROM information_schema.columns where table_name='{}'""".format(tableName),con=credentials)
    return schema

#Issue is in using pd.read_sql to write data to the database. so using psycopg2
def queryTable(query):
    try:
        schema = cur.execute(query)
        return 
    except Exception as e:
        print(e)
        
#This doesn't return anything

#Using the pd.read_sql for getting data from db
def queryBase(query):
    requiredTable = pd.read_sql(query,con=credentials)
    return requiredTable

def queryBasehr(query):
    requiredTable = pd.read_sql(query,con=credentials_hr)
    return requiredTable

#This returns the dataframe

In [23]:
queryBasehr("""SELECT * FROM employees LIMIT 5""")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,,,90
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,,100.0,90
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,,100.0,90
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,,102.0,60
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,,103.0,60


In [24]:
queryBasehr("""SELECT * FROM departments LIMIT 5""")

Unnamed: 0,department_id,department_name,manager_id,location_id
0,120,Treasury,,1700
1,130,Corporate Tax,,1700
2,140,Control And Credit,,1700
3,150,Shareholder Services,,1700
4,160,Benefits,,1700


In [28]:
queryBasehr("""SELECT e.employee_id, e.department_id, e.salary,
                d.department_name
               FROM employees e 
               LEFT OUTER JOIN departments d
               ON e.department_id = d.department_id
               LIMIT 2""")

Unnamed: 0,employee_id,department_id,salary,department_name
0,100,90,24000.0,Executive
1,101,90,17000.0,Executive


In [53]:
queryBasehr("""SELECT e.department_id,
                ae.avg_dept_salary,
                e.salary, e.employee_id, d.department_name
               FROM employees e 
               LEFT OUTER JOIN (
                   SELECT department_id, 
                   ROUND(AVG(salary)::numeric,2) AS avg_dept_salary
                   FROM employees
                   GROUP BY department_id
                   ) ae
               ON e.department_id = ae.department_id
               JOIN departments d
               ON ae.department_id = d.department_id
               WHERE e.salary > avg_dept_salary
               ORDER BY d.department_id,e.salary DESC""")

Unnamed: 0,department_id,avg_dept_salary,salary,employee_id,department_name
0,20,9500.0,13000.0,201,Marketing
1,30,4150.0,11000.0,114,Purchasing
2,50,3475.56,8200.0,121,Shipping
3,50,3475.56,8000.0,120,Shipping
4,50,3475.56,7900.0,122,Shipping
5,50,3475.56,6500.0,123,Shipping
6,50,3475.56,5800.0,124,Shipping
7,50,3475.56,4200.0,184,Shipping
8,50,3475.56,4100.0,185,Shipping
9,50,3475.56,4000.0,192,Shipping


In [59]:
#Using windowing function

queryBasehr("""SELECT e.employee_id, e.department_id, e.salary,
    AVG(e.salary) OVER (
        PARTITION BY e.department_id
    ) AS department_salary_expense
FROM employees e 
ORDER BY e.department_id, e.salary DESC""")

Unnamed: 0,employee_id,department_id,salary,department_salary_expense
0,200,10.0,4400.0,4400.0
1,201,20.0,13000.0,9500.0
2,202,20.0,6000.0,9500.0
3,114,30.0,11000.0,4150.0
4,115,30.0,3100.0,4150.0
...,...,...,...,...
102,111,100.0,7700.0,8600.0
103,113,100.0,6900.0,8600.0
104,205,110.0,12000.0,10150.0
105,206,110.0,8300.0,10150.0


In [63]:
queryBasehr("""SELECT t.employee_id, t.department_id, t.salary,
            d.department_name,
            ROUND(SUM(t.salary) OVER(
            PARTITION BY t.department_id
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),2)
            AS cumulative_dept_salary
            FROM employees t
            LEFT OUTER JOIN departments d 
            ON t.department_id = d.department_id
            WHERE d.department_name IN ('Finance','IT')""")

Unnamed: 0,employee_id,department_id,salary,department_name,cumulative_dept_salary
0,103,60,9000.0,IT,9000.0
1,104,60,6000.0,IT,15000.0
2,105,60,4800.0,IT,19800.0
3,106,60,4800.0,IT,24600.0
4,107,60,4200.0,IT,28800.0
5,108,100,12000.0,Finance,12000.0
6,109,100,9000.0,Finance,21000.0
7,110,100,8200.0,Finance,29200.0
8,111,100,7700.0,Finance,36900.0
9,112,100,7800.0,Finance,44700.0


In [29]:
queryBase("""SELECT t.*,
    round(sum(t.revenue) OVER (
        ORDER BY order_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ), 2) AS moving_3day_revenue
FROM daily_revenue t
ORDER BY order_date
LIMIT 20""")

Unnamed: 0,order_date,revenue,moving_3day_revenue
0,2013-07-25,31547.23,31547.23
1,2013-07-26,54713.23,86260.46
2,2013-07-27,48411.48,134671.94
3,2013-07-28,35672.03,138796.74
4,2013-07-29,54579.7,138663.21
5,2013-07-30,49329.29,139581.02
6,2013-07-31,59212.49,163121.48
7,2013-08-01,49160.08,157701.86
8,2013-08-02,50688.58,159061.15
9,2013-08-03,43416.74,143265.4


In [32]:
queryBase("""SELECT t.*,
    round(sum(t.revenue) OVER (
        ORDER BY order_date
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ), 2) AS moving_3day_revenue
FROM daily_revenue t
ORDER BY order_date
LIMIT 20""")

Unnamed: 0,order_date,revenue,moving_3day_revenue
0,2013-07-25,31547.23,134671.94
1,2013-07-26,54713.23,170343.97
2,2013-07-27,48411.48,224923.67
3,2013-07-28,35672.03,242705.73
4,2013-07-29,54579.7,247204.99
5,2013-07-30,49329.29,247953.59
6,2013-07-31,59212.49,262970.14
7,2013-08-01,49160.08,251807.18
8,2013-08-02,50688.58,237570.9
9,2013-08-03,43416.74,212383.68


In [33]:
queryBase("""SELECT t.*,
    rank() OVER (
        PARTITION BY order_date
        ORDER BY revenue DESC
    ) AS rnk
FROM daily_product_revenue t
ORDER BY order_date, revenue DESC
LIMIT 30""")

Unnamed: 0,order_date,order_item_product_id,revenue,rnk
0,2013-07-25,1004,5599.72,1
1,2013-07-25,191,5099.49,2
2,2013-07-25,957,4499.7,3
3,2013-07-25,365,3359.44,4
4,2013-07-25,1073,2999.85,5
5,2013-07-25,1014,2798.88,6
6,2013-07-25,403,1949.85,7
7,2013-07-25,502,1650.0,8
8,2013-07-25,627,1079.73,9
9,2013-07-25,226,599.99,10


In [34]:
queryBase("""SELECT
    t.*,
    rank() OVER (
        PARTITION BY order_date
        ORDER BY revenue DESC
    ) rnk,
    dense_rank() OVER (
        PARTITION BY order_date
        ORDER BY revenue DESC
    ) drnk,
    row_number() OVER (
        PARTITION BY order_date
        ORDER BY revenue DESC
    ) rn
FROM daily_product_revenue AS t
ORDER BY order_date, revenue DESC
LIMIT 30""")

Unnamed: 0,order_date,order_item_product_id,revenue,rnk,drnk,rn
0,2013-07-25,1004,5599.72,1,1,1
1,2013-07-25,191,5099.49,2,2,2
2,2013-07-25,957,4499.7,3,3,3
3,2013-07-25,365,3359.44,4,4,4
4,2013-07-25,1073,2999.85,5,5,5
5,2013-07-25,1014,2798.88,6,6,6
6,2013-07-25,403,1949.85,7,7,7
7,2013-07-25,502,1650.0,8,8,8
8,2013-07-25,627,1079.73,9,9,9
9,2013-07-25,226,599.99,10,10,10


In [35]:
queryBase("""SELECT * FROM (
    SELECT nq.*,
        dense_rank() OVER (
            PARTITION BY order_date
            ORDER BY revenue DESC
        ) AS drnk
    FROM (
        SELECT o.order_date,
            oi.order_item_product_id,
            round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
        FROM orders o 
            JOIN order_items oi
                ON o.order_id = oi.order_item_order_id
        WHERE o.order_status IN ('COMPLETE', 'CLOSED')
        GROUP BY o.order_date, oi.order_item_product_id
    ) nq
) nq1
WHERE drnk <= 5
ORDER BY order_date, revenue DESC
cv
LIMIT 20""")

Unnamed: 0,order_date,order_item_product_id,revenue,drnk
0,2013-07-25,1004,5599.72,1
1,2013-07-25,191,5099.49,2
2,2013-07-25,957,4499.7,3
3,2013-07-25,365,3359.44,4
4,2013-07-25,1073,2999.85,5
5,2013-07-26,1004,10799.46,1
6,2013-07-26,365,7978.67,2
7,2013-07-26,957,6899.54,3
8,2013-07-26,191,6799.32,4
9,2013-07-26,1014,4798.08,5


In [36]:
queryBase("""
SELECT * FROM (SELECT dpr.*,
  dense_rank() OVER (
    PARTITION BY order_date
    ORDER BY revenue DESC
  ) AS drnk
FROM daily_product_revenue AS dpr) q
WHERE drnk <= 5
ORDER BY order_date, revenue DESC
LIMIT 20""")

Unnamed: 0,order_date,order_item_product_id,revenue,drnk
0,2013-07-25,1004,5599.72,1
1,2013-07-25,191,5099.49,2
2,2013-07-25,957,4499.7,3
3,2013-07-25,365,3359.44,4
4,2013-07-25,1073,2999.85,5
5,2013-07-26,1004,10799.46,1
6,2013-07-26,365,7978.67,2
7,2013-07-26,957,6899.54,3
8,2013-07-26,191,6799.32,4
9,2013-07-26,1014,4798.08,5
