In [1]:

import sqlalchemy as sqla
from sqlalchemy import text
import os
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Connecting to the database

load_dotenv()  # load variables from a local .env file into the environment

# Connection settings come from the environment so that no credentials are
# hardcoded in the notebook.
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

# Fail early with a readable message: an unset variable would otherwise be
# passed along as None and only surface later as an obscure connection error.
_required = {
    "DB_HOST": DB_HOST,
    "DB_PORT": DB_PORT,
    "DB_NAME": DB_NAME,
    "DB_USER": DB_USER,
}
_missing = [name for name, value in _required.items() if not value]
if _missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing)}"
    )

# Build the URL from its components instead of string formatting, so that
# special characters in the user name or password (e.g. '@', ':', '/', '%')
# are escaped correctly rather than corrupting the URL.
engine = sqla.create_engine(
    sqla.engine.URL.create(
        drivername="postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT),
        database=DB_NAME,
    )
)

In [3]:
# TC1: How many rows are in the employee table | EASY
# Counts every row of prod.employee and exposes it as `total_rows`.
query = text("""SELECT count(*) as total_rows FROM prod.employee""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,total_rows
0,290


In [5]:
# TC2: Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the most vacation hours | EASY
# Sorts by vacationhours, highest first, and keeps the first five rows.
# NOTE(review): there is no tie-breaker in the ORDER BY, so which of the rows
# sharing the same vacationhours make the cut is up to the database.
query = text("""select jobtitle, maritalstatus, gender, vacationhours from prod.employee order by vacationhours DESC limit 5""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,jobtitle,maritalstatus,gender,vacationhours
0,Production Technician - WC50,M,M,99
1,Production Technician - WC10,S,F,99
2,Chief Executive Officer,S,M,99
3,Production Technician - WC10,M,M,98
4,Production Technician - WC50,S,M,98


In [6]:
# TC3: Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the least vacation hours | EASY
# Sorts by vacationhours, lowest first, and keeps the first five rows.
# NOTE(review): there is no tie-breaker in the ORDER BY, so which of the rows
# sharing the same vacationhours make the cut is up to the database.
query = text("""select jobtitle, maritalstatus, gender, vacationhours from prod.employee order by vacationhours ASC limit 5""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,jobtitle,maritalstatus,gender,vacationhours
0,Chief Financial Officer,M,F,0
1,Production Technician - WC20,S,M,0
2,Production Technician - WC50,S,M,0
3,Production Technician - WC50,M,M,1
4,Vice President of Engineering,S,F,1


In [7]:
# TC4: Get me the number of employees that are salaried | EASY
# Counts the rows of prod.employee whose salariedflag is true.
query = text("""select count(*) from prod.employee where salariedflag=true""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count
0,52


In [8]:
# TC5: how many males and females employees are there ? | EASY
# One row per distinct gender value, with the number of employees in each.
query = text("""select gender, count(*) from prod.employee group by gender""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,gender,count
0,M,206
1,F,84


In [9]:
# TC6: Get all the employee records for the 10 oldest employees and calculate their individual age | EASY
# Returns every employee column plus an `age` column for the ten earliest
# birthdates.
# age(birthdate) yields the interval between the birthdate and today, so taking
# its 'year' part gives completed years. Subtracting calendar years instead
# would overstate the age of anyone whose birthday has not yet occurred this
# year.
with engine.begin() as conn:
    query = text("""select *, date_part('year', age(birthdate)) as age from prod.employee order by birthdate ASC limit 10""")
    df = pd.read_sql_query(query, conn)
df

Unnamed: 0,businessentityid,nationalidnumber,loginid,jobtitle,birthdate,maritalstatus,gender,hiredate,salariedflag,vacationhours,sickleavehours,currentflag,rowguid,modifieddate,organizationnode,age
0,274,502097814,adventure-works\stephen0,North American Sales Manager,1951-10-17,M,M,2011-01-04,True,14,27,True,86d5fa9e-4bc0-48c8-91dc-1ec467418d11,2014-06-30,/6/1/,71.0
1,84,947029962,adventure-works\frank3,Production Technician - WC40,1952-03-02,M,M,2010-02-05,False,66,53,True,9af24acc-ea3e-4efe-b5e3-4762c496d57c,2014-06-30,/3/1/7/6/,71.0
2,256,367453993,adventure-works\frank2,Buyer,1952-05-12,M,M,2010-01-23,False,58,49,True,0423d8d0-1b14-4718-8c17-85e2f98d222f,2014-06-30,/4/3/1/6/,71.0
3,5,695256908,adventure-works\gail0,Design Engineer,1952-09-27,M,F,2008-01-06,True,5,22,True,ec84ae09-f9b8-4a15-b4a9-6ccbab919b08,2014-06-30,/1/1/2/,70.0
4,140,339233463,adventure-works\prasanna0,Production Technician - WC20,1953-04-30,M,M,2010-01-22,False,5,22,True,72ed240a-0fd8-4423-8a49-9078ba6bcf6c,2014-06-30,/3/1/14/6/,70.0
5,231,153479919,adventure-works\jo1,Janitor,1954-04-24,M,F,2010-03-07,False,91,65,True,39af5cca-9b52-41e5-b510-72901d2578f6,2014-06-30,/3/4/1/3/,69.0
6,63,414476027,adventure-works\maciej0,Production Technician - WC60,1955-01-30,S,M,2010-01-29,False,30,35,True,3229d93f-6a14-4bac-abc5-a91b9ecfb4d3,2014-06-30,/3/1/5/1/,68.0
7,40,309738752,adventure-works\jolynn0,Production Supervisor - WC60,1956-01-16,S,F,2007-12-26,False,82,61,True,2cc71b96-f421-485e-9832-8723337749bb,2014-06-30,/3/1/2/,67.0
8,49,912265825,adventure-works\barry0,Production Technician - WC10,1956-03-26,S,M,2008-01-07,False,88,64,True,756a60ae-378f-43d8-9f93-1e821d1eaf52,2014-06-30,/3/1/3/2/,67.0
9,53,9659517,adventure-works\diane0,Production Technician - WC10,1956-03-29,M,F,2008-03-28,False,87,63,True,c334b2d2-0c56-4906-9095-f1d07a98cbec,2014-06-30,/3/1/3/6/,67.0


In [10]:
# TC7: How many years has the employee with the job title Chief Executive Officer been working at the company? | MEDIUM
# Takes the 'year' part of age(hiredate) for rows whose jobtitle is
# 'Chief Executive Officer'.
query = text("""select date_part('year', age(hiredate)) from prod.employee where jobtitle in ('Chief Executive Officer')""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,date_part
0,14.0


In [44]:
# TC8: How many employees that have the term 'Marketing' in the job title are salaried? | MEDIUM
# Counts salaried rows whose jobtitle contains the substring 'Marketing'.
query = text("""select count(*) from prod.employee where jobtitle like ('%Marketing%') and salariedflag=true""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count
0,1


In [45]:
# TC9: How many employees that have Marketing as a job title are not salaried? | MEDIUM
# Counts non-salaried rows whose jobtitle contains the substring 'Marketing'.
# The filter is a substring match (LIKE '%Marketing%'), not an exact title.
query = text("""select count(*) from prod.employee where jobtitle like ('%Marketing%') and salariedflag=false""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count
0,8


In [46]:
# TC10: How many vacation hours, in days, do the top 10 salaried employees with the highest vacation hours have? | MEDIUM
# Returns every employee column plus `vacationdays` (vacationhours / 8) for the
# ten salaried employees with the most vacation hours.
# NOTE(review): the division truncates (99 hours appears as 12 days in the
# recorded output) - confirm that dropping partial days is intended.
# NOTE(review): the recorded output lists only 5 rows although the query asks
# for 10; it looks stale and should be refreshed by re-running the cell.
query = text("""select *, vacationhours/8 as vacationdays from prod.employee where salariedflag=true order by vacationhours desc limit 10""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,businessentityid,nationalidnumber,loginid,jobtitle,birthdate,maritalstatus,gender,hiredate,salariedflag,vacationhours,sickleavehours,currentflag,rowguid,modifieddate,organizationnode,vacationdays
0,1,295847284,adventure-works\ken0,Chief Executive Officer,1969-01-29,S,M,2009-01-14,True,99,69,True,f01251e5-96a3-448d-981e-0f99d789110d,2014-06-30,/,12
1,121,521265716,adventure-works\pilar0,Shipping and Receiving Supervisor,1972-09-09,S,M,2009-01-02,True,93,66,True,01bcce22-d63a-4c3f-9ba8-d3d5a4c3bd52,2014-06-30,/3/1/12/,11
2,228,553069203,adventure-works\christian0,Maintenance Supervisor,1976-01-18,M,M,2008-12-14,True,92,66,True,4611c7c5-90a0-407f-b054-93bd51533609,2014-06-30,/3/4/1/,11
3,227,141165819,adventure-works\gary1,Facilities Manager,1971-02-18,M,M,2009-12-02,True,86,63,True,62f6ec59-5635-44f3-9cc1-2d8eeaf90a05,2014-06-30,/3/4/,10
4,212,885055826,adventure-works\peng0,Quality Assurance Supervisor,1976-03-18,M,M,2008-12-09,True,81,60,True,e249d613-36c9-4544-9b6f-6ce50e5e0da5,2014-06-30,/3/2/1/,10


In [6]:
# TC11: How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? | MEDIUM
# Returns every employee column plus `vacationdays` (vacationhours / 8) for the
# ten non-salaried employees with the most vacation hours.
# NOTE(review): the division truncates (96 through 99 hours all appear as 12
# days in the recorded output) - confirm that dropping partial days is intended.
query = text("""select *, vacationhours/8 as vacationdays from prod.employee where salariedflag=false order by vacationhours desc limit 10""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,businessentityid,nationalidnumber,loginid,jobtitle,birthdate,maritalstatus,gender,hiredate,salariedflag,vacationhours,sickleavehours,currentflag,rowguid,modifieddate,organizationnode,vacationdays
0,117,599942664,adventure-works\chad0,Production Technician - WC50,1990-08-04,M,M,2009-02-18,False,99,69,True,83a32d4c-4725-478f-9997-91739bb957ba,2014-06-30,/3/1/11/9/,12
1,88,294148271,adventure-works\betsy0,Production Technician - WC10,1966-12-17,S,F,2009-12-18,False,99,69,True,ebcdba1c-6c1d-4d36-90f7-1893755c85e3,2014-06-30,/3/1/8/1/,12
2,92,826454897,adventure-works\tom0,Production Technician - WC10,1986-10-01,M,M,2010-03-10,False,98,69,True,b3bf7fc5-2014-48ce-b7bb-76124fa8446c,2014-06-30,/3/1/8/5/,12
3,124,420776180,adventure-works\kim0,Stocker,1984-04-30,S,F,2008-12-26,False,98,69,True,6150f197-7923-40d4-84f2-936207d468ab,2014-06-30,/3/1/12/3/,12
4,116,398737566,adventure-works\michael2,Production Technician - WC50,1974-05-03,S,M,2009-01-31,False,98,69,True,c6eb2566-cf28-4602-9a25-5b91c8728838,2014-06-30,/3/1/11/8/,12
5,114,342607223,adventure-works\mindaugas0,Production Technician - WC50,1978-05-07,M,M,2009-01-13,False,97,68,True,1d784088-dacb-4185-9f60-3d2db4925ba5,2014-06-30,/3/1/11/6/,12
6,90,82638150,adventure-works\danielle0,Production Technician - WC10,1986-09-07,S,F,2010-02-20,False,97,68,True,bb886159-1400-4264-b7c9-a3769beb1274,2014-06-30,/3/1/8/3/,12
7,122,586486572,adventure-works\susan0,Stocker,1978-02-17,S,F,2008-12-07,False,97,68,True,923ecfd6-e202-429a-9141-6cb183531439,2014-06-30,/3/1/12/1/,12
8,89,90888098,adventure-works\patrick0,Production Technician - WC10,1986-09-10,S,M,2010-02-01,False,96,68,True,d5fbe9e6-b8c5-484c-b4b8-48059e0ef267,2014-06-30,/3/1/8/2/,12
9,113,54759846,adventure-works\linda0,Production Technician - WC50,1987-07-17,M,F,2008-12-25,False,96,68,True,fb84759c-b687-43b2-8727-125efe0f5e13,2014-06-30,/3/1/11/5/,12


In [7]:
# TC12: How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? Only return the Job title and vacation days | MEDIUM
# Same selection as the full-record variant, but only jobtitle and
# `vacationdays` (vacationhours / 8) are returned.
# NOTE(review): the division truncates (every row appears as 12 days in the
# recorded output) - confirm that dropping partial days is intended.
query = text("""select jobtitle, vacationhours/8 as vacationdays from prod.employee where salariedflag=false order by vacationhours desc limit 10""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,jobtitle,vacationdays
0,Production Technician - WC50,12
1,Production Technician - WC10,12
2,Production Technician - WC10,12
3,Stocker,12
4,Production Technician - WC50,12
5,Production Technician - WC50,12
6,Production Technician - WC10,12
7,Stocker,12
8,Production Technician - WC10,12
9,Production Technician - WC50,12


In [49]:
# TC13: How many distinct employees does each department have? Return the department group name and employee count for each department group name| HARD
# Joins the department history to the department table and counts distinct
# businessentityid values per department groupname (all history rows, whether
# or not they have an enddate).
query = text("""select count(distinct a.businessentityid) ,b.groupname  
                    from prod.employeedepartmenthistory a 
                    join prod.department b
                    on a.departmentid = b.departmentid
                    group by b.groupname """)
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count,groupname
0,35,Executive General and Administration
1,19,Inventory Management
2,185,Manufacturing
3,12,Quality Assurance
4,14,Research and Development
5,28,Sales and Marketing


In [50]:
# TC14: How many active employees does each department have? Return the department group name and employee count for each department group name| HARD
# Counts distinct businessentityid values per department groupname, restricted
# to history rows with no enddate (treated here as "active").
query = text("""select count(distinct a.businessentityid) ,b.groupname  
                    from prod.employeedepartmenthistory a 
                    join prod.department b
                    on a.departmentid = b.departmentid
                    where a.enddate is null
                    group by b.groupname """)
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count,groupname
0,35,Executive General and Administration
1,18,Inventory Management
2,185,Manufacturing
3,11,Quality Assurance
4,14,Research and Development
5,27,Sales and Marketing


In [51]:
# TC15: How many distinct active employees work in each shift type? Return the count and shift type only| HARD
# Joins the department history to the shift table and counts distinct
# businessentityid values per shift name, restricted to history rows with no
# enddate.
query = text("""select count(distinct a.businessentityid) ,b.name as Shift_Name  
                    from prod.employeedepartmenthistory a 
                    join prod.shift b
                    on a.shiftid = b.shiftid
                    where a.enddate is null
                    group by b.name """)
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count,shift_name
0,176,Day
1,62,Evening
2,52,Night


In [52]:
# TC16: Which departments have active employees that work the night shift and how many employees are there?| HARD
# Joins the department history to both shift and department, keeps rows with
# no enddate on the 'Night' shift, and counts distinct businessentityid values.
# NOTE(review): results are grouped by department groupname rather than by
# individual department - confirm this is the granularity the question wants.
query = text("""select count(distinct a.businessentityid) ,c.groupname
                    from prod.employeedepartmenthistory a 
                    join prod.shift b
                    on a.shiftid = b.shiftid
                    join prod.department c
                    on a.departmentid = c.departmentid
                    where a.enddate is null
                    and b.name in ('Night')
                    group by c.groupname""")
with engine.begin() as conn:
    df = pd.read_sql_query(sql=query, con=conn)
df

Unnamed: 0,count,groupname
0,2,Executive General and Administration
1,1,Inventory Management
2,47,Manufacturing
3,2,Quality Assurance
