### SQL SUBQUERIES

In [1]:
import pandas as pd
import sqlite3

In [2]:
# Open the on-disk SQLite database and obtain a cursor, the handle through
# which the SQL statements in the following cells are executed.
conn = sqlite3.connect(database='orgwide.sqlite')
cur = conn.cursor()

In [3]:
# Rebuild the demo table from scratch so this cell can be re-run safely.
# The database is a file on disk, so without the DROP a second run of this
# cell (or a Restart & Run All) stops with "table employees already exists".
cur.execute("DROP TABLE IF EXISTS employees;")

# employee_id is generated by SQLite; the INSERT below never supplies it.
cur.execute(
"""
CREATE TABLE employees(
    employee_id INTEGER PRIMARY KEY AUTOINCREMENT,
    employee_name TEXT,
    department TEXT,
    salary INTEGER);
"""
)

# Seed the ten sample employees used by every query in this notebook.
cur.execute(
"""
INSERT INTO employees(employee_name, department, salary) VALUES
('Alice', 'HR', 50000),
('Dennis', 'IT', 70000),
('James', 'Logistics', 40000),
('Mary', 'Customer Service', 45000),
('Susan', 'Finance', 150000),
('Kevin', 'Security', 50000),
('Vallery', 'Procurement', 40000),
('Patrick', 'Data Engineer', 200000),
('Tlotliso', 'Project Manager', 500000),
('Wepukhulu', 'HR', 55000);
"""
)

# Persist the inserted rows to the database file.
conn.commit()

In [4]:
# Find all employees in the same department as Alice, using a subquery.
# IN (rather than =) is used so that every matching department is considered
# if the subquery returns more than one row, e.g. two employees named Alice;
# with = only the first row of the subquery would be compared.

q = """
SELECT *
FROM employees
WHERE department IN (
    SELECT department
    FROM employees
    WHERE employee_name = 'Alice'
);
"""
pd.read_sql(q, conn)

Unnamed: 0,employee_id,employee_name,department,salary
0,1,Alice,HR,50000
1,10,Wepukhulu,HR,55000


In [5]:
# Employees earning the top salary in the company. The inner query yields
# the largest salary; the outer query keeps every row whose salary equals it.

q = """
SELECT employee_name, salary
FROM employees
WHERE salary = (
    SELECT MAX(salary)
    FROM employees
    );
"""

pd.read_sql_query(q, con=conn)

Unnamed: 0,employee_name,salary
0,Tlotliso,500000


In [6]:
# Department with the highest average salary.
#
# The employees table holds each employee's name, salary and department, so:
#   1. the innermost query builds one (department, avg_salary) row per
#      department;
#   2. the middle query applies the MAX aggregate to those averages;
#   3. the outer query keeps the department(s) whose average equals that
#      maximum.
#
# HAVING repeats AVG(salary) instead of referring to the avg_salary alias:
# SQLite accepts the alias there, but other SQL dialects may not.

q = """
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
HAVING AVG(salary) = (
    SELECT MAX(avg_salary)
    FROM (
        SELECT department, AVG(salary) AS avg_salary
        FROM employees
        GROUP BY department)
    );
"""
pd.read_sql(q, conn)

Unnamed: 0,department,avg_salary
0,Project Manager,500000.0


In [7]:
# Average salary per department: one output row for each department.
q = """
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department"""

pd.read_sql_query(q, con=conn)

Unnamed: 0,department,avg_salary
0,Customer Service,45000.0
1,Data Engineer,200000.0
2,Finance,150000.0
3,HR,52500.0
4,IT,70000.0
5,Logistics,40000.0
6,Procurement,40000.0
7,Project Manager,500000.0
8,Security,50000.0


In [8]:
# Per-department average salary, sorted from the highest average to the lowest.

q = """
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
ORDER BY avg_salary DESC;
"""
pd.read_sql_query(q, con=conn)

Unnamed: 0,department,avg_salary
0,Project Manager,500000.0
1,Data Engineer,200000.0
2,Finance,150000.0
3,IT,70000.0
4,HR,52500.0
5,Security,50000.0
6,Customer Service,45000.0
7,Procurement,40000.0
8,Logistics,40000.0
