# 2. SQL Common Table Expressions

## Setup
- `cd lessons/sql_advanced && docker-compose up -d`


In [1]:
# Setup: open a connection to the MySQL database and list every column in it.

from sqlalchemy import create_engine
import pandas as pd

# Connection settings (kept as module-level names so later cells can reuse them).
user = 'employees_user'
password = 'employees_password'
host = 'localhost'
port = '3306'
database = 'employees'

# Engine for the PyMySQL driver; the URL is assembled from the settings above.
engine = create_engine(
    'mysql+pymysql://{}:{}@{}:{}/{}'.format(user, password, host, port, database)
)

# One row per column of every table in the schema, ordered by table and then
# by the column's position inside its table.
columns_query = (
    '\n'
    'SELECT column_name, table_name, data_type\n'
    'FROM information_schema.columns \n'
    f'WHERE table_schema = "{database}" \n'
    'ORDER BY table_name, ordinal_position\n'
)
columns_df = pd.read_sql(columns_query, engine)
print(columns_df)

   COLUMN_NAME            TABLE_NAME DATA_TYPE
0       emp_no      current_dept_emp       int
1      dept_no      current_dept_emp      char
2    from_date      current_dept_emp      date
3      to_date      current_dept_emp      date
4      dept_no           departments      char
5    dept_name           departments   varchar
6       emp_no              dept_emp       int
7      dept_no              dept_emp      char
8    from_date              dept_emp      date
9      to_date              dept_emp      date
10      emp_no  dept_emp_latest_date       int
11   from_date  dept_emp_latest_date      date
12     to_date  dept_emp_latest_date      date
13      emp_no          dept_manager       int
14     dept_no          dept_manager      char
15   from_date          dept_manager      date
16     to_date          dept_manager      date
17      emp_no             employees       int
18  birth_date             employees      date
19  first_name             employees   varchar
20   last_nam


# CTE Task

## Exercise 1:

Use a CTE (a Common Table Expression) and a SUM() function in the SELECT statement in a query to find out how many male employees have never signed a contract with a salary value higher than or equal to the all-time company salary average.

````sql
-- One-row CTE holding the average over every row of `salaries`.
WITH cte AS (
	SELECT AVG(salary) AS avg_salary
	FROM salaries
)
-- Every salary row of a male employee adds 1 to the sum unless it is
-- greater than or equal to the average.
SELECT
	SUM(CASE WHEN sal.salary >= avg_cte.avg_salary THEN 0 ELSE 1 END) AS m_salaries_below_avg
FROM
	employees emp
	JOIN salaries sal ON sal.emp_no = emp.emp_no
	CROSS JOIN cte avg_cte
WHERE
	emp.gender = 'M';
````


## Exercise 2:

Use a CTE (a Common Table Expression) and (at least one) COUNT() function in the SELECT statement of a query to find out how many male employees have never signed a contract with a salary value higher than or equal to the all-time company salary average.

````sql
-- One-row CTE holding the average over every row of `salaries`.
WITH cte AS (
	SELECT AVG(salary) AS avg_salary
	FROM salaries
)
-- COUNT() skips NULLs, so a salary row is counted only when the CASE
-- falls through to the ELSE branch (i.e. it is not >= the average).
SELECT
	COUNT(CASE WHEN sal.salary >= avg_cte.avg_salary THEN NULL ELSE sal.salary END) AS m_salaries_below_avg
FROM
	employees emp
	JOIN salaries sal ON sal.emp_no = emp.emp_no
	CROSS JOIN cte avg_cte
WHERE
	emp.gender = 'M';
````

## Exercise 3:

Use MySQL joins (and don’t use a Common Table Expression) in a query to find out how many male employees have never signed a contract with a salary value higher than or equal to the all-time company salary average (i.e. to obtain the same result as in the previous exercise).

````sql
-- Same count as the CTE version, with the average supplied by a derived table.
-- COUNT() skips NULLs, so only salary rows that are not >= the average are counted.
SELECT
	COUNT(CASE WHEN sal.salary >= overall.avg_salary THEN NULL ELSE sal.salary END) AS m_salaries_below_avg
FROM
	employees emp
	JOIN salaries sal ON sal.emp_no = emp.emp_no
	-- JOIN without an ON clause: the single-row derived table is paired with every row.
	JOIN (SELECT AVG(salary) AS avg_salary FROM salaries) overall
WHERE
	emp.gender = 'M';
````

## Exercise 4:

Use a cross join in a query to find out how many male employees have never signed a contract with a salary value higher than or equal to the all-time company salary average (i.e. to obtain the same result as in the previous exercise).

````sql
-- Count the salary rows of male employees that are not >= the all-time average salary.
-- COUNT() skips NULLs, so rows at or above the average (mapped to NULL) are left out.
SELECT
	COUNT(CASE WHEN s.salary >= s1.avg_salary THEN NULL ELSE s.salary END) AS m_salaries_below_avg
FROM
	salaries s
	JOIN
		employees e ON s.emp_no = e.emp_no AND e.gender = 'M'
	-- The derived table has exactly one row, so the cross join just attaches
	-- the average to every salary row.
	CROSS JOIN (
		SELECT AVG(salary) AS avg_salary FROM salaries
	) s1;
````


# Multiple CTEs

## Exercise 1:

Calculate the number of females having signed a contract with a salary higher than the all-time company salary average, display the total number of female employees, and calculate their percentage. To do so, use multiple common table expressions and a cross join in a query.


````sql
-- One-row CTE: average over every row of `salaries`.
WITH cte_avg_salary AS (
	SELECT AVG(salary) AS avg_salary FROM salaries
),
-- One row per female employee with the highest salary she ever had.
cte_f_highest_salary AS (
	SELECT
		s.emp_no, MAX(s.salary) AS f_highest_salary
	FROM
		salaries s
		JOIN employees e ON e.emp_no = s.emp_no  AND e.gender = 'F'
	GROUP BY s.emp_no
)
SELECT
	-- Females whose highest salary is above the average; uses the same `>`
	-- comparison as the percentage column below so the two figures agree.
	SUM(CASE WHEN c2.f_highest_salary > c1.avg_salary THEN 1 ELSE 0 END) AS f_highest_salaries_above_avg,
	-- cte_f_highest_salary has one row per employee, so this counts female
	-- employees (not individual contracts, despite the alias).
	COUNT(e.emp_no) AS total_no_female_contracts,
	CONCAT(ROUND((SUM(CASE WHEN c2.f_highest_salary > c1.avg_salary THEN 1 ELSE 0 END) / COUNT(e.emp_no))*100, 2), '%') AS '% percentage'
FROM
	employees e
	JOIN cte_f_highest_salary c2 ON c2.emp_no = e.emp_no
	CROSS JOIN cte_avg_salary c1;

````


## Exercise 2:

Calculate the highest salary value of employees hired after January 1, 2000, and display the employee number and the highest salary value. Therefore, use multiple common table expressions in a query. Reference the first common table expression in the second one.

```sql
-- Employees whose hire date is strictly later than 2000-01-01.
WITH cte_emp_hired_from_jan_2000 AS (
	SELECT *
	FROM employees
	WHERE hire_date > '2000-01-01'
),
-- Highest salary per employee, restricted to the employees of the first CTE.
cte_highest_contract_salary_values AS (
	SELECT e.emp_no, MAX(s.salary)
	FROM cte_emp_hired_from_jan_2000 e
		JOIN salaries s ON s.emp_no = e.emp_no
	GROUP BY e.emp_no
)
SELECT *
FROM cte_highest_contract_salary_values;
```