# CTE

- A Common Table Expression (CTE) is a named temporary result set in SQL that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement. 
- CTEs are defined using the WITH clause and provide a way to create more readable and modular queries, especially for complex queries that involve multiple subqueries.

WITH cte_name (column1, column2, ...) AS (
    SELECT column1, column2, ...
    FROM your_table
    WHERE conditions
)
SELECT column1, column2, ...
FROM cte_name;

In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
# Open (or create) the on-disk SQLite database file used by the rest of the notebook.
cnn = sqlite3.connect('data4.db')
# SQLite parses FOREIGN KEY clauses but does not enforce them unless this pragma
# is switched on for each connection; enable it so that the
# Employee.DepartmentID -> Department.DepartmentID reference is actually checked.
cnn.execute('PRAGMA foreign_keys = ON')
# Single cursor shared by every query cell below.
cur = cnn.cursor()

In [3]:
# Create the Department lookup table.
# IF NOT EXISTS keeps this cell re-runnable: the database is a file on disk, so a
# table created by an earlier run may still be present, and a plain CREATE TABLE
# would then fail with "table Department already exists".
cur.execute('''
CREATE TABLE IF NOT EXISTS Department (
    DepartmentID INT PRIMARY KEY,
    DepartmentName VARCHAR(50)
);
''')

<sqlite3.Cursor at 0x222b8a9a340>

In [4]:
# Create the Employee table; each employee points at a Department row through
# DepartmentID.
# IF NOT EXISTS keeps this cell re-runnable: the database is a file on disk, so a
# table created by an earlier run may still be present, and a plain CREATE TABLE
# would then fail with "table Employee already exists".
cur.execute('''
CREATE TABLE IF NOT EXISTS Employee (
    EmployeeID INT PRIMARY KEY,
    Name VARCHAR(100),
    Job VARCHAR(50),
    Salary INT,
    DepartmentID INT,
    FOREIGN KEY (DepartmentID) REFERENCES Department(DepartmentID)
);
''')

<sqlite3.Cursor at 0x222b8a9a340>

In [5]:
# Seed the four departments.
# INSERT OR IGNORE skips rows whose DepartmentID is already present, so running
# this cell a second time leaves the table unchanged instead of raising a
# UNIQUE-constraint IntegrityError on the primary key.
cur.execute('''
INSERT OR IGNORE INTO Department (DepartmentID, DepartmentName) VALUES
(1, 'Sales'),
(2, 'Marketing'),
(3, 'Finance'),
(4, 'Human Resources');
''')

<sqlite3.Cursor at 0x222b8a9a340>

In [6]:
# Seed the ten employees.
# INSERT OR IGNORE skips rows whose EmployeeID is already present, so running
# this cell a second time leaves the table unchanged instead of raising a
# UNIQUE-constraint IntegrityError on the primary key.
# NOTE(review): employee 10 is a 'Financial Analyst' assigned to DepartmentID 2
# (Marketing) — possibly meant to be 3 (Finance). Left as-is because the recorded
# results further down were produced from this data; confirm before changing.
cur.execute('''
INSERT OR IGNORE INTO Employee (EmployeeID, Name, Job, Salary, DepartmentID) VALUES
(1, 'John Doe', 'Sales Representative', 50000, 1),
(2, 'Jane Smith', 'Marketing Specialist', 60000, 2),
(3, 'Mike Johnson', 'Financial Analyst', 70000, 3),
(4, 'Emily Davis', 'HR Manager', 55000, 4),
(5, 'Robert Brown', 'Sales Representative', 75000, 1),
(6, 'Alice White', 'Marketing Specialist', 80000, 2),
(7, 'Daniel Lee', 'Senior Financial Analyst', 90000, 3),
(8, 'Olivia Miller', 'HR Manager', 70000, 4),
(9, 'William Turner', 'Sales Representative', 55000, 1),
(10, 'Sophia Martin', 'Financial Analyst', 60000, 2);
''')

<sqlite3.Cursor at 0x222b8a9a340>

In [7]:
# Dump both tables so the seed data can be checked by eye.

# Department: run the query, pull all rows, and label the columns using the
# first field of each entry in the cursor's description metadata.
result = cur.execute('''
SELECT * FROM Department;
''').fetchall()
department_headers = [column[0] for column in cur.description]
department_df = pd.DataFrame(result, columns=department_headers)
display(department_df)

# Employee: same steps for the second table.
result = cur.execute('''
SELECT * FROM Employee;
''').fetchall()
employee_headers = [column[0] for column in cur.description]
employee_df = pd.DataFrame(result, columns=employee_headers)
display(employee_df)

Unnamed: 0,DepartmentID,DepartmentName
0,1,Sales
1,2,Marketing
2,3,Finance
3,4,Human Resources


Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,2,Jane Smith,Marketing Specialist,60000,2
2,3,Mike Johnson,Financial Analyst,70000,3
3,4,Emily Davis,HR Manager,55000,4
4,5,Robert Brown,Sales Representative,75000,1
5,6,Alice White,Marketing Specialist,80000,2
6,7,Daniel Lee,Senior Financial Analyst,90000,3
7,8,Olivia Miller,HR Manager,70000,4
8,9,William Turner,Sales Representative,55000,1
9,10,Sophia Martin,Financial Analyst,60000,2


# Simple Examples

### Fetch employees who earn more than the average salary of all employees

In [17]:
# List every employee paid more than the company-wide average salary.
# The CTE produces exactly one row (the overall average). An explicit CROSS JOIN
# attaches that value to each Employee row; it replaces the implicit comma join,
# which hid the fact that two row sources were being combined. Columns are
# qualified with table aliases so each one's origin is unambiguous.
cur.execute('''
WITH avg_sal_cte (average_salary) AS (
    SELECT AVG(Salary)
    FROM Employee
)
SELECT e.*, a.average_salary
FROM Employee AS e
CROSS JOIN avg_sal_cte AS a
WHERE e.Salary > a.average_salary;
''')

result = cur.fetchall()
# Column headers are the first field of each cursor description entry.
df = pd.DataFrame(result, columns=[column[0] for column in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID,average_salary
0,3,Mike Johnson,Financial Analyst,70000,3,66500.0
1,5,Robert Brown,Sales Representative,75000,1,66500.0
2,6,Alice White,Marketing Specialist,80000,2,66500.0
3,7,Daniel Lee,Senior Financial Analyst,90000,3,66500.0
4,8,Olivia Miller,HR Manager,70000,4,66500.0


### Retrieve Employees (All Departments) with Salaries Above 60000

In [58]:
# Employees from every department who earn more than 60000.
# The CTE applies only the salary filter (there is no department condition), so
# it is named for what it produces; the previous name "SalesCTE" suggested a
# Sales-only result that the query does not compute.
cur.execute('''
WITH HighSalaryCTE (EmployeeID, Name, Salary) AS (
    SELECT EmployeeID, Name, Salary
    FROM Employee
    WHERE Salary > 60000
)
SELECT * FROM HighSalaryCTE;
''')

result = cur.fetchall()
# Column headers are the first field of each cursor description entry.
df = pd.DataFrame(result, columns=[column[0] for column in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Salary
0,3,Mike Johnson,70000
1,5,Robert Brown,75000
2,6,Alice White,80000
3,7,Daniel Lee,90000
4,8,Olivia Miller,70000


### Retrieve Employees in the Sales Department (DepartmentID = 1) with Salaries Above 60000

In [65]:
# Employees in department 1 (Sales) earning more than 60000.
# Both conditions are applied inside the CTE; the outer query just reads it back.
result = cur.execute('''
WITH SalesCTE (EmployeeID, Name, Salary) AS (
    SELECT EmployeeID, Name, Salary
    FROM Employee
    WHERE DepartmentID = 1 AND Salary > 60000
)
SELECT * FROM SalesCTE;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,EmployeeID,Name,Salary
0,5,Robert Brown,75000


### Calculate Average Salary for Each Department

In [69]:
# Average salary per DepartmentID, computed inside the CTE with GROUP BY.
result = cur.execute('''
WITH DepartmentAvgSalary (DepartmentID, AvgSalary) AS (
    SELECT DepartmentID, AVG(Salary) AS AvgSalary
    FROM Employee
    GROUP BY DepartmentID
)
SELECT * FROM DepartmentAvgSalary;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,DepartmentID,AvgSalary
0,1,60000.0
1,2,66666.666667
2,3,80000.0
3,4,62500.0


### Calculate Average Salary for Each Department and Display Employee Information Whose Salary is Greater than the Average Salary in their Respective Departments

In [82]:
# Employees paid more than the average of their own department.
# The CTE holds one average per DepartmentID; each employee is joined to the row
# for their department and kept only when their salary is strictly higher.
result = cur.execute('''
WITH DepartmentAvgSalary (DepartmentID, AvgSalary) AS (
    SELECT DepartmentID, AVG(Salary) AS AvgSalary
    FROM Employee
    GROUP BY DepartmentID
)
SELECT e.*
FROM Employee e
JOIN DepartmentAvgSalary d ON e.DepartmentID = d.DepartmentID
WHERE e.Salary > d.AvgSalary;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,5,Robert Brown,Sales Representative,75000,1
1,6,Alice White,Marketing Specialist,80000,2
2,7,Daniel Lee,Senior Financial Analyst,90000,3
3,8,Olivia Miller,HR Manager,70000,4


### Calculate Average Salary for Each Job

In [70]:
# Average salary per job title, computed inside the CTE with GROUP BY Job.
result = cur.execute('''
WITH JobAvgSalary (Job, AvgSalary) AS (
    SELECT Job, AVG(Salary) AS AvgSalary
    FROM Employee
    GROUP BY Job
)
SELECT * FROM JobAvgSalary;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,Job,AvgSalary
0,Financial Analyst,65000.0
1,HR Manager,62500.0
2,Marketing Specialist,70000.0
3,Sales Representative,60000.0
4,Senior Financial Analyst,90000.0


### Calculate Average Salary for Each Job and Display Employee Information Whose Salary is Greater than the Average Salary in their Respective Jobs

In [78]:
# Employees paid more than the average for their own job title.
# The CTE holds one average per Job; each employee is joined to the row for
# their job and kept only when their salary is strictly higher.
result = cur.execute('''
WITH JobAvgSalary (Job, AvgSalary) AS (
    SELECT Job, AVG(Salary) AS AvgSalary
    FROM Employee
    GROUP BY Job
)
SELECT e.*
FROM Employee e
JOIN JobAvgSalary j ON e.Job = j.Job
WHERE e.Salary > j.AvgSalary;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,3,Mike Johnson,Financial Analyst,70000,3
1,5,Robert Brown,Sales Representative,75000,1
2,6,Alice White,Marketing Specialist,80000,2
3,8,Olivia Miller,HR Manager,70000,4


### Count Employees per Job Title (a JobCount Above 1 Means the Title Is Shared)

In [73]:
# Number of employees holding each job title.
# There is no HAVING filter, so titles held by a single employee are listed too
# (with JobCount = 1); shared titles are the rows where JobCount is above 1.
result = cur.execute('''
WITH DuplicateJobsCTE (Job, JobCount) AS (
    SELECT Job, COUNT(*) AS JobCount
    FROM Employee
    GROUP BY Job
)
SELECT * FROM DuplicateJobsCTE;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,Job,JobCount
0,Financial Analyst,2
1,HR Manager,2
2,Marketing Specialist,2
3,Sales Representative,3
4,Senior Financial Analyst,1


# Relational Table Example

### List employees and their respective department names

In [87]:
# Each employee's name, job and salary alongside the name of their department.
# The CTE performs the Employee/Department join on DepartmentID; the outer query
# simply returns everything the CTE produced.
result = cur.execute('''
WITH EmployeeDepartment AS (
    SELECT Employee.Name, Employee.Job, Employee.Salary, Department.DepartmentName
    FROM Employee
    JOIN Department ON Employee.DepartmentID = Department.DepartmentID
)
SELECT *
FROM EmployeeDepartment;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,Name,Job,Salary,DepartmentName
0,John Doe,Sales Representative,50000,Sales
1,Jane Smith,Marketing Specialist,60000,Marketing
2,Mike Johnson,Financial Analyst,70000,Finance
3,Emily Davis,HR Manager,55000,Human Resources
4,Robert Brown,Sales Representative,75000,Sales
5,Alice White,Marketing Specialist,80000,Marketing
6,Daniel Lee,Senior Financial Analyst,90000,Finance
7,Olivia Miller,HR Manager,70000,Human Resources
8,William Turner,Sales Representative,55000,Sales
9,Sophia Martin,Financial Analyst,60000,Marketing


### Calculate the total salary for each department

In [89]:
# Total salary bill per department, reported by department name.
# The CTE sums salaries per DepartmentID; the outer query joins those totals to
# Department to swap the numeric ID for the readable name.
result = cur.execute('''
WITH DepartmentTotalSalary AS (
    SELECT DepartmentID, SUM(Salary) AS TotalSalary
    FROM Employee
    GROUP BY DepartmentID
)
SELECT DepartmentName, TotalSalary
FROM Department
JOIN DepartmentTotalSalary ON Department.DepartmentID = DepartmentTotalSalary.DepartmentID;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,DepartmentName,TotalSalary
0,Sales,180000
1,Marketing,200000
2,Finance,160000
3,Human Resources,125000


### Calculate the average salary for each department

In [90]:
# Average salary per department, reported by department name.
# The CTE averages salaries per DepartmentID; the outer query joins those
# averages to Department to swap the numeric ID for the readable name.
result = cur.execute('''
WITH DepartmentAverageSalary AS (
    SELECT DepartmentID, AVG(Salary) AS AverageSalary
    FROM Employee
    GROUP BY DepartmentID
)
SELECT DepartmentName, AverageSalary
FROM Department
JOIN DepartmentAverageSalary ON Department.DepartmentID = DepartmentAverageSalary.DepartmentID;
''').fetchall()

# Column headers are the first field of each cursor description entry.
headers = [column[0] for column in cur.description]
df = pd.DataFrame(result, columns=headers)
display(df)

Unnamed: 0,DepartmentName,AverageSalary
0,Sales,60000.0
1,Marketing,66666.666667
2,Finance,80000.0
3,Human Resources,62500.0
