## **[An Introduction to MySQL CTE](https://www.mysqltutorial.org/mysql-cte/)**

Use a MySQL common table expression (CTE) to write complex queries in a more readable way.

In [1]:
import getpass
import os
import re
from urllib.parse import quote_plus

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
from IPython.display import Image, SVG
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy_schemadisplay import create_schema_graph
from sqlalchemy_utils import database_exists, create_database

# Notebook-wide pandas display settings, one option per call.
# None removes the limit for that option.
pd.set_option('display.max_columns', None)
pd.set_option('expand_frame_repr', True)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 10)

# Width, in characters, used when rendering frames as text.
pd.set_option('display.width', 65)

In [2]:
# Connect to the classicmodels database.
#
# Credentials are read from the environment so that no password is stored in
# the notebook. MYSQL_USER, MYSQL_HOST and MYSQL_DATABASE fall back to the
# values this notebook was originally written against; the password has no
# fallback and is prompted for interactively when MYSQL_PASSWORD is unset.
db_user = os.environ.get('MYSQL_USER', 'namlq')
db_password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: ')
db_host = os.environ.get('MYSQL_HOST', 'localhost')
db_name = os.environ.get('MYSQL_DATABASE', 'classicmodels')

# NOTE(review): the key name says this is a placeholder; presumably any
# non-empty `ssl` mapping is enough for the driver to use TLS -- confirm
# against the PyMySQL documentation.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}

# quote_plus escapes characters such as '@', ':' and '/' that would otherwise
# break parsing of the database URL.
engine = create_engine(
    f'mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@{db_host}/{db_name}',
    connect_args=connect_args,
    echo=False,
)
inspector = inspect(engine)

### What is a common table expression or CTE

### MySQL CTE syntax

### Simple MySQL CTE examples

In [3]:
# The CTE `customers_in_usa` narrows customers to the USA; the outer query
# then keeps the California rows and orders them by name.
string = '''
WITH customers_in_usa AS (
    SELECT customerName, state
    FROM customers
    WHERE country = 'USA'
) 
SELECT customerName
FROM customers_in_usa
WHERE state = 'CA'
ORDER BY customerName
;'''

# Run the query through the shared engine and keep the result as df1.
df1 = pd.read_sql(sql=string, con=engine)

In [4]:
# pandas version of the same result: load the three needed columns, keep the
# USA / CA rows, and return only customerName with a fresh 0..n-1 index.
# Sorting on the upper-cased names makes the order case-insensitive
# (presumably to mirror the ordering MySQL applies -- confirm the collation).
df2 = (
    pd.read_sql_table(
        'customers', engine, columns=['customerName', 'country', 'state'])
    .loc[lambda cust: (cust['country'] == 'USA') & (cust['state'] == 'CA'),
         ['customerName']]
    .sort_values(by='customerName', key=lambda names: names.str.upper())
    .reset_index(drop=True)
)

In [5]:
# Check that the SQL result (df1) and the pandas result (df2) are identical.
df1.equals(df2)

True

In [6]:
# Preview the first rows of the SQL result.
df1.head()

Unnamed: 0,customerName
0,Boards & Toys Co.
1,Collectable Mini Designs Co.
2,Corporate Gift Ideas Co.
3,"Men 'R' US Retailers, Ltd."
4,Mini Gifts Distributors Ltd.


In [7]:
# Top five sales reps by revenue on orders shipped in 2003. The CTE
# aggregates sales per rep; the outer query joins it to employees for names.
#
# The ORDER BY inside the CTE only decides which five rows LIMIT keeps. The
# outer SELECT needs its own ORDER BY for a guaranteed row order, which the
# row-order-sensitive df1.equals(df2) comparison relies on.
string = '''
WITH topsales2003 AS (
    SELECT
        salesRepEmployeeNumber employeeNumber,
        SUM(quantityOrdered * priceEach) sales
    FROM orders
    INNER JOIN orderdetails USING (orderNumber)
    INNER JOIN customers USING (customerNumber)
    WHERE YEAR(shippedDate) = 2003 AND status = 'Shipped'
    GROUP BY salesRepEmployeeNumber
    ORDER BY sales DESC
    LIMIT 5
)
SELECT employeeNumber, firstName, lastName, sales
FROM employees
JOIN topsales2003 USING (employeeNumber)
ORDER BY sales DESC
;'''

df1 = pd.read_sql(string, engine)

In [8]:
# pandas counterpart of the `topsales2003` CTE: join orders, orderdetails and
# customers, keep shipped 2003 rows, total quantityOrdered * priceEach per
# sales rep, and keep the five largest totals.
topsales2003 = (
    pd.read_sql_table('orders', engine)
    .merge(pd.read_sql_table('orderdetails', engine),
           how='inner', on='orderNumber')
    .merge(pd.read_sql_table('customers', engine),
           how='inner', on='customerNumber')
    .loc[lambda rows: rows['shippedDate'].dt.year.eq(2003)
         & rows['status'].eq('Shipped')]
    .assign(sales=lambda rows: rows['quantityOrdered'].mul(rows['priceEach']))
    .groupby('salesRepEmployeeNumber', as_index=False)['sales']
    .sum()
    .rename(columns={'salesRepEmployeeNumber': 'employeeNumber'})
    .loc[:, ['employeeNumber', 'sales']]
    .sort_values(by='sales', ascending=False, ignore_index=True)
    .head(5)
)

# Attach employee names to the five totals and order by sales, descending.
df2 = (
    pd.merge(pd.read_sql_table('employees', engine), topsales2003,
             how='inner', on='employeeNumber', sort=True)
    .loc[:, ['employeeNumber', 'firstName', 'lastName', 'sales']]
    .sort_values(by='sales', ascending=False, ignore_index=True)
)

In [9]:
# Check that the SQL result (df1) and the pandas result (df2) are identical.
df1.equals(df2)

True

In [10]:
# Display the SQL result for the top sales reps.
df1

Unnamed: 0,employeeNumber,firstName,lastName,sales
0,1165,Leslie,Jennings,413219.85
1,1370,Gerard,Hernandez,295246.44
2,1401,Pamela,Castillo,289982.88
3,1621,Mami,Nishi,267249.4
4,1501,Larry,Bott,261536.95


### A more advanced MySQL CTE example

In [11]:
# Two chained CTEs: `salesrep` builds a full name for every 'Sales Rep'
# employee, and `customer_salesrep` joins customers to it. The outer query
# returns every pairing ordered by customer name.
string = '''
WITH salesrep AS (
    SELECT
        employeeNumber,
        CONCAT(firstName, ' ', lastName) salesRepName
    FROM employees
    WHERE jobTitle = 'Sales Rep'
),
customer_salesrep AS (
    SELECT customerName, salesRepName
    FROM customers
    INNER JOIN salesrep ON 
        employeeNumber = salesRepEmployeeNumber
)
SELECT *
FROM customer_salesrep
ORDER BY customerName
;'''

# Run the query through the shared engine and keep the result as df1.
df1 = pd.read_sql(sql=string, con=engine)

In [12]:
# pandas counterpart of the `salesrep` CTE: keep the 'Sales Rep' employees and
# build "<firstName> <lastName>" as salesRepName.
salesrep = (
    pd.read_sql_table('employees', engine)
    .loc[lambda emp: emp['jobTitle'] == 'Sales Rep']
    .assign(salesRepName=lambda emp: emp['firstName'].astype(str)
            + ' ' + emp['lastName'].astype(str))
    .loc[:, ['employeeNumber', 'salesRepName']]
)

# Counterpart of `customer_salesrep` plus the final ORDER BY: match each
# customer to its sales rep and sort by upper-cased customer name.
df2 = (
    pd.merge(pd.read_sql_table('customers', engine), salesrep,
             how='inner',
             left_on='salesRepEmployeeNumber',
             right_on='employeeNumber')
    .loc[:, ['customerName', 'salesRepName']]
    .sort_values(by='customerName',
                 key=lambda names: names.str.upper(),
                 ignore_index=True)
)

In [13]:
# Check that the SQL result (df1) and the pandas result (df2) are identical.
df1.equals(df2)

True

In [14]:
# Preview the first rows of the SQL result.
df1.head()

Unnamed: 0,customerName,salesRepName
0,Alpha Cognac,Gerard Hernandez
1,American Souvenirs Inc,Foon Yue Tseng
2,Amica Models & Co.,Pamela Castillo
3,"Anna's Decorations, Ltd",Andy Fixter
4,Atelier graphique,Gerard Hernandez


### The WITH clause usages