## **[MySQL EXISTS](https://www.mysqltutorial.org/mysql-exists/)**

Use the MySQL EXISTS operator to improve the performance of the queries.

In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image, SVG

from sqlalchemy_utils import database_exists, create_database
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy_schemadisplay import create_schema_graph
import pymysql

pd.set_option(
    'display.max_columns', None,
    'expand_frame_repr', True,
    'display.max_colwidth', None,
    'display.max_rows', 10,
)

pd.set_option('display.width', 65)

In [2]:
# connect to the classicmodels database
connect_args={'ssl':{'fake_flag_to_enable_tls': True}}

engine = create_engine('mysql+pymysql://namlq:abc123@localhost/classicmodels',
                       connect_args=connect_args, echo=False
                         )
inspector = inspect(engine)

### Introduction to MySQL EXISTS operator

syntax:

### MySQL SELECT EXISTS examples

Find the customer who has at least one order:

In [3]:
string = '''
SELECT customerNumber, customerName
FROM customers
WHERE EXISTS (
    SELECT 1 # *, column, a_constant, or anything
    FROM orders
    WHERE orders.customerNumber = customers.customerNumber)
;'''

df1 = pd.read_sql(string, engine)

In [4]:
cusnum = (
    pd.read_sql_table(
        'orders', engine, columns=['customerNumber'])
    ['customerNumber']
    .unique()
)

df2 = (
    pd.read_sql_table(
        'customers', engine, columns=['customerNumber', 'customerName'])
    .query('customerNumber in @cusnum')
    .reset_index(drop=True)
)

In [5]:
df1.equals(df2)

True

In [6]:
df1.head()

Unnamed: 0,customerNumber,customerName
0,103,Atelier graphique
1,112,Signal Gift Stores
2,114,"Australian Collectors, Co."
3,119,La Rochelle Gifts
4,121,Baane Mini Imports


Find customers who do not have any orders

In [7]:
string = '''
SELECT
    customerNumber, customerName
FROM customers
WHERE NOT EXISTS (
    SELECT 1
    FROM orders
    WHERE orders.customerNumber = customers.customerNumber)
;'''

df1 = pd.read_sql(string, engine)

In [8]:
cusnum = (
    pd.read_sql_table(
        'orders', engine, columns=['customerNumber'])
    ['customerNumber']
    .unique()
)

df2 = (
    pd.read_sql_table(
        'customers', engine, columns=['customerNumber', 'customerName'])
    .query('customerNumber not in @cusnum')
    .reset_index(drop=True)
)

In [9]:
df1.equals(df2)

True

In [10]:
df1.head()

Unnamed: 0,customerNumber,customerName
0,125,Havel & Zbyszek Co
1,168,American Souvenirs Inc
2,169,Porto Imports Co.
3,206,"Asian Shopping Network, Co"
4,223,Natürlich Autos


### MySQL UPDATE EXISTS examples

Find employees who work at the office in San Francisco

In [11]:
string = '''
SELECT employeeNumber, firstName, lastName, extension
FROM employees
WHERE EXISTS (
    SELECT 1
    FROM offices
    WHERE city = 'San Francisco' AND
        offices.officeCode = employees.officeCode)
;'''

df1 = pd.read_sql(string, engine)

In [12]:
offsan = (
    pd.read_sql_table(
        'offices', engine, columns=['city', 'officeCode'])
    .query('city == "San Francisco"')
    ['officeCode']
    .unique()
)

df2 = (
    pd.read_sql_table('employees', engine)
    .query('officeCode in @offsan')
    [['employeeNumber', 'firstName', 'lastName', 'extension']]
    .reset_index(drop=True)
)

In [13]:
df1.equals(df2)

True

In [14]:
df1

Unnamed: 0,employeeNumber,firstName,lastName,extension
0,1002,Diane,Murphy,x5800
1,1056,Mary,Patterson,x4611
2,1076,Jeff,Firrelli,x9273
3,1143,Anthony,Bow,x5428
4,1165,Leslie,Jennings,x3291
5,1166,Leslie,Thompson,x4065


Add the number 1 to the phone extension of employees who work at the office in San Francisco

In [15]:
concat_string = '''
UPDATE employees
SET extension = CONCAT(extension, '1')
WHERE EXISTS (
    SELECT 1
    FROM offices
    WHERE city = 'San Francisco' AND
        offices.officeCode = employees.officeCode)
;'''

engine.execute(concat_string)

df1 = pd.read_sql(string, engine)

# Recover the original string
# https://stackoverflow.com/questions/792294/how-to-get-first-character-of-a-string-in-sql
substring = '''
UPDATE employees
SET extension = SUBSTRING(extension, 1, 5)
WHERE EXISTS (
    SELECT 1
    FROM offices
    WHERE city = 'San Francisco' AND
        offices.officeCode = employees.officeCode)
;'''

engine.execute(substring)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fd98448ad00>

In [16]:
offsan = (
    pd.read_sql_table(
        'offices', engine, columns=['city', 'officeCode'])
    .query('city == "San Francisco"')
    ['officeCode']
    .unique()
)

df2 = (
    pd.read_sql_table('employees', engine)
    .query('officeCode in @offsan')
    .assign(extension = lambda df: df.extension.astype(str) + '1')
    [['employeeNumber', 'firstName', 'lastName', 'extension']]
    .reset_index(drop=True)
)

In [17]:
df1.equals(df2)

True

In [18]:
df1

Unnamed: 0,employeeNumber,firstName,lastName,extension
0,1002,Diane,Murphy,x58001
1,1056,Mary,Patterson,x46111
2,1076,Jeff,Firrelli,x92731
3,1143,Anthony,Bow,x54281
4,1165,Leslie,Jennings,x32911
5,1166,Leslie,Thompson,x40651


### MySQL INSERT EXISTS example

Archive customers who don't have any sales order in separate table

In [19]:
engine.execute('DROP TABLE IF EXISTS customers_archive')
engine.execute("CREATE TABLE customers_archive LIKE customers;")

string = '''
INSERT INTO customers_archive
SELECT *
FROM customers
WHERE NOT EXISTS (
    SELECT 1
    FROM orders
    WHERE orders.customerNumber = customers.customerNumber)
;'''

engine.execute(string)

df1 = pd.read_sql('SELECT * FROM customers_archive', engine)

In [20]:
cusnum = (
    pd.read_sql_table('orders', engine, columns=['customerNumber'])
    ['customerNumber']
    .unique()
)
df2 = (
    pd.read_sql_table('customers', engine)
    .query('customerNumber not in @cusnum')
    .reset_index(drop=True)
)

In [21]:
df1.equals(df2)

True

In [22]:
df1.head()

Unnamed: 0,customerNumber,customerName,contactLastName,contactFirstName,phone,addressLine1,addressLine2,city,state,postalCode,country,salesRepEmployeeNumber,creditLimit
0,125,Havel & Zbyszek Co,Piestrzeniewicz,Zbyszek,(26) 642-7555,ul. Filtrowa 68,,Warszawa,,01-012,Poland,,0.0
1,168,American Souvenirs Inc,Franco,Keith,2035557845,149 Spinnaker Dr.,Suite 101,New Haven,CT,97823,USA,1286.0,0.0
2,169,Porto Imports Co.,de Castro,Isabel,(1) 356-5555,Estrada da saúde n. 58,,Lisboa,,1756,Portugal,,0.0
3,206,"Asian Shopping Network, Co",Walker,Brydey,+612 9411 1555,Suntec Tower Three,8 Temasek,Singapore,,038988,Singapore,,0.0
4,223,Natürlich Autos,Kloss,Horst,0372-555188,Taucherstraße 10,,Cunewalde,,01307,Germany,,0.0


### MySQL DELETE EXISTS example

Delete the customers that exist in the `customers_archive` table from the `customers` table

In [23]:
string = '''
DELETE FROM customers
WHERE EXISTS (
    SELECT 1
    FROM customers_archive a
    WHERE a.customerNumber = customers.customerNumber)
;'''

engine.execute(string)

df1 = pd.read_sql('SELECT * FROM customers', engine)

# Recover the tables
string = '''
INSERT INTO customers
SELECT * from customers_archive
;'''

engine.execute(string)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fd984449f40>

In [24]:
cusnum = (
    pd.read_sql_table(
        'customers_archive', engine, columns=['customerNumber'])
    ['customerNumber']
    .unique()
)

df2 = (
    pd.read_sql_table('customers', engine)
    .query('customerNumber not in @cusnum')
    .reset_index(drop=True)
)

In [25]:
(
    df1
    .salesRepEmployeeNumber.astype(np.int64)
    .equals(df2)
)

False

In [26]:
(df1 != df2).sum().sort_values(ascending=False).head()

addressLine2      78
state             52
postalCode         6
customerNumber     0
customerName       0
dtype: int64

In [27]:
column_concern = ['addressLine2', 'state', 'postalCode']
df1[column_concern].isna().sum()

addressLine2    78
state           52
postalCode       6
dtype: int64

In [28]:
df2[column_concern].isna().sum()

addressLine2    78
state           52
postalCode       6
dtype: int64

In [29]:
# Drop the table
engine.execute('DROP TABLE IF EXISTS customers_archive')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fd9844493a0>

### MySQL EXISTS operator vs. IN operator

Find the customer who has placed at least one order

In [30]:
# %%timeit
# 7.1 ms ± 263 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
string = '''
# EXPLAIN
SELECT customerNumber, customerName
FROM customers
WHERE customerNumber IN (
    SELECT customerNumber
    FROM orders)
;'''

pd.read_sql(string, engine)

Unnamed: 0,customerNumber,customerName
0,103,Atelier graphique
1,112,Signal Gift Stores
2,114,"Australian Collectors, Co."
3,119,La Rochelle Gifts
4,121,Baane Mini Imports
...,...,...
93,486,Motor Mint Distributors Inc.
94,487,Signal Collectibles Ltd.
95,489,"Double Decker Gift Stores, Ltd"
96,495,Diecast Collectables


In [31]:
# %%timeit
# 6.95 ms ± 290 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
string = '''
# EXPLAIN
SELECT customerNumber, customerName
FROM customers
WHERE EXISTS (
    SELECT 1
    FROM orders
    WHERE orders.customerNumber = customers.customerNumber)
;'''

pd.read_sql(string, engine)

Unnamed: 0,customerNumber,customerName
0,103,Atelier graphique
1,112,Signal Gift Stores
2,114,"Australian Collectors, Co."
3,119,La Rochelle Gifts
4,121,Baane Mini Imports
...,...,...
93,486,Motor Mint Distributors Inc.
94,487,Signal Collectibles Ltd.
95,489,"Double Decker Gift Stores, Ltd"
96,495,Diecast Collectables
