## **[MySQL UPDATE JOIN](https://www.mysqltutorial.org/mysql-update-join/)**

Use the MySQL UPDATE JOIN statement to perform the cross-table update.

In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image, SVG

from sqlalchemy_utils import database_exists, create_database
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy_schemadisplay import create_schema_graph
import pymysql

pd.set_option(
    'display.max_columns', None,
    'expand_frame_repr', True,
    'display.max_colwidth', None,
    'display.max_rows', 10,
)

pd.set_option('display.width', 65)

### MySQL UPDATE JOIN syntax

### MySQL UPDATE JOIN examples

In [2]:
# create mydb database
# https://stackoverflow.com/a/30971098/2757266
connect_args={'ssl':{'fake_flag_to_enable_tls': True}}

engine_new = create_engine(
    'mysql+pymysql://namlq:abc123@localhost/mydb',
    connect_args=connect_args)    
    
if not database_exists(engine_new.url):
    create_database(engine_new.url)

print(database_exists(engine_new.url))

True


In [3]:
engine_new.execute('DROP TABLE IF EXISTS employees, merits')

# https://stackoverflow.com/a/23293136/2757266
# engine_new.execute('SET FOREIGN_KEY_CHECKS=0')
# engine_new.execute('DROP TABLE IF EXISTS merits')
# engine_new.execute('SET FOREIGN_KEY_CHECKS=1;')

# create merits table
string = '''
CREATE TABLE merits (
    performance INT(11) NOT NULL,
    percentage FLOAT NOT NULL,
    PRIMARY KEY (performance)
); '''

engine_new.execute(string)

# create employees table
string = '''
CREATE TABLE employees (
    emp_id INT(11) NOT NULL AUTO_INCREMENT,
    emp_name VARCHAR(255) NOT NULL,
    performance INT(11) DEFAULT NULL,
    salary FLOAT DEFAULT NULL,
    PRIMARY KEY (emp_id),
    CONSTRAINT fk_performance 
    FOREIGN KEY (performance) REFERENCES merits (performance)
);'''

engine_new.execute(string)

# insert data for merits table
string = '''
INSERT INTO merits(performance, percentage)
VALUES
    (1, 0), (2, 0.01), (3, 0.03), (4, 0.05), (5, 0.08)
;'''

engine_new.execute(string)

## insert data for employees table
string = '''
INSERT INTO employees(emp_name, performance, salary)
VALUES
    ('Mary Doe', 1, 50000),
    ('Cindy Smith', 3, 65000),
    ('Sue Greenspan', 4, 75000),
    ('Grace Dell', 5, 125000),
    ('Nancy Johnson', 3, 85000),
    ('John Doe', 2, 45000),
    ('Lily Bush', 3, 55000)
;'''

# https://stackoverflow.com/a/33238549/2757266
# engine_new.execute('SET FOREIGN_KEY_CHECKS=0')
engine_new.execute(string)
# engine_new.execute('SET FOREIGN_KEY_CHECKS=1')

  engine_new.execute('DROP TABLE IF EXISTS employees, merits')


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f08250216d0>

In [4]:
pd.read_sql('SELECT * FROM merits;', engine_new)

Unnamed: 0,performance,percentage
0,1,0.0
1,2,0.01
2,3,0.03
3,4,0.05
4,5,0.08


In [5]:
pd.read_sql('SELECT * FROM employees', engine_new)

Unnamed: 0,emp_id,emp_name,performance,salary
0,1,Mary Doe,1,50000.0
1,2,Cindy Smith,3,65000.0
2,3,Sue Greenspan,4,75000.0
3,4,Grace Dell,5,125000.0
4,5,Nancy Johnson,3,85000.0
5,6,John Doe,2,45000.0
6,7,Lily Bush,3,55000.0


In [6]:
def change_type(df):
    return df.assign(
        emp_id = range(1, len(df)+1),
        emp_name = df.emp_name.str[:255],
        performance = df.performance.astype('Int64'),
        salary = df.salary.astype(float))            

merits = pd.DataFrame(columns=['performance', 'percentage'])

employees = pd.DataFrame(
    columns=['emp_id', 'emp_name', 'performance', 'salary'])

row = {'performance': [1, 2, 3, 4, 5],
       'percentage': [0, 0.01, 0.03, 0.05, 0.08]}

merits = pd.concat([merits, pd.DataFrame.from_dict(row)])

row = {'emp_name': ['Mary Doe', 'Cindy Smith', 'Sue Greenspan',
                    'Grace Dell', 'Nancy Johnson', 'John Doe', 'Lily Bush'],
       'performance': [1, 3, 4, 5, 3, 2, 3],
       'salary': [50000, 65000, 75000, 125000, 85000, 45000, 55000]}

employees = (pd.concat([employees, pd.DataFrame.from_dict(row)])
             .pipe(change_type))

### MySQL UPDATE JOIN example with INNER JOIN clause

In [7]:
string = '''
UPDATE employees
    INNER JOIN merits USING(performance)
SET 
    salary = salary + salary * percentage
;'''

engine_new.execute(string)

df1 = (pd.read_sql('SELECT * FROM employees', engine_new)
       .pipe(change_type))

In [8]:
df2 = (
    employees
    .merge(merits, on='performance', how='inner')
    .assign(salary = lambda df: df.salary * (1 + df.percentage))
    .drop(columns='percentage')
    .sort_values(by='emp_id', ignore_index=True)
    .pipe(change_type)
)

df1.equals(df2)

True

In [9]:
df1

Unnamed: 0,emp_id,emp_name,performance,salary
0,1,Mary Doe,1,50000.0
1,2,Cindy Smith,3,66950.0
2,3,Sue Greenspan,4,78750.0
3,4,Grace Dell,5,135000.0
4,5,Nancy Johnson,3,87550.0
5,6,John Doe,2,45450.0
6,7,Lily Bush,3,56650.0


### MySQL UPDATE JOIN example with LEFT JOIN

In [10]:
string = '''
INSERT INTO employees(emp_name, performance, salary)
VALUES
    ('Jack William', NULL, 43000),
    ('Ricky Bond', NULL, 52000)
;'''

engine_new.execute(string)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f0824f46370>

In [11]:
string = '''
UPDATE employees
LEFT JOIN merits USING (performance)
SET salary = salary + salary * 0.015
WHERE percentage IS NULL
;'''

engine_new.execute(string)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f0824f46100>

In [12]:
df1 = (pd.read_sql('SELECT * FROM employees', engine_new)
       .pipe(change_type))

In [13]:
row = {'emp_name': ['Jack William', 'Ricky Bond'],
       'salary': [43000, 52000]}

df2 = (pd.concat([df2, pd.DataFrame.from_dict(row)],
                 ignore_index=True)
       .pipe(change_type)
      )

mask = ~ df2.performance.isin(merits.performance)

df2.loc[mask, 'salary'] = df2.loc[mask, 'salary'] * 1.015

In [14]:
df1.equals(df2)

False

In [15]:
(df1 != df2).sum()

emp_id         0
emp_name       0
performance    0
salary         2
dtype: int64

In [16]:
df1.loc[df1.salary != df2.salary, :]

Unnamed: 0,emp_id,emp_name,performance,salary
7,8,Jack William,,43645.0
8,9,Ricky Bond,,52780.0


In [17]:
df2.loc[df1.salary != df2.salary, :]

Unnamed: 0,emp_id,emp_name,performance,salary
7,8,Jack William,,43645.0
8,9,Ricky Bond,,52780.0


In [18]:
engine_new.execute('DROP TABLE IF EXISTS employees, merits')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f0824f43400>