## **[MySQL INSERT INTO SELECT](https://www.mysqltutorial.org/mysql-insert-into-select/)**

Use the MySQL INSERT INTO SELECT statement to insert data into a table, where data comes from the result of a SELECT statement.

In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image, SVG

from sqlalchemy_utils import database_exists, create_database
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy_schemadisplay import create_schema_graph
import pymysql

pd.set_option(
    'display.max_columns', None,
    'expand_frame_repr', True,
    'display.max_colwidth', None,
    'display.max_rows', 10,
)

pd.set_option('display.width', 65)

In [2]:
# connect to the classicmodels database
connect_args={'ssl':{'fake_flag_to_enable_tls': True}}

engine = create_engine('mysql+pymysql://namlq:abc123@localhost/classicmodels',
                       connect_args=connect_args, echo=False
                         )
inspector = inspect(engine)

### MySQL INSERT INTO SELECT Overview

### MySQL INSERT INTO SELECT example

In [3]:
# create a new table called suppliers:
string = '''
CREATE TABLE suppliers (
    supplierNumber INT AUTO_INCREMENT,
    supplierName VARCHAR(50) NOT NULL,
    phone VARCHAR(50),
    addressLine1 VARCHAR(50),
    addressLine2 VARCHAR(50),
    city VARCHAR(50),
    state VARCHAR(50),
    postalCode VARCHAR(50),
    country VARCHAR(50),
    customerNumber INT,
    PRIMARY KEY (supplierNumber)
);'''

engine.execute('DROP TABLE IF EXISTS suppliers')
engine.execute(string)

  engine.execute('DROP TABLE IF EXISTS suppliers')


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f24ec30b9a0>

In [4]:
# find all customers who locate in California, USA
string = '''
SELECT
    customerNumber, customerName, phone, 
    addressLine1, addressLine2, city,
    state, postalCode, country
FROM
    customers
WHERE
    country = 'USA' AND state = 'CA'
;'''

pd.read_sql(string, engine).head(3)

Unnamed: 0,customerNumber,customerName,phone,addressLine1,addressLine2,city,state,postalCode,country
0,124,Mini Gifts Distributors Ltd.,4155551450,5677 Strong St.,,San Rafael,CA,97562,USA
1,129,Mini Wheels Co.,6505555787,5557 North Pendale Street,,San Francisco,CA,94217,USA
2,161,Technics Stores Inc.,6505556809,9408 Furth Circle,,Burlingame,CA,94217,USA


In [5]:
# insert customers who locate in California USA
# from  the cusotmers table into the suppliers table
string = '''
INSERT INTO suppliers(
    supplierName, phone, addressLine1, 
    addressLine2, city, state, postalCode,
    country, customerNumber)
SELECT
    customerName, phone, addressLine1,
    addressLine2, city, state, postalCode,
    country, customerNumber
FROM 
    customers
WHERE
    country = 'USA' AND state = 'CA'
;'''

engine.execute(string)

df1 = pd.read_sql('SELECT * FROM suppliers', engine)

engine.execute('DROP TABLE IF EXISTS suppliers')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f24769d6730>

In [6]:
df2 = pd.DataFrame(
    data=None,
    columns=['supplierNumber', 'supplierName', 'phone', 'addressLine1',
             'addressLine2', 'city', 'state', 'postalCode',
             'country', 'customerNumber']
)

cus_usa_ca = (
    pd.read_sql_table('customers', engine)
    .query("country == 'USA' and state == 'CA'")
    .assign(supplierNumber = lambda df: range(1, len(df)+1),
            supplierName = lambda df: df.customerName.str[:50],
            phone = lambda df: df.phone.str[:50],
            addressLine1 = lambda df: df.addressLine1.str[:50],
            addressLine2 = lambda df: df.addressLine2.str[:50],
            city = lambda df: df.city.str[:50],
            state = lambda df: df.state.str[:50],
            postalCode = lambda df: df.postalCode.str[:50],
            country = lambda df: df.country.str[:50],
            customerNumber = lambda df: df.customerNumber.astype(int)
           )
    [['supplierNumber', 'supplierName', 'phone', 'addressLine1',
      'addressLine2', 'city', 'state', 'postalCode',
      'country', 'customerNumber']]
    .reset_index(drop=True))

df2 = pd.concat([df2, cus_usa_ca])

In [7]:
df1.equals(df2)

False

In [8]:
(df1 != df2).sum()

supplierNumber     0
supplierName       0
phone              0
addressLine1       0
addressLine2      11
city               0
state              0
postalCode         0
country            0
customerNumber     0
dtype: int64

In [9]:
df1.addressLine2.isna().sum()

11

In [10]:
df1.head(3)

Unnamed: 0,supplierNumber,supplierName,phone,addressLine1,addressLine2,city,state,postalCode,country,customerNumber
0,1,Mini Gifts Distributors Ltd.,4155551450,5677 Strong St.,,San Rafael,CA,97562,USA,124
1,2,Mini Wheels Co.,6505555787,5557 North Pendale Street,,San Francisco,CA,94217,USA,129
2,3,Technics Stores Inc.,6505556809,9408 Furth Circle,,Burlingame,CA,94217,USA,161


### Using SELECT statement in the VALUES list

In [11]:
# create a new table called stats
string = '''
CREATE TABLE stats (
    totalProduct INT,
    totalCustomer INT,
    totalOrder INT
);'''

engine.execute('DROP TABLE IF EXISTS stats')
engine.execute(string)

# insert some data into the table
string = '''
INSERT INTO stats(totalProduct, totalCustomer, totalOrder)
VALUES (
    (SELECT COUNT(*) FROM products),
    (SELECT COUNT(*) FROM customers),
    (SELECT COUNT(*) FROM orders)
);'''

engine.execute(string)

df1 = pd.read_sql('SELECT * FROM stats', engine)

# drop the job done table
engine.execute('DROP TABLE IF EXISTS stats')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f24769f5cd0>

In [12]:
df2 = pd.DataFrame(
    columns=['totalProduct', 'totalCustomer', 'totalOrder'])

row = {'totalProduct': [len(pd.read_sql_table('products', engine))],
       'totalCustomer': [len(pd.read_sql_table('customers', engine))],
       'totalOrder': [len(pd.read_sql_table('orders', engine))]}

df2 = pd.concat([df2, pd.DataFrame.from_dict(row)])

In [13]:
df1

Unnamed: 0,totalProduct,totalCustomer,totalOrder
0,110,122,326


In [14]:
df2

Unnamed: 0,totalProduct,totalCustomer,totalOrder
0,110,122,326
