## **[MySQL INSERT IGNORE statement](https://www.mysqltutorial.org/mysql-insert-ignore/)**

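Use the MySQL `INSERT IGNORE` statement to insert rows into a table while skipping any row that would cause an error (for example, a duplicate key) instead of aborting the whole statement.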

In [1]:
import os
import re
from getpass import getpass

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
from IPython.display import Image, SVG
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy.engine import URL
from sqlalchemy_schemadisplay import create_schema_graph
from sqlalchemy_utils import database_exists, create_database

# Notebook-wide pandas display settings, applied one option at a time.
# `None` removes the limit for the column count and the cell width.
display_settings = {
    'display.max_columns': None,
    'expand_frame_repr': True,
    'display.max_colwidth': None,
    'display.max_rows': 10,
    'display.width': 65,
}

for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [2]:
# connect to the classicmodels database

# Credentials are read from the environment instead of being hardcoded, so no
# secret is saved with the notebook. When MYSQL_PASSWORD is not set, the
# password is requested interactively.
db_user = os.environ.get('MYSQL_USER', 'namlq')
db_host = os.environ.get('MYSQL_HOST', 'localhost')

# URL.create escapes special characters in the user name and password, which
# a hand-formatted URL string does not.
db_url = URL.create(
    'mysql+pymysql',
    username=db_user,
    password=(os.environ.get('MYSQL_PASSWORD')
              or getpass(f'MySQL password for {db_user}@{db_host}: ')),
    host=db_host,
    database='classicmodels',
)

# NOTE(review): the key name suggests a placeholder whose only purpose is to
# hand pymysql a non-empty `ssl` dict so the connection uses TLS -- confirm.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}

engine = create_engine(db_url, connect_args=connect_args, echo=False)
inspector = inspect(engine)

### Introduction to MySQL INSERT IGNORE statement

Syntax:

```sql
INSERT IGNORE INTO table(column_list)
VALUES (value_list),
       (value_list),
       ...;
```

### MySQL INSERT IGNORE example

In [3]:
# create table subcribers
# (id is generated by AUTO_INCREMENT; email carries a UNIQUE constraint)
string = '''
CREATE TABLE subcribers (
    id INT PRIMARY KEY AUTO_INCREMENT,
    email VARCHAR(50) NOT NULL UNIQUE
);'''

# drop any leftover table first so the cell can be re-run
engine.execute('DROP TABLE IF EXISTS subcribers')
engine.execute(string)

# add data into the table
string = '''
INSERT INTO subcribers(email)
VALUES('john.doe@gmail.com')
;'''

# The table is deliberately NOT dropped here: the INSERT IGNORE cell further
# down inserts into it and drops it itself after reading the rows back.
# (The original trailing, mis-indented DROP raised an IndentationError and
# would have removed the table before that cell could use it.)
engine.execute(string)


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f92cc763670>

In [4]:
# empty frame with the same two columns as the subcribers table
df2 = pd.DataFrame(columns=['id', 'email'])


def change_type(df):
    """Return a copy of ``df`` with ``id`` renumbered and ``email`` capped.

    ``id`` becomes 1..len(df) in the current row order and ``email`` is cut
    to its first 50 characters.
    """
    sequential_ids = range(1, len(df) + 1)
    capped_email = df['email'].str.slice(stop=50)
    return df.assign(id=sequential_ids, email=capped_email)


# first row; no id is supplied, so that column stays missing after the concat
row = {'email': ['john.doe@gmail.com']}

df2 = pd.concat([df2, pd.DataFrame(row)])

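Inserting `john.doe@gmail.com` a second time with a plain `INSERT` violates the `UNIQUE` constraint on `email` and gives the error: 
"Duplicate entry 'john.doe@gmail.com' for key 'subcribers.email'"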

In [5]:
# with INSERT IGNORE the statement runs without raising an error
insert_ignore_sql = '''
INSERT IGNORE INTO subcribers(email)
VALUES('john.doe@gmail.com'),
      ('jane.smith@ibm.com')
;'''
engine.execute(insert_ignore_sql)

# read the rows back before the table is removed
df1 = pd.read_sql(sql='SELECT * FROM subcribers', con=engine)

# clean up the demo table
engine.execute('DROP TABLE IF EXISTS subcribers')

df1

Unnamed: 0,id,email
0,2,jane.smith@ibm.com
1,1,john.doe@gmail.com


In [6]:
# the same two e-mail addresses used in the INSERT IGNORE statement
row = {'email': ['john.doe@gmail.com', 'jane.smith@ibm.com']}

df2 = (
    pd.concat([df2, pd.DataFrame(row)])
    # keep only the first occurrence of each e-mail address
    .drop_duplicates(subset='email')
    # rows that already have an id first, rows without one last
    .sort_values(by='id', na_position='last')
    # renumber id and cap the e-mail length
    .pipe(change_type)
    # order case-insensitively by e-mail and reset the index to 0..n-1
    .sort_values(
        by='email',
        key=lambda emails: emails.str.upper(),
        ignore_index=True,
    )
)

df2

Unnamed: 0,id,email
0,2,jane.smith@ibm.com
1,1,john.doe@gmail.com


### MySQL INSERT IGNORE and STRICT mode

In [7]:
# tokens has a single column, s, declared as VARCHAR(6)
create_tokens_sql = '''
CREATE TABLE tokens (
    s VARCHAR(6)
);'''

# drop any leftover table first so the cell can be re-run
engine.execute('DROP TABLE IF EXISTS tokens')
engine.execute(create_tokens_sql)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f92565e2100>

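In strict mode, a plain `INSERT` of the 7-character string `'abcdefg'` into the `VARCHAR(6)` column `s` fails with:

Error Code: 1406, "Data too long for column 's' at row 1"

With `INSERT IGNORE`, the value is truncated to fit the column instead.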

In [8]:
# 'abcdefg' is 7 characters long, one more than VARCHAR(6) holds
insert_ignore_sql = '''
INSERT IGNORE INTO tokens(s)
VALUES('abcdefg')
;'''
engine.execute(insert_ignore_sql)

# read the stored value back before the table is removed
df1 = pd.read_sql(sql='SELECT * FROM tokens', con=engine)

# clean up the demo table
engine.execute('DROP TABLE IF EXISTS tokens')

df1

Unnamed: 0,s
0,abcdef


In [9]:
# empty frame with the same single column as the tokens table
df2 = pd.DataFrame(columns=['s'])

row = {'s': ['abcdefg']}

# append the row, then keep only the first 6 characters of s
df2 = pd.concat([df2, pd.DataFrame(row)]).assign(
    s=lambda frame: frame['s'].str.slice(stop=6)
)

df2

Unnamed: 0,s
0,abcdef
