In [1]:
import pandas as pd
from pangres import upsert, DocsExampleTable
from sqlalchemy import create_engine, text, VARCHAR
# the line below is only for informational purposes
# this is automatically imported in notebooks/IPython
from IPython.display import display

# Config

In [2]:
# Table that every example below writes to and reads back from.
table_name = 'example'

# Database URL handed to SQLAlchemy. The active choice is an in-memory
# SQLite database; swap in one of the commented alternatives to try
# another backend.
connection_string = "sqlite:///:memory:"
# connection_string = "postgresql://username:password@localhost:5432"
# connection_string = "mysql://username:password@localhost:3306"

engine = create_engine(connection_string)

# Only required when testing against MySQL (rather than SQLite or
# Postgres): MySQL needs a definite length limit for text primary
# keys/indices, so `full_name` is declared as a bounded VARCHAR.
dtype = dict(full_name=VARCHAR(50))

# Helpers

In [3]:
def display_table():
    """Render the current contents of the example table in the notebook.

    Selects every row of the table named by the module-level ``table_name``
    through a fresh connection from ``engine``, uses ``full_name`` as the
    index and casts ``likes_sport`` to ``bool`` before handing the frame
    to ``display``.
    """
    select_all = text(f'SELECT * FROM {table_name}')
    with engine.connect() as connection:
        stored_rows = pd.read_sql(select_all, con=connection, index_col='full_name')
    # NOTE(review): the cast presumably normalises backends that return
    # booleans as integers (e.g. SQLite) -- confirm.
    display(stored_rows.astype({'likes_sport':bool}))

# Save data (create a table)

In [4]:
# Example frame provided by pangres' DocsExampleTable; it is used below
# as the initial content of the table.
df = DocsExampleTable.df
# Bare last expression: the notebook renders the frame as the cell output.
df

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00+00:00,1.77
The Rock,True,2020-04-01 00:00:00+00:00,1.96
John Travolta,False,NaT,


In [5]:
# Write the initial frame. The target table comes from the `table_name`
# config variable (the same one `display_table` reads from) so that
# changing the config keeps the write and the read-back in sync.
upsert(
    engine=engine,
    df=df,
    table_name=table_name,
    if_row_exists='update',
    dtype=dtype,
    create_table=True,  # default
)
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,False,,


# `INSERT... ON CONFLICT UPDATE` with a new df

In [6]:
# Second example frame from DocsExampleTable: it contains one name that is
# already in the table ("John Travolta") and one that is not
# ("Arnold Schwarzenegger").
new_df = DocsExampleTable.new_df
# Bare last expression: the notebook renders the frame as the cell output.
new_df

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Travolta,True,2020-04-04 00:00:00+00:00,1.88
Arnold Schwarzenegger,True,NaT,1.88


In [7]:
# new records like "Arnold Schwarzenegger" are added and existing records like "John Travolta" are updated
# The target table comes from the `table_name` config variable (the same
# one `display_table` reads from) rather than a hard-coded name.
upsert(
    engine=engine,
    df=new_df,
    table_name=table_name,
    if_row_exists='update',
    dtype=dtype,
    create_table=False,  # the table was already created by the first upsert
)
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,True,2020-04-04 00:00:00.000000,1.88
Arnold Schwarzenegger,True,,1.88


# `INSERT... ON CONFLICT IGNORE` with yet another df

In [8]:
# Third example frame from DocsExampleTable: "John Travolta" (already in
# the table, here with a different size_in_meters) and a new name,
# "John Cena".
new_df2 = DocsExampleTable.new_df2
# Bare last expression: the notebook renders the frame as the cell output.
new_df2

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Travolta,True,NaT,2.5
John Cena,True,NaT,1.84


In [9]:
# new records like "John Cena" are added but existing records are not updated
# The target table comes from the `table_name` config variable (the same
# one `display_table` reads from) rather than a hard-coded name.
upsert(
    engine=engine,
    df=new_df2,
    table_name=table_name,
    if_row_exists='ignore',
    dtype=dtype,
    create_table=False,  # the table was already created by the first upsert
)
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,True,2020-04-04 00:00:00.000000,1.88
Arnold Schwarzenegger,True,,1.88
John Cena,True,,1.84
