# Description

This is a simple demonstration of the `pangres` library.

In [1]:
import pandas as pd
from pangres import upsert, DocsExampleTable
from sqlalchemy import create_engine, text, VARCHAR
# the line below is only for informational purposes
# this is automatically imported in notebooks/IPython
from IPython.display import display

# Config

In [2]:
table_name = 'example'
connection_string = "sqlite:///:memory:"
connection_string = "postgresql://username:password@localhost:5432"
connection_string = "mysql://username:password@localhost:3306"

engine = create_engine(connection_string)

# this is necessary if you want to test with MySQL
# instead of SQlite or Postgres because MySQL needs
# to have a definite limit for text primary keys/indices
dtype = {'full_name':VARCHAR(50)}

ModuleNotFoundError: No module named 'MySQLdb'

In [6]:
!pip install pymysql --force-reinstall

Collecting pymysql
  Using cached PyMySQL-1.0.2-py3-none-any.whl (43 kB)
Installing collected packages: pymysql
  Attempting uninstall: pymysql
    Found existing installation: PyMySQL 1.0.2
    Uninstalling PyMySQL-1.0.2:
      Successfully uninstalled PyMySQL-1.0.2
Successfully installed pymysql-1.0.2


# Helpers

In [3]:
def display_table():
    with engine.connect() as connection:
        query = text(f'SELECT * FROM {table_name}')
        display(pd.read_sql(query, con=connection, index_col='full_name')
                .astype({'likes_sport':bool}))

In [2]:
def 

Runtime type is 'generator'


<generator object Executor.execute_yield at 0x7fc51ba843a0>

# Save data (create a table)

In [4]:
df = DocsExampleTable.df
df

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00+00:00,1.77
The Rock,True,2020-04-01 00:00:00+00:00,1.96
John Travolta,False,NaT,


In [5]:
upsert(con=engine, df=df, table_name='example', if_row_exists='update', dtype=dtype,
       chunksize=1000, create_table=True)  # default
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,False,,


# `INSERT... ON CONFLICT UPDATE` with a new df

In [6]:
new_df = DocsExampleTable.new_df
new_df

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Travolta,True,2020-04-04 00:00:00+00:00,1.88
Arnold Schwarzenegger,True,NaT,1.88


In [7]:
# new records like "Arnold Schwarzenegger" are added and existing records like "John Travolta" are update
upsert(con=engine, df=new_df, table_name='example', if_row_exists='update', dtype=dtype,
       chunksize=1000, create_table=False)  # we know the table already exists so we can skip this
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,True,2020-04-04 00:00:00.000000,1.88
Arnold Schwarzenegger,True,,1.88


# `INSERT... ON CONFLICT IGNORE` with yet another df

In [8]:
new_df2 = DocsExampleTable.new_df2
new_df2

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Travolta,True,NaT,2.5
John Cena,True,NaT,1.84


In [9]:
# new records like "John Cena" are added but existing records are not updated
upsert(con=engine, df=new_df2, table_name='example', if_row_exists='ignore', dtype=dtype,
       chunksize=1000, create_table=False)
display_table()

Unnamed: 0_level_0,likes_sport,updated,size_in_meters
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
John Rambo,True,2020-02-01 00:00:00.000000,1.77
The Rock,True,2020-04-01 00:00:00.000000,1.96
John Travolta,True,2020-04-04 00:00:00.000000,1.88
Arnold Schwarzenegger,True,,1.88
John Cena,True,,1.84
