In [76]:
import awswrangler as wr

### Create a connection to the Redshift database

In [77]:
# Open the Redshift connection that the query and write cells below pass as `con`.
redshift_con = wr.redshift.connect(
    connection="adriano_redshift_cluster",
)

### Query existing records in Redshift table

In [78]:
# Read the current contents of public.category_1 before any upsert is applied.
existing_records_df = wr.redshift.read_sql_query(
    sql='SELECT * FROM public.category_1',
    con=redshift_con,
)
existing_records_df

Unnamed: 0,catid,catgroup,catname,catdesc,date_modified
0,1,Sports,MLB,Major League Baseball,2021-03-11
1,6,Shows,Musicals,Musical theatre,2021-03-11
2,11,Concerts,Classical,"All symphony, concerto, and choir concerts",2021-03-11
3,4,Sports,NBA,National Basketball Association,2021-03-11
4,7,Shows,Plays,All non-musical theatre,2021-03-11
5,10,Concerts,Jazz,All jazz singers and bands,2021-03-11
6,3,Sports,NFL,National Football League,2021-03-11
7,8,Shows,Opera,All opera and light opera,2021-03-11
8,2,Sports,NHL,National Hockey League,2021-03-11
9,5,Sports,MLS,Major League Soccer,2021-03-11


### Read updated data from S3

In [79]:
# Load the category rows to be upserted from the CSV object in S3.
upsert_records_df = wr.s3.read_csv(
    "s3://adriano-datalake-us-east-1/raw/categories/categories.csv"
)
upsert_records_df

Unnamed: 0,catid,catgroup,catname,catdesc,date_modified
0,12,Concerts,Electro Swing,Mix of Jazz and Electronic Dance,2023-03-05
1,13,Concerts,House,electronic dance music,2023-03-05
2,9,Concerts,Country pop,a fusion genre of country music and pop music,2023-03-05


### Upsert to Redshift (< 1000 records)

In [80]:
# Upsert the S3 rows into public.category_1, matching existing rows on `catid`.
# `use_column_names=True` is passed so columns are addressed by name.
output = wr.redshift.to_sql(
    df=upsert_records_df,
    con=redshift_con,
    schema='public',
    table='category_1',
    mode='upsert',
    primary_keys=['catid'],
    use_column_names=True,
)

In [82]:
# Re-read public.category_1 to inspect the table after the to_sql upsert.
existing_records_df = wr.redshift.read_sql_query(
    sql='SELECT * FROM public.category_1',
    con=redshift_con,
)
existing_records_df

Unnamed: 0,catid,catgroup,catname,catdesc,date_modified
0,3,Sports,NFL,National Football League,2021-03-11
1,8,Shows,Opera,All opera and light opera,2021-03-11
2,2,Sports,NHL,National Hockey League,2021-03-11
3,5,Sports,MLS,Major League Soccer,2021-03-11
4,13,Concerts,House,electronic dance music,2023-03-05
5,9,Concerts,Country pop,a fusion genre of country music and pop music,2023-03-05
6,1,Sports,MLB,Major League Baseball,2021-03-11
7,6,Shows,Musicals,Musical theatre,2021-03-11
8,11,Concerts,Classical,"All symphony, concerto, and choir concerts",2021-03-11
9,4,Sports,NBA,National Basketball Association,2021-03-11


### Upsert to Redshift (> 1000 records)

In [83]:
# S3 location handed to wr.redshift.copy as `path`.
s3_path = 's3://adriano-datalake-us-east-1/raw/temp_path/'

# Column name -> SQL type mapping handed to wr.redshift.copy as `dtype`.
dtype = {
    'catid': 'smallint',
    'catgroup': 'varchar(10)',
    'catname': 'varchar(20)',
    'catdesc': 'varchar(50)',
    'date_modified': 'date',
}

# Upsert the same frame through the S3-backed copy path, matching on `catid`.
# `index=False` keeps the DataFrame index out of the written data.
output = wr.redshift.copy(
    df=upsert_records_df,
    path=s3_path,
    con=redshift_con,
    schema='public',
    table='category_1',
    mode='upsert',
    primary_keys=['catid'],
    use_column_names=True,
    index=False,
    dtype=dtype,
)

### Upsert using Latest Date

In [84]:
# Load the rows used for the date-based upsert from the second CSV object in S3.
upsert_records_date_df = wr.s3.read_csv(
    "s3://adriano-datalake-us-east-1/raw/categories/categories_date_modified.csv"
)
upsert_records_date_df

Unnamed: 0,catid,catgroup,catname,catdesc,date_modified
0,13,Concerts,House,Electronic Dance Music,2023-03-25
1,9,Concerts,Plays,Pop,2023-03-01


In [85]:
# Upsert the date-modified rows loaded in the previous cell
# (`upsert_records_date_df`), not the earlier `upsert_records_df`, which was
# already written to the table above.
#
# Rows are matched on `catid`. Per the awswrangler documentation,
# `precombine_key` makes a primary-key match compare that column between the
# incoming and existing row and keep the one with the larger value, so here
# the row with the latest `date_modified` wins.
output = wr.redshift.to_sql(
    df=upsert_records_date_df,
    con=redshift_con,
    schema='public',
    table='category_1',
    mode='upsert',
    primary_keys=['catid'],
    precombine_key='date_modified',
    use_column_names=True,
)

### Select Final Records from Database

In [86]:
# Final read of public.category_1 after all upserts in this notebook.
existing_records_df = wr.redshift.read_sql_query(
    sql='SELECT * FROM public.category_1',
    con=redshift_con,
)
existing_records_df

Unnamed: 0,catid,catgroup,catname,catdesc,date_modified
0,3,Sports,NFL,National Football League,2021-03-11
1,8,Shows,Opera,All opera and light opera,2021-03-11
2,2,Sports,NHL,National Hockey League,2021-03-11
3,5,Sports,MLS,Major League Soccer,2021-03-11
4,12,Concerts,Electro Swing,Mix of Jazz and Electronic Dance,2023-03-05
5,1,Sports,MLB,Major League Baseball,2021-03-11
6,6,Shows,Musicals,Musical theatre,2021-03-11
7,11,Concerts,Classical,"All symphony, concerto, and choir concerts",2021-03-11
8,4,Sports,NBA,National Basketball Association,2021-03-11
9,7,Shows,Plays,All non-musical theatre,2021-03-11


### Close the Redshift connection

In [87]:
# Close the Redshift connection opened at the top of the notebook.
redshift_con.close()