## 1: Create database and tables

In [0]:
%sql
create database if not exists scd;

In [0]:
%sql
CREATE OR REPLACE TABLE scd.Client(
	ID BIGINT  GENERATED BY DEFAULT AS IDENTITY ,
	ClientName string ,
	Country string ,
	Town string ,
	County string ,
	Address1 string ,
	Address2 string ,
	ClientType string ,
	ClientSize string);

In [0]:
%sql
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'NA', 'UK', 'Uttoxeter', 'Staffs', '6, Grove Drive', NULL, 'Private', 'M');
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'John Smith', 'UK', 'Uttoxeter', 'Staffs', '4, Grove Drive', NULL, 'Private', 'M');
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'Bauhaus Motors', 'UK', 'Oxford', 'Oxo', 'Suite 27', '12-14 Turl Street', 'Business', 'S');
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'Honest Fred', 'UK', 'Stoke', 'Staffs', NULL, NULL, 'Business', 'S');
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'Fast Eddie', 'Wales', 'Cardiff', NULL, NULL, NULL, 'Business', 'L');
INSERT 	into scd.Client ( ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize) VALUES ( 'Slow Sid', 'France', 'Avigno', 'Vaucluse', '2, Rue des Courtisans', NULL, 'Private', 'M');


num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql
CREATE TABLE scd.Client_SCD1(
	ClientID BIGINT  GENERATED BY DEFAULT AS IDENTITY,
	BusinessKey int  ,
	ClientName string,
	Country string ,
	Town string ,
	County string ,
	Address1 string ,
	Address2 string ,
	ClientType string ,
	ClientSize string
)

In [0]:
%sql
CREATE TABLE scd.Client_SCD2(
	ClientID BIGINT  GENERATED BY DEFAULT AS IDENTITY,
	BusinessKey int  ,
	ClientName string,
	Country string ,
	Town string ,
	County string ,
	Address1 string ,
	Address2 string ,
	ClientType string ,
	ClientSize string,
    ValidFrom timestamp ,
	ValidTo timestamp ,
	IsCurrent int
    )

In [0]:
%sql
CREATE TABLE scd.Client_SCD3(
	ClientID BIGINT  GENERATED BY DEFAULT AS IDENTITY,
	BusinessKey int,
	ClientName  string,
	Country string ,
	Country_Prev1  string,
	Country_Prev1_ValidTo  timestamp,
	Country_Prev2  string,
	Country_Prev2_ValidTo timestamp
) 

In [0]:
%sql
CREATE TABLE scd.Client_SCD6(
    row_key BIGINT  GENERATED BY DEFAULT AS IDENTITY,
    ClientID BIGINT,
	BusinessKey int  ,
	ClientName string,
	Country string ,
	Town string ,
	County string ,
	Address1 string ,
	Address2 string ,
	ClientType string ,
	ClientSize string,
    ValidFrom timestamp ,
	ValidTo timestamp ,
	IsCurrent int,
    Country_Prev  string)
    ;

### == SCD Type 1: overwrite ==
- This method overwrites old with new data, and therefore does not track historical data.

[Types of Slowly changing dimensions](https://en.wikipedia.org/wiki/Slowly_changing_dimension")

- The disadvantage of the Type 1 method is that there is no history in the data warehouse. It has the advantage however that it's easy to maintain.

- If one has calculated an aggregate table summarizing facts by supplier state, it will need to be recalculated when the Supplier_State is changed.

In [0]:
%sql -- SCD 1
MERGE into scd.Client_SCD1 AS DST USING scd.Client AS SRC ON (SRC.ID = DST.BusinessKey)
WHEN MATCHED
AND (
  coalesce(DST.ClientName, '') <> coalesce(SRC.ClientName, '')
  OR coalesce(DST.Country, '') <> coalesce(SRC.Country, '')
  OR coalesce(DST.Town, '') <> coalesce(SRC.Town, '')
  OR coalesce(DST.Address1, '') <> coalesce(SRC.Address1, '')
  OR coalesce(DST.Address2, '') <> coalesce(SRC.Address2, '')
  OR coalesce(DST.ClientType, '') <> coalesce(SRC.ClientType, '')
  OR coalesce(DST.ClientSize, '') <> coalesce(SRC.ClientSize, '')
) THEN
UPDATE
SET
  DST.ClientName = SRC.ClientName,
  DST.Country = SRC.Country,
  DST.Town = SRC.Town,
  DST.Address1 = SRC.Address1,
  DST.Address2 = SRC.Address2,
  DST.ClientType = SRC.ClientType,
  DST.ClientSize = SRC.ClientSize
  WHEN NOT MATCHED THEN
INSERT
  (
    BusinessKey,
    ClientName,
    Country,
    Town,
    County,
    Address1,
    Address2,
    ClientType,
    ClientSize
  )
VALUES
  (
    SRC.ID,
    SRC.ClientName,
    SRC.Country,
    SRC.Town,
    SRC.County,
    Address1,
    Address2,
    ClientType,
    ClientSize
  );

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,1,0,1


In [0]:
%sql
select * from scd.Client_SCD1

ClientID,BusinessKey,ClientName,Country,Town,County,Address1,Address2,ClientType,ClientSize
1,2,Bauhaus Motors,UK,Oxford,Oxo,Suite 27,12-14 Turl Street,Business,S
2,5,Slow Sid,France,Avigno,Vaucluse,"2, Rue des Courtisans",,Private,M
3,1,John Smith,UK,Uttoxeter,Staffs,"4, Grove Drive",,Private,M
9,6,,UK,Uttoxeter,Staffs,"5, Grove Drive",,Private,M
4,3,Honest Fred,UK,Stoke,Staffs,100,,Business,S
5,4,Fast Eddie,Wales,Cardiff,,,,Business,L


### == SCD Type 2: add new row ==
This method tracks historical data by creating multiple records for a given natural key in the dimensional tables with separate surrogate keys and/or different version numbers. Unlimited history is preserved for each insert.

In [0]:
%sql 
 
-- ========================================
-- Merge SQL API is available since DBR 5.1
-- ========================================
 
MERGE INTO scd.Client_SCD2
USING (
   -- These rows will either UPDATE the current addresses of existing customers or INSERT the new addresses of new customers
  SELECT updates.ID as mergeKey, updates.*
  FROM scd.Client updates
  
  UNION ALL
  
  -- These rows will INSERT new addresses of existing customers 
  -- Setting the mergeKey to NULL forces these rows to NOT MATCH and be INSERTed.
  SELECT NULL as mergeKey, updates.*
  FROM scd.Client as updates JOIN scd.Client_SCD2 
  ON updates.ID = scd.Client_SCD2.BusinessKey 
  WHERE scd.Client_SCD2.IsCurrent = 1 
  ---AND updates.address <> customers.address 
  
) staged_updates
ON scd.Client_SCD2.BusinessKey = mergeKey
WHEN MATCHED AND scd.Client_SCD2.IsCurrent = 1  THEN  
  UPDATE SET IsCurrent = 0, ValidTo = CURRENT_TIMESTAMP()   
WHEN NOT MATCHED THEN 
  INSERT (BusinessKey, ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize, ValidFrom, IsCurrent) 
  VALUES(staged_updates.ID, staged_updates.ClientName, staged_updates.County,staged_updates.Town, staged_updates.County, staged_updates.Address1, staged_updates.Address2, staged_updates.ClientType, staged_updates.ClientSize, CURRENT_TIMESTAMP(), 1) 

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
13,6,0,7


In [0]:
%sql
select * from scd.Client_SCD2

ClientID,BusinessKey,ClientName,Country,Town,County,Address1,Address2,ClientType,ClientSize,ValidFrom,ValidTo,IsCurrent
1,2,Bauhaus Motors,Oxo,Oxford,Oxo,Suite 27,12-14 Turl Street,Business,S,2022-09-02T11:43:13.792+0000,2022-09-02T11:45:11.997+0000,0
2,5,Slow Sid,Vaucluse,Avigno,Vaucluse,"2, Rue des Courtisans",,Private,M,2022-09-02T11:43:13.792+0000,2022-09-02T11:45:11.997+0000,0
3,1,John Smith,Staffs,Uttoxeter,Staffs,"4, Grove Drive",,Private,M,2022-09-02T11:43:13.792+0000,2022-09-02T11:45:11.997+0000,0
14,2,Bauhaus Motors,Oxo,Oxford,Oxo,Suite 27,12-14 Turl Street,Business,S,2022-09-02T11:45:11.997+0000,,1
4,6,,Staffs,Uttoxeter,Staffs,"5, Grove Drive",,Private,M,2022-09-02T11:43:13.792+0000,2022-09-02T11:45:11.997+0000,0
15,5,Slow Sid,Vaucluse,Avigno,Vaucluse,"2, Rue des Courtisans",,Private,M,2022-09-02T11:45:11.997+0000,,1
5,3,Honest Fred,Staffs,Stoke,Staffs,100,,Business,S,2022-09-02T11:43:13.792+0000,2022-09-02T11:45:11.997+0000,0
16,1,John Smith,Staffs,Uttoxeter,Staffs,"4, Grove Drive",,Private,M,2022-09-02T11:45:11.997+0000,,1
17,3,Honest Fred,Staffs,Stoke,Staffs,00000 UNKNOWN,,Business,S,2022-09-02T11:45:11.997+0000,,1
11,7,NA-2,Staffs,Uttoxeter,Staffs,"5, Grove Drive",,Private,M,2022-09-02T11:45:11.997+0000,,1


### == Type 3: add new attribute ==
This method tracks changes using separate columns and preserves limited history. The Type 3 preserves limited history as it is limited to the number of columns designated for storing historical data. The original table structure in Type 1 and Type 2 is the same but Type 3 adds additional columns.

In [0]:
%sql
MERGE	into	scd.Client_SCD3				AS DST
USING		scd.Client			AS SRC
ON			(SRC.ID = DST.BusinessKey)

WHEN MATCHED 
AND		(DST.Country <> SRC.Country
		 OR DST.ClientName <> SRC.ClientName)

THEN UPDATE 

SET		DST.Country = SRC.Country
		,DST.ClientName = SRC.ClientName
		,DST.Country_Prev1 = DST.Country
		,DST.Country_Prev1_ValidTo = current_timestamp() - INTERVAL '1' DAY
		,DST.Country_Prev2 = DST.Country_Prev1
		,DST.Country_Prev2_ValidTo = DST.Country_Prev1_ValidTo
        
WHEN NOT MATCHED THEN

INSERT (BusinessKey, ClientName, Country)
VALUES (SRC.ID, SRC.ClientName, SRC.Country)
;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
1,1,0,0


In [0]:
%sql
select * from scd.Client_SCD3 order by 1

ClientID,BusinessKey,ClientName,Country,Country_Prev1,Country_Prev1_ValidTo,Country_Prev2,Country_Prev2_ValidTo
1,2,Bauhaus Motors,UK,,,,
2,5,Slow Sid,France,,,,
3,1,John Smith,UK,,,,
4,3,Honest Fred,INDIA,UK,2022-09-01T12:17:35.935+0000,,
5,7,NA-2,UK,,,,
6,6,,UK,,,,
7,4,Fast Eddie,Wales,,,,


### == SCD Type 6: combined approach ==
The Type 6 method combines the approaches of types 1, 2 and 3 (1 + 2 + 3 = 6)

In [0]:
%sql 
 
-- ========================================
-- Merge SQL API 
-- ========================================
 
MERGE INTO scd.Client_SCD6
USING (
   -- These rows will either UPDATE the current addresses of existing customers or INSERT the new addresses of new customers
  SELECT updates.ID as mergeKey, updates.*,cast(current_timestamp() AS LONG) as ClientId
  FROM scd.Client updates
  
  UNION ALL
  
  -- These rows will INSERT new addresses of existing customers 
  -- Setting the mergeKey to NULL forces these rows to NOT MATCH and be INSERTed.
  SELECT NULL as mergeKey, updates.*, coalesce(scd.Client_SCD6.ClientId,cast(current_timestamp() AS LONG)) as ClientId
  FROM scd.Client as updates JOIN scd.Client_SCD6 
  ON updates.ID = scd.Client_SCD6.BusinessKey 
  WHERE scd.Client_SCD6.IsCurrent = 1 
  ---AND updates.address <> customers.address 
  
) staged_updates
ON scd.Client_SCD6.BusinessKey = mergeKey
WHEN MATCHED AND scd.Client_SCD6.IsCurrent = 1  THEN  
  UPDATE SET IsCurrent = 0, ValidTo = CURRENT_TIMESTAMP()   
WHEN NOT MATCHED THEN 
  INSERT ( ClientID,BusinessKey, ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize, ValidFrom, IsCurrent, Country_Prev) 
  VALUES(
  staged_updates.ClientID,staged_updates.ID, staged_updates.ClientName, staged_updates.Country,staged_updates.Town, staged_updates.County, staged_updates.Address1, staged_updates.Address2, staged_updates.ClientType, staged_updates.ClientSize, CURRENT_TIMESTAMP(), 1,staged_updates.Country) 

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,1,0,2


In [0]:
%sql
MERGE	into	scd.Client_SCD6				AS DST
USING		scd.Client			AS SRC
ON			(SRC.ID = DST.BusinessKey)

WHEN MATCHED 
AND		(DST.Country <> SRC.Country
		 OR DST.ClientName <> SRC.ClientName)

THEN UPDATE 

SET		DST.Country = SRC.Country
		,DST.ClientName = SRC.ClientName        
WHEN NOT MATCHED THEN

INSERT (
ClientID,BusinessKey, ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize, ValidFrom, IsCurrent) 
  VALUES(
 cast(current_timestamp() AS LONG), SRC.ID, SRC.ClientName, SRC.Country,SRC.Town, SRC.County, SRC.Address1, SRC.Address2, SRC.ClientType, SRC.ClientSize, CURRENT_TIMESTAMP(), 1) ;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,3,0,0


In [0]:
%sql
MERGE	into	scd.Client_SCD6				AS DST
USING		scd.Client			AS SRC
ON			(SRC.ID = DST.BusinessKey)

WHEN MATCHED 
AND		(DST.Country <> SRC.Country
		 OR DST.ClientName <> SRC.ClientName) and ValidFrom is NULL

THEN UPDATE 

SET		DST.Country_Prev = DST.Country
        
WHEN NOT MATCHED THEN

INSERT (
ClientID,BusinessKey, ClientName, Country, Town, County, Address1, Address2, ClientType, ClientSize, ValidFrom, IsCurrent) 
  VALUES(
 cast(current_timestamp() AS LONG), SRC.ID, SRC.ClientName, SRC.Country,SRC.Town, SRC.County, SRC.Address1, SRC.Address2, SRC.ClientType, SRC.ClientSize, CURRENT_TIMESTAMP(), 1) ;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
0,0,0,0


In [0]:
%sql
select * from   scd.Client_SCD6 order by row_key 

row_key,ClientID,BusinessKey,ClientName,Country,Town,County,Address1,Address2,ClientType,ClientSize,ValidFrom,ValidTo,IsCurrent,Country_Prev
21,1662384241,3,Honest Fred,HBD5,Stoke,INDIA,00000 UNKNOWN,,Business,S,2022-09-05T13:24:01.751+0000,2022-09-05T13:24:39.106+0000,0,HBD1
23,1662384241,3,Honest Fred,HBD5,Stoke,INDIA,00000 UNKNOWN,,Business,S,2022-09-05T13:24:39.106+0000,2022-09-05T13:25:16.129+0000,0,HBD2
25,1662384241,3,Honest Fred,HBD5,Stoke,INDIA,00000 UNKNOWN,,Business,S,2022-09-05T13:25:16.129+0000,2022-09-05T13:28:41.946+0000,0,HBD3
26,1662384521,8,HONEST TED,UK,Uttoxeter,Staffs,"6, Grove Drive",,Private,M,2022-09-05T13:28:41.946+0000,,1,UK
29,1662384241,3,Honest Fred,HBD5,Stoke,INDIA,00000 UNKNOWN,,Business,S,2022-09-05T13:28:41.946+0000,,1,HBD5
