#### Slowly Changing Dimensions (SCD) Type 0: append-only. Existing rows are never modified; use this option when the source only sends new records that need to be added to the target.

In [0]:
# %fs ls dbfs:/user/hive/warehouse/dept

# %fs rm -r dbfs:/user/hive/warehouse/dept

In [0]:
%sql

-- Rebuild the SCD Type 0 target table from scratch so this cell can be re-run.
DROP TABLE IF EXISTS EMP;

CREATE TABLE EMP (
  EID   INT,
  ENAME VARCHAR(20),
  ELOC  VARCHAR(20)
);

-- Show the (empty) table that was just created.
SELECT
  EID,
  ENAME,
  ELOC
FROM EMP;

EID,ENAME,ELOC


In [0]:
%sql

-- Seed the target table with three employees using a multi-row VALUES list.
INSERT INTO EMP (EID, ENAME, ELOC)
VALUES
  (1, 'PrasadP', 'Shridi'),
  (2, 'Satish', 'Guntur'),
  (3, 'PrasadA', 'Lathur');

num_affected_rows,num_inserted_rows
3,3


In [0]:
%sql

-- Another way of loading records into a table: INSERT ... SELECT, with one
-- SELECT per row stitched together by UNION ALL.
-- OVERWRITE is used instead of INTO so that the table ends up with exactly
-- these three rows. With INTO, running this cell after the preceding
-- INSERT ... VALUES cell would store EIDs 1-3 twice.
INSERT OVERWRITE TABLE EMP (EID, ENAME, ELOC)
SELECT 1 AS EID, 'PrasadP' AS ENAME, 'Shridi' AS ELOC
UNION ALL
SELECT 2 AS EID, 'Satish' AS ENAME, 'Guntur' AS ELOC
UNION ALL
SELECT 3 AS EID, 'PrasadA' AS ENAME, 'Lathur' AS ELOC;

-- Inspect the target after the load.
SELECT
  EID,
  ENAME,
  ELOC
FROM EMP;

EID,ENAME,ELOC
1,PrasadP,Shridi
3,PrasadA,Lathur
2,Satish,Guntur


In [0]:
%sql

-- Source table that will hold the incoming data; it has the same three
-- columns as EMP. Dropped first so the cell can be re-run.
DROP TABLE IF EXISTS EMP_SOURCE;

CREATE TABLE EMP_SOURCE (
  EID   INT,
  ENAME VARCHAR(20),
  ELOC  VARCHAR(20)
);

-- Show the (empty) source table.
SELECT
  EID,
  ENAME,
  ELOC
FROM EMP_SOURCE;

EID,ENAME,ELOC


In [0]:
%sql

-- Load the source with four rows: EIDs 2 and 3 repeat rows that were seeded
-- into EMP, EIDs 4 and 5 are new.
INSERT INTO EMP_SOURCE (EID, ENAME, ELOC)
VALUES
  (4, 'Ankit', 'Jamshedpur'),
  (2, 'Satish', 'Guntur'),
  (3, 'PrasadA', 'Lathur'),
  (5, 'Sriram', 'Rajmundary');

-- Inspect the source rows.
SELECT
  EID,
  ENAME,
  ELOC
FROM EMP_SOURCE;

EID,ENAME,ELOC
5,Sriram,Rajmundary
4,Ankit,Jamshedpur
3,PrasadA,Lathur
2,Satish,Guntur


In [0]:
%sql

-- Find the new data: rows of EMP_SOURCE whose EID does not exist in EMP yet.

-- Method 1 (alternative, left disabled): NOT IN subquery
-- SELECT * FROM EMP_SOURCE WHERE EID NOT IN (SELECT EID FROM EMP);

-- Method 2: anti join, which returns only the source rows that have no
-- matching EID in the target.
SELECT
  src.EID,
  src.ENAME,
  src.ELOC
FROM EMP_SOURCE AS src
LEFT ANTI JOIN EMP AS tgt
  ON src.EID = tgt.EID;

EID,ENAME,ELOC
5,Sriram,Rajmundary
4,Ankit,Jamshedpur


In [0]:
%sql

-- SCD Type 0 load: append only the source rows whose EID is not yet in EMP.
-- Rows that already exist in EMP are never touched, even if the source now
-- carries different values for them. Either new-row check (NOT IN or anti
-- join) can be used to select the rows to append.
-- Columns are named explicitly on both sides so the insert does not rely on
-- EMP and EMP_SOURCE having the same column order.
INSERT INTO EMP (EID, ENAME, ELOC)
SELECT
  src.EID,
  src.ENAME,
  src.ELOC
FROM EMP_SOURCE AS src
LEFT ANTI JOIN EMP AS tgt
  ON src.EID = tgt.EID;

num_affected_rows,num_inserted_rows
2,2


In [0]:
%sql

-- Target table after the append: the original rows plus the new EIDs.
SELECT
  EID,
  ENAME,
  ELOC
FROM EMP;

EID,ENAME,ELOC
5,Sriram,Rajmundary
4,Ankit,Jamshedpur
1,PrasadP,Shridi
3,PrasadA,Lathur
2,Satish,Guntur


#### SCD Type 1: uses UPSERT — new records are inserted and existing records are overwritten in place, so no history is kept.

#### Example 1: employee table

In [0]:
%sql

-- Target table for the SCD Type 1 example; dropped first so the cell can be re-run.
DROP TABLE IF EXISTS emp_target;

CREATE TABLE emp_target (
  eid   INT,
  ename VARCHAR(20),
  eloc  VARCHAR(20)
);

-- Show the (empty) target table.
SELECT
  eid,
  ename,
  eloc
FROM emp_target;

eid,ename,eloc


In [0]:
%sql

-- Seed the target with the current state of three employees.
INSERT INTO emp_target (eid, ename, eloc)
VALUES
  (1, 'PrasadP', 'Shridi'),
  (2, 'Satish', 'Bangalore'),
  (3, 'PrasadA', 'Latur');

-- Inspect the seeded target.
SELECT
  eid,
  ename,
  eloc
FROM emp_target;

eid,ename,eloc
1,PrasadP,Shridi
2,Satish,Bangalore
3,PrasadA,Latur


In [0]:
%sql

-- Source table for the SCD Type 1 example. Its column names (id, name, loc)
-- differ from the target's (eid, ename, eloc).
-- NOTE(review): table names are presumably case-insensitive here, in which
-- case this DROP also removes the EMP_SOURCE table from the Type 0 section.
DROP TABLE IF EXISTS emp_source;

CREATE TABLE emp_source (
  id   INT,
  name VARCHAR(20),
  loc  VARCHAR(20)
);

-- Show the (empty) source table.
SELECT
  id,
  name,
  loc
FROM emp_source;

id,name,loc


In [0]:
%sql

-- Load the source: id 1 carries a changed location for an employee seeded
-- into emp_target, ids 4 and 5 are new employees.
INSERT INTO emp_source (id, name, loc)
VALUES
  (1, 'PrasadP', 'Hyderabad'),
  (4, 'Ankit', 'Bangalore'),
  (5, 'Sriram', 'Bangalore');

-- Inspect the source rows.
SELECT
  id,
  name,
  loc
FROM emp_source;

id,name,loc
1,PrasadP,Hyderabad
5,Sriram,Bangalore
4,Ankit,Bangalore


In [0]:
%sql

-- SCD Type 1 upsert: fold emp_source (s) into emp_target (t), matching on
-- the employee id.
MERGE INTO emp_target AS t
USING emp_source AS s
  ON t.eid = s.id
-- Employee already in the target: overwrite name and location with the source values.
WHEN MATCHED THEN
  UPDATE SET
    t.ename = s.name,
    t.eloc  = s.loc
-- Employee not in the target yet: add it.
WHEN NOT MATCHED THEN
  INSERT (eid, ename, eloc)
  VALUES (s.id, s.name, s.loc);

-- Inspect the target after the merge.
SELECT
  eid,
  ename,
  eloc
FROM emp_target;

eid,ename,eloc
1,PrasadP,Hyderabad
5,Sriram,Bangalore
4,Ankit,Bangalore
2,Satish,Bangalore
3,PrasadA,Latur


#### Example 2: Event table

##### In this example the EVENTS table already holds data, and we upsert the UPDATES table into it. If the delete flag of a row in UPDATES is true, the matching record is deleted from EVENTS; if the flag is false, the record is updated when it already exists in EVENTS and inserted when it does not.

In [0]:
%sql

-- Target table for the delete-aware merge example; dropped first so the cell
-- can be re-run.
DROP TABLE IF EXISTS events;

CREATE TABLE events (
  event_id   INT,
  event_date DATE,
  data       STRING,
  `delete`   BOOLEAN  -- back-quoted because DELETE is also a SQL keyword
);

In [0]:
%sql

-- Seed the events table with four events, none flagged for deletion.
-- Typed literals (DATE '...', FALSE) are used so the values match the
-- DATE and BOOLEAN column types without relying on implicit casts from
-- strings and integers. The column list makes the mapping explicit.
INSERT INTO events (event_id, event_date, data, `delete`)
VALUES
  (1, DATE '2023-01-01', 'Hello World', FALSE),
  (2, DATE '2023-01-02', 'How are you', FALSE),
  (3, DATE '2023-01-03', 'How do you do', FALSE),
  (4, DATE '2023-01-04', 'Have you received your coupon', FALSE);

-- Inspect the seeded events.
SELECT
  event_id,
  event_date,
  data,
  `delete`
FROM events;

event_id,event_date,data,delete
1,2023-01-01,Hello World,False
2,2023-01-02,How are you,False
3,2023-01-03,How do you do,False
4,2023-01-04,Have you received your coupon,False


In [0]:
%sql

-- Table of incoming changes for the events table; same four columns as
-- events. Dropped first so the cell can be re-run.
DROP TABLE IF EXISTS updates;

CREATE TABLE updates (
  event_id   INT,
  event_date DATE,
  data       STRING,
  `delete`   BOOLEAN  -- back-quoted because DELETE is also a SQL keyword
);

-- Show the (empty) updates table.
SELECT
  event_id,
  event_date,
  data,
  `delete`
FROM updates;

event_id,event_date,data,delete


In [0]:
%sql

-- Load the incoming changes: event_ids 5 and 6 are new events, event_id 1
-- carries changed values, and event_id 2 is flagged for deletion.
-- Typed literals (DATE '...', TRUE/FALSE) are used so the values match the
-- DATE and BOOLEAN column types without relying on implicit casts.
-- The string values keep their double quotes because one of them contains
-- an apostrophe.
INSERT INTO updates (event_id, event_date, data, `delete`)
VALUES
  (5, DATE '2023-01-05', "What's up", FALSE),
  (1, DATE '2023-02-01', "Where have you been", FALSE),
  (2, DATE '2023-02-02', "Why did you leave early", TRUE),
  (6, DATE '2023-01-06', "When is the time", FALSE);

-- Inspect the incoming changes.
SELECT
  event_id,
  event_date,
  data,
  `delete`
FROM updates;

event_id,event_date,data,delete
5,2023-01-05,What's up,False
1,2023-02-01,Where have you been,False
2,2023-02-02,Why did you leave early,True
6,2023-01-06,When is the time,False


In [0]:
%sql

-- Merge the updates table into the events table, matching on event_id and
-- honouring the delete flag of each incoming row:
--   * matched, flag is true      -> delete the event
--   * matched, flag is not true  -> overwrite event_date and data
--   * unmatched, flag is not true -> insert as a new event
-- An unmatched row whose flag is true is skipped: there is nothing to delete,
-- and inserting it would add an event that is already marked as deleted.
-- Aliases: tgt is the table being modified (events), src is the table
-- supplying the changes (updates).
MERGE INTO events AS tgt
USING updates AS src
  ON tgt.event_id = src.event_id
-- The flag is tested as a boolean rather than compared with the integer 1.
WHEN MATCHED AND src.`delete` IS TRUE THEN
  DELETE
WHEN MATCHED THEN
  UPDATE SET
    tgt.event_date = src.event_date,
    tgt.data       = src.data
WHEN NOT MATCHED AND src.`delete` IS NOT TRUE THEN
  INSERT (event_id, event_date, data, `delete`)
  VALUES (src.event_id, src.event_date, src.data, src.`delete`);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
4,1,1,2


In [0]:
%sql

-- Events table after the merge.
SELECT
  event_id,
  event_date,
  data,
  `delete`
FROM events;

event_id,event_date,data,delete
3,2023-01-03,How do you do,False
4,2023-01-04,Have you received your coupon,False
5,2023-01-05,What's up,False
1,2023-02-01,Where have you been,False
6,2023-01-06,When is the time,False
