#### Slowly Changing Dimensions (SCD) Type 3

In [0]:
%sql

-- Start from a clean slate so the notebook can be re-run from the top.
DROP TABLE IF EXISTS students;

-- SCD Type 3 target: each row keeps the current location plus the single
-- previous location of a student.
CREATE TABLE students (
  id       INT,
  name     STRING,
  curr_loc STRING,
  prev_loc STRING
);

-- Display the freshly created (empty) table.
SELECT
  id,
  name,
  curr_loc,
  prev_loc
FROM students;

id,name,curr_loc,prev_loc


In [0]:
%sql

-- Seed the target table with three students.
-- The column list is spelled out so the values stay bound to the right
-- columns even if the table's column order changes later.
-- Student 1 has no previous location, hence the NULL prev_loc.
INSERT INTO students (id, name, curr_loc, prev_loc) VALUES
  (1, 'PrasadP', 'Bangalore', NULL),
  (2, 'Satish',  'Bangalore', 'Guntur'),
  (3, 'PrasadA', 'Bangalore', 'Pune');

-- Display the seeded rows.
SELECT
  id,
  name,
  curr_loc,
  prev_loc
FROM students;

id,name,curr_loc,prev_loc
1,PrasadP,Bangalore,
2,Satish,Bangalore,Guntur
3,PrasadA,Bangalore,Pune


In [0]:
%sql

-- Recreate the first source table so the cell is safe to re-run.
DROP TABLE IF EXISTS stu_source1;

-- Incoming feed for Method 1: it carries only the current location,
-- there is no prev_loc column on the source side.
CREATE TABLE stu_source1 (
  id       INT,
  name     STRING,
  curr_loc STRING
);

-- Display the freshly created (empty) table.
SELECT
  id,
  name,
  curr_loc
FROM stu_source1;

id,name,curr_loc


In [0]:
%sql

-- Load the first batch of source rows.
-- The column list is spelled out so the values stay bound to the right
-- columns even if the table's column order changes later.
INSERT INTO stu_source1 (id, name, curr_loc) VALUES
  (1, 'PrasadP', 'Shridi'),
  (3, 'PrasadA', 'Latur'),
  (4, 'Ankit',   'Bangalore'),
  (5, 'Sriram',  'Bangalore');

-- Display the loaded source rows.
SELECT
  id,
  name,
  curr_loc
FROM stu_source1;

id,name,curr_loc
1,PrasadP,Shridi
3,PrasadA,Latur
4,Ankit,Bangalore
5,Sriram,Bangalore


In [0]:
%sql

-- Method 1: MERGE the source straight into the target.
--   * id already in students and the source brings a different, known
--     location: the current location is shifted into prev_loc and the
--     source location becomes curr_loc.
--   * id not in students: the row is inserted with no previous location.
MERGE INTO students AS tgt
USING stu_source1 AS src
  ON tgt.id = src.id
WHEN MATCHED
  -- A bare `tgt.curr_loc <> src.curr_loc` evaluates to NULL (never true)
  -- when tgt.curr_loc is NULL, so a student without a location could never
  -- receive one. The NULL case is therefore handled explicitly. A NULL
  -- source location is still not treated as a change.
  AND src.curr_loc IS NOT NULL
  AND (tgt.curr_loc IS NULL OR tgt.curr_loc <> src.curr_loc)
THEN UPDATE SET
  -- Both assignments read the row as it was before the update. When there
  -- was no current location, the existing prev_loc is kept instead of being
  -- overwritten with NULL.
  tgt.prev_loc = COALESCE(tgt.curr_loc, tgt.prev_loc),
  tgt.curr_loc = src.curr_loc
WHEN NOT MATCHED THEN
  -- Source columns are qualified so it is unambiguous where values come from.
  INSERT (id, name, curr_loc, prev_loc)
  VALUES (src.id, src.name, src.curr_loc, NULL);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
4,2,0,2


In [0]:
%sql

-- Inspect the target table after the Method 1 merge.
SELECT
  id,
  name,
  curr_loc,
  prev_loc
FROM students;

id,name,curr_loc,prev_loc
1,PrasadP,Shridi,Bangalore
3,PrasadA,Latur,Bangalore
4,Ankit,Bangalore,
5,Sriram,Bangalore,
2,Satish,Bangalore,Guntur


In [0]:
%sql

-- Preparation for the second merge method: it gets its own source table.

-- Recreate the table so the cell is safe to re-run.
DROP TABLE IF EXISTS stu_source2;

-- Same three columns as the first source table; no prev_loc column.
CREATE TABLE stu_source2 (
  id       INT,
  name     STRING,
  curr_loc STRING
);

-- Display the freshly created (empty) table.
SELECT
  id,
  name,
  curr_loc
FROM stu_source2;

id,name,curr_loc


In [0]:
%sql

-- Load the second batch of source rows.
-- The column list is spelled out so the values stay bound to the right
-- columns even if the table's column order changes later.
INSERT INTO stu_source2 (id, name, curr_loc) VALUES
  (3, 'PrasadA', 'Pune'),
  (4, 'Ankit',   'Pune');

-- Display the loaded source rows.
SELECT
  id,
  name,
  curr_loc
FROM stu_source2;

id,name,curr_loc
3,PrasadA,Pune
4,Ankit,Pune


In [0]:
%sql

-- Method 2: this also ends in a MERGE, but the source rows are first
-- enriched with the prev_loc they should carry. The source table itself is
-- not modified; the query only derives an extra column by LEFT JOINing the
-- source to the target. The enriched rows can then be merged with plain
-- matched / not-matched branches.
--
-- prev_loc rule:
--   * the student has a current location and the source location differs
--     from it (including a NULL source location): the current location
--     becomes prev_loc, so it is not lost when curr_loc is overwritten;
--   * otherwise (no change, no current location, or the student is not in
--     the target yet): the existing prev_loc is carried over, which is NULL
--     for students missing from the target.
SELECT
  source.id,
  source.name,
  source.curr_loc,
  CASE
    WHEN target.curr_loc IS NOT NULL
      AND (source.curr_loc IS NULL OR source.curr_loc <> target.curr_loc)
      THEN target.curr_loc
    ELSE target.prev_loc
  END AS prev_loc
FROM stu_source2 AS source
LEFT JOIN students AS target
  ON source.id = target.id;

id,name,curr_loc,prev_loc
3,PrasadA,Pune,Latur
4,Ankit,Pune,Bangalore


In [0]:
%sql

-- Method 2 as a MERGE: the USING subquery enriches each source row with the
-- prev_loc it should carry, then the result is merged into the target.
MERGE INTO students AS tgt
USING (
  SELECT
    source.id,
    source.name,
    source.curr_loc,
    -- When the student has a current location and the source location
    -- differs from it (including a NULL source location), the current
    -- location is kept as prev_loc so it is not lost. Otherwise the existing
    -- prev_loc is carried over (NULL for students not in the target yet).
    CASE
      WHEN target.curr_loc IS NOT NULL
        AND (source.curr_loc IS NULL OR source.curr_loc <> target.curr_loc)
        THEN target.curr_loc
      ELSE target.prev_loc
    END AS prev_loc
  FROM stu_source2 AS source
  LEFT JOIN students AS target
    ON source.id = target.id
) AS src
  ON tgt.id = src.id
WHEN MATCHED THEN
  -- Columns are listed explicitly instead of `UPDATE SET *`, so the
  -- statement does not silently change meaning if either schema changes.
  -- id is omitted because it is equal on both sides by the ON condition.
  UPDATE SET
    tgt.name     = src.name,
    tgt.curr_loc = src.curr_loc,
    tgt.prev_loc = src.prev_loc
WHEN NOT MATCHED THEN
  -- Explicit equivalent of `INSERT *`.
  INSERT (id, name, curr_loc, prev_loc)
  VALUES (src.id, src.name, src.curr_loc, src.prev_loc);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,2,0,0


In [0]:
%sql

-- Inspect the target table after the Method 2 merge.
SELECT
  id,
  name,
  curr_loc,
  prev_loc
FROM students;

id,name,curr_loc,prev_loc
1,PrasadP,Shridi,Bangalore
5,Sriram,Bangalore,
2,Satish,Bangalore,Guntur
3,PrasadA,Pune,Latur
4,Ankit,Pune,Bangalore
