In [0]:
from pyspark.sql import SparkSession, functions as f

# Load the bronze-layer departments extract for each hospital
df_hosa = spark.read.parquet("/mnt/bronze/hosa/departments")
df_hosb = spark.read.parquet("/mnt/bronze/hosb/departments")

# Build the merged departments frame in one chain:
#   1. stack both extracts, aligning columns by name rather than position
#   2. copy the source key `deptid` into SRC_Dept_id
#   3. derive Dept_id as "<deptid>_<datasource>" (NULL if either part is NULL)
#   4. drop the original `deptid` column
df_merged = (
    df_hosa.unionByName(df_hosb)
    .withColumn("SRC_Dept_id", f.col("deptid"))
    .withColumn(
        "Dept_id",
        f.concat(f.col("deptid"), f.lit("_"), f.col("datasource")),
    )
    .drop("deptid")
)

# Expose the result to the SQL cells under the name `departments`
df_merged.createOrReplaceTempView("departments")

In [0]:
# Show the merged departments DataFrame for a visual check before it is loaded
df_merged.display()

In [0]:
%sql
-- Make ar_hos_adb_ws the current catalog for the statements that follow
USE CATALOG ar_hos_adb_ws;

-- Ensure the silver schema exists (no-op when it is already there)
CREATE SCHEMA IF NOT EXISTS silver;

-- Target Delta table for the merged departments data (created only if absent)
CREATE TABLE IF NOT EXISTS silver.departments (
  Dept_Id        STRING,   -- composite key: "<source dept id>_<datasource>"
  SRC_Dept_Id    STRING,   -- department id as it appears in the source data
  Name           STRING,
  datasource     STRING,
  is_quarantined BOOLEAN   -- TRUE when the row is missing SRC_Dept_Id or Name
)
USING DELTA;


In [0]:
%sql
-- Remove all existing rows from the silver departments table (full-refresh load)
TRUNCATE TABLE silver.departments

In [0]:
%sql
-- Load silver.departments from the `departments` temp view.
-- INSERT OVERWRITE replaces the table's existing rows as part of this statement,
-- so re-running this cell on its own cannot append duplicate rows.
INSERT OVERWRITE silver.departments
SELECT
  Dept_Id,
  SRC_Dept_Id,
  Name,
  datasource,
  -- Quarantine rows that are missing the source department id or the name
  CASE
    WHEN SRC_Dept_Id IS NULL OR Name IS NULL THEN TRUE
    ELSE FALSE
  END AS is_quarantined
FROM departments
