In [1]:
import pandas as pd

In [2]:
# Read the users extract from the working directory into a DataFrame.
users = pd.read_csv("users.csv")

### Create streaming bronze table

In [None]:
-- Bronze ingestion: load only new and updated user files every time the pipeline runs.
-- Each column is cast explicitly so the bronze schema does not depend on inference,
-- and two audit columns (source file name, load timestamp) are appended.
CREATE STREAMING LIVE TABLE Users_BronzeLiveIncremental

-- optionally define the schema


USING DELTA

-- NOTE(review): the original PARTITIONED BY (product_id) clause was dropped because
-- product_id is not produced by the SELECT below; add a partition clause on one of
-- the selected columns if partitioning is required.

COMMENT "Live Bronze table for Users"

AS

SELECT  user_id::Int, first_name::String, last_name::String, gender::String,
        avg_airtime::Double, date_of_birth::Date, education_status::String,
        employment_status::String, bank::String, monthly_income::Double,
        number_children::Int, owns_car::Boolean, payment_system::String,
        state_of_origin::String, professional_category::String, email_is_validated::Boolean,

        -- Audit columns: originating file and the time the row was loaded
        INPUT_FILE_NAME() AS FileName,
        CURRENT_TIMESTAMP() AS CreatedOn

FROM cloud_files(

                    -- Path corrected from "/mtn/..." to match the "/mnt/datalake/Raw" root
                    -- used by the parquet source elsewhere in this notebook.
                    "/mnt/datalake/Raw/Users/",
                    "csv",
                    map("inferSchema","true")
                )

### Create streaming bronze view

In [None]:
-- Streaming view over the bronze users table.
-- Projects the user attributes needed downstream, derives an approximate age in
-- years (days since date_of_birth divided by 365.25), and fails the pipeline
-- update if a record has a missing or non-positive user_id.
CREATE STREAMING LIVE VIEW Users_BronzeLiveIncrementalView
(    
    CONSTRAINT Valid_user_id   EXPECT (user_id IS NOT NULL AND user_id > 0) ON VIOLATION FAIL UPDATE
)

AS

SELECT  user_id, first_name, last_name, gender, avg_airtime, date_of_birth, 
        education_status, employment_status, bank, monthly_income,
        -- The bronze table names this column number_children; alias it so the
        -- view keeps exposing number_of_children.
        number_children AS number_of_children,
        owns_car, state_of_origin, professional_category, CreatedOn,
        DATEDIFF(CURRENT_DATE(), date_of_birth) / 365.25 AS age

-- The bronze table is declared as Users_BronzeLiveIncremental (with an underscore).
FROM STREAM(live.Users_BronzeLiveIncremental)

### Create live table

In [None]:
-- Live table holding the existing users, populated from the raw parquet extract.
-- All columns of the parquet file are taken as-is (SELECT *).
-- NOTE(review): this table is also used as the target of an APPLY CHANGES INTO
-- statement in this notebook; Delta Live Tables documents that target as a
-- streaming table declared without a query -- confirm this definition is accepted.
CREATE LIVE TABLE ExistingUsers_BronzeLive

COMMENT "Bronze Live Table Existing Users"

AS

SELECT *
FROM parquet.`/mnt/datalake/Raw/users.parquet`

### Merge into Existing Users incrementally

In [None]:
-- Incrementally merge the validated user stream into the existing users table.
-- Rows are matched on user_id; when several changes exist for the same key,
-- CreatedOn decides which one is the most recent.
APPLY CHANGES INTO live.ExistingUsers_BronzeLive

-- Pipeline datasets are referenced through the live schema, as in the other cells.
FROM STREAM(live.Users_BronzeLiveIncrementalView)

KEYS (user_id)


-- Delete handling is intentionally disabled; uncomment if the source carries an OPERATION column.
-- APPLY AS DELETE WHEN OPERATION = 'DELETE'

SEQUENCE BY CreatedOn