# Welcome to the ABAC + Data Classification Demo!

To get started, fill in the `catalog` and `schema` variables below, and start a Data Classification job on your Catalog.

In [0]:
# Specify a Catalog and Schema name you'd like to add a table to. If the Catalog or Schema doesn't exist, the setup script will create the appropriate object for you.
# Replace each commented-out placeholder below with a quoted string, e.g. catalog = "my_catalog".
# As written, the assignments have no right-hand side, so this cell will not run until both are filled in.

catalog = # "TO DO"
schema = # "TO DO"

In [0]:
# Run the setup notebook (second argument is the timeout, 300), handing it the
# catalog and schema chosen above as string arguments.
dbutils.notebook.run(
    "Setup/Setup Notebook",
    300,
    {"catalog": str(catalog), "schema": str(schema)},
)

# Stop!

Before continuing, please ensure that you've [started a Data Classification job](https://docs.databricks.com/aws/en/data-governance/unity-catalog/data-classification#enable-data-classification) at the Catalog level that includes the Schema you specified above. Once done, run all of the following cells.

In [0]:
# SQL function mask_SSN: keeps only the last four characters of the input and
# prefixes them with the fixed string '***-**-'.
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_SSN(ssn STRING) "
    "RETURN CONCAT('***-**-', RIGHT(ssn, 4))"
)

In [0]:
# Schema-level policy: columns carrying the 'class.us_ssn' tag are masked with
# the mask_SSN function for the `account users` principal.
# The statement is assembled line by line; the joined text is the same SQL.
spark.sql(
    "\n".join(
        [
            "",
            "    CREATE OR REPLACE POLICY mask_SSN",
            f"      ON SCHEMA {catalog}.{schema}",
            "      COMMENT 'Mask SSN by only showing last four digits'",
            f"      COLUMN MASK {catalog}.{schema}.mask_SSN",
            "      TO `account users`",
            "      FOR TABLES",
            "      MATCH COLUMNS",
            "        hasTag('class.us_ssn') as ssn",
            "      ON COLUMN ssn;",
            "    ",
        ]
    )
)

In [0]:
# SQL function mask_CCNumber: keeps the first six characters of the card number
# and appends a fixed run of asterisks in place of the rest.
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_CCNumber(cc_number STRING) "
    "RETURN CONCAT(LEFT(cc_number, 6), '**********');"
)

In [0]:
# Schema-level policy: columns carrying the 'class.credit_card' tag are masked
# with the mask_CCNumber function for the `account users` principal.
# The statement is assembled line by line; the joined text is the same SQL.
spark.sql(
    "\n".join(
        [
            "",
            "    CREATE OR REPLACE POLICY mask_CC",
            f"      ON SCHEMA {catalog}.{schema}",
            "      COMMENT 'Mask Credit Card by only showing first six digits'",
            f"      COLUMN MASK {catalog}.{schema}.mask_CCNumber",
            "      TO `account users`",
            "      FOR TABLES",
            "      MATCH COLUMNS",
            "        hasTag('class.credit_card') as cc",
            "      ON COLUMN cc;",
            "    ",
        ]
    )
)

In [0]:
# SQL function mask_email: replaces the value with the rightmost LEN(email)
# characters of a freshly generated uuid() string, so the result is random text
# whose length follows the input (bounded by the length of the uuid string).
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_email(email STRING) "
    "RETURN RIGHT(uuid(), LEN(email));"
)

In [0]:
# Schema-level policy: columns carrying the 'class.email_address' tag are masked
# with the mask_email function for the `account users` principal.
# The statement is assembled line by line; the joined text is the same SQL.
spark.sql(
    "\n".join(
        [
            "",
            "    CREATE OR REPLACE POLICY mask_email",
            f"      ON SCHEMA {catalog}.{schema}",
            "      COMMENT 'Mask email by randomizing string'",
            f"      COLUMN MASK {catalog}.{schema}.mask_email",
            "      TO `account users`",
            "      FOR TABLES",
            "      MATCH COLUMNS",
            "        hasTag('class.email_address') as email",
            "      ON COLUMN email;",
            "    ",
        ]
    )
)

In [0]:
# SQL function mask_phone_number: surrounds a five-character slice of the input
# (starting at position 6) with fixed '***' and '****' markers.
# NOTE(review): presumably that slice is the middle three digits plus their
# separators for the expected phone format - confirm against the demo data.
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_phone_number(phone_number STRING) "
    "RETURN CONCAT('***', SUBSTRING (phone_number, 6,5), '****');"
)

In [0]:
# Schema-level policy: columns carrying the 'class.phone_number' tag are masked
# with the mask_phone_number function for the `account users` principal.
# The statement is assembled line by line; the joined text is the same SQL.
spark.sql(
    "\n".join(
        [
            "",
            "    CREATE OR REPLACE POLICY mask_phone_number",
            f"      ON SCHEMA {catalog}.{schema}",
            "      COMMENT 'Mask phone number by showing the middle three digits'",
            f"      COLUMN MASK {catalog}.{schema}.mask_phone_number",
            "      TO `account users`",
            "      FOR TABLES",
            "      MATCH COLUMNS",
            "        hasTag('class.phone_number') as phone_number",
            "      ON COLUMN phone_number;",
            "    ",
        ]
    )
)

In [0]:
# SQL function mask_location: replaces the value with the rightmost
# LEN(location) characters of a freshly generated uuid() string.
# NOTE(review): no CREATE POLICY statement in this notebook references
# mask_location - confirm whether a matching policy is intended.
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_location(location STRING) "
    "RETURN RIGHT(uuid(), LEN(location));"
)

In [0]:
# SQL function mask_ip: ignores the input value and always returns the constant
# string '1.1.1.1'.
spark.sql(
    f"CREATE OR REPLACE FUNCTION {catalog}.{schema}.mask_ip(ip STRING) "
    "RETURN '1.1.1.1'"
)

In [0]:
# Schema-level policy: columns carrying the 'class.ip_address' tag are masked
# with the mask_ip function for the `account users` principal.
# The COMMENT clause describes IP masking specifically, so the policy's stored
# description matches what it does.
spark.sql(
    f"""
    CREATE OR REPLACE POLICY mask_ip
      ON SCHEMA {catalog}.{schema}
      COMMENT 'Mask IP address by replacing it with a fixed placeholder value'
      COLUMN MASK {catalog}.{schema}.mask_ip
      TO `account users`
      FOR TABLES
      MATCH COLUMNS
        hasTag('class.ip_address') as ip_address
      ON COLUMN ip_address;
    """
)

# Congrats! 
You made it to the end of the notebook. Once the data classification is complete, navigate to the table you created and view your masked data.