# PROTECTO ASYNC

## LOADING IN NECESSARY TABLE

In [None]:
from snowflake.snowpark.functions import col, call_udf, array_agg,flatten,parse_json
from snowflake.snowpark.context import get_active_session
import pandas as pd
# Bind to the Snowpark session that the notebook runtime already provides.
session = get_active_session()

# Sample up to 10 NAME values from the PII_DATA table.
# No ordering is applied before the limit.
pii_table = session.table("PII_DATA")
df = pii_table.select("NAME").limit(10)

# Collapse the sampled names into a single ARRAY column ("names_array") so
# the whole batch can be passed to the UDF in one call.
names_as_array = array_agg(col("NAME")).alias("names_array")
aggregated_df = df.agg(names_as_array)

## 1. PROTECTO ASYNC MASK - AUTO DETECT

In [None]:
# Build the PROTECTO_ASYNC_MASK UDF call over the aggregated names.
# The last two arguments are passed as the literal string "None".
# NOTE(review): presumably "None" asks the service to auto-detect the
# token/format type (per the section title) - confirm against the UDF contract.
protecto_asyn_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK",
    col("names_array"),
    "None",
    "None",
)

# Evaluate the UDF and print what it returns.
# NOTE(review): the output is presumably the tracking id that the result
# cells below need - confirm.
tracking_id = aggregated_df.select(protecto_asyn_mask)
tracking_id.show()

## 1.1. PROTECTO ASYNC MASK - STATUS

In [None]:
# Ask PROTECTO_ASYNC_MASK_RESULT for the "status" view of an async mask job.
# The UDF name is fully qualified, like every other call in this notebook, so
# the cell does not depend on the session's current database/schema.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the mask cell above - replace it with the id of your own run.
protecto_async_mask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK_RESULT",
    "0e4349fc-7a57-4bc9-aa35-442e8ce43d3b02082024123658",
    "status",
)

# First flatten: one row per element of the UDF result; keep only VALUE.
auto_df = aggregated_df.select(flatten(protecto_async_mask_result)).select(col("VALUE"))

# Second flatten: one row per KEY/VALUE entry inside each element.
# NOTE(review): for the "status" view the VALUE column may hold a status
# rather than a masked value - confirm and rename the alias if so.
# The bare expression is left last so the notebook displays it.
auto_df.select(flatten(col("VALUE"))).select(
    col("KEY").alias("Name"),
    col("VALUE").alias("MASKED_VALUE"),
)

## 1.2. PROTECTO ASYNC MASK - TOKEN VALUE

In [None]:

# Ask PROTECTO_ASYNC_MASK_RESULT for the "token_value" view of an async mask job.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the mask cell above - replace it with the id of your own run.
protecto_async_mask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK_RESULT",
    "0e4349fc-7a57-4bc9-aa35-442e8ce43d3b02082024123658",
    "token_value",
)

# First flatten: one row per element of the UDF result; keep only VALUE.
auto_df = aggregated_df.select(flatten(protecto_async_mask_result)).select(col("VALUE"))

# Second flatten: one row per KEY/VALUE entry inside each element.
# The value column is named MASKED_VALUE, matching the column name used when
# the masked data is written to a table later in this notebook.
# The bare expression is left last so the notebook displays it.
auto_df.select(flatten(col("VALUE"))).select(
    col("KEY").alias("Name"),
    col("VALUE").alias("MASKED_VALUE"),
)

## 1.3. PROTECTO ASYNC MASK - TOXICITY ANALYSIS

In [None]:
# Ask PROTECTO_ASYNC_MASK_RESULT for the "toxicity_analysis" view of the job.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the mask cell - replace it with the id of your own run.
protecto_async_mask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK_RESULT",
    "0e4349fc-7a57-4bc9-aa35-442e8ce43d3b02082024123658",
    "toxicity_analysis",
)

# First flatten: one row per element of the UDF result; keep only VALUE.
first_level = aggregated_df.select(flatten(protecto_async_mask_result))
auto_df = first_level.select(col("VALUE"))

# Second flatten: one row per KEY/VALUE entry inside each element.
per_name_entries = auto_df.select(flatten(col("VALUE")))
toxicity_analysis = per_name_entries.select(
    col("KEY").alias("Name"),
    col("VALUE").alias("TOXICITY_ANALYSIS"),
)
# Bare expression left last so the notebook displays the DataFrame.
toxicity_analysis

In [None]:
# Flatten each name's TOXICITY_ANALYSIS value and keep the name next to every
# resulting KEY/VALUE pair.
exploded_scores = toxicity_analysis.select(col("NAME"), flatten(col("toxicity_analysis")))
analysis_knockdown = exploded_scores.select(col("NAME"), col("KEY"), col("VALUE"))

# Pivot the listed KEY values into columns, summing VALUE for each.
# The bare expression is left last so the notebook displays it.
toxicity_categories = [
    "severe_toxicity",
    "insult",
    "obscene",
    "threat",
    "identity_attack",
    "toxicity",
]
analysis_knockdown.pivot("KEY", toxicity_categories).sum("VALUE")

## 1.4. PROTECTO ASYNC MASK RESULT - RAW DATA

In [None]:
# Ask PROTECTO_ASYNC_MASK_RESULT for the "raw_json" view of the job.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the mask cell - replace it with the id of your own run.
protecto_async_mask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK_RESULT",
    "0e4349fc-7a57-4bc9-aa35-442e8ce43d3b02082024123658",
    "raw_json",
)

# No flattening here: the UDF output is selected as-is.
auto_df = aggregated_df.select(protecto_async_mask_result)
# Bare expression left last so the notebook displays the DataFrame.
auto_df

## 2. PROTECTO ASYNC MASK - FORMAT & TOKEN TYPE

In [None]:
# Build the PROTECTO_ASYNC_MASK UDF call over the aggregated names, this time
# with the explicit literals "Text Token" and "Person Name" in place of "None".
# NOTE(review): which of the two is the token type and which the format is
# not visible here - confirm against the UDF signature.
protecto_asyn_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK",
    col("names_array"),
    "Text Token",
    "Person Name",
)

# Evaluate the UDF and print what it returns.
# NOTE(review): the output is presumably the tracking id that the result
# cell below needs - confirm.
tracking_id = aggregated_df.select(protecto_asyn_mask)
tracking_id.show()

## 2.1 PROTECTO ASYNC MASK - TOKEN VALUE

In [None]:
# Ask PROTECTO_ASYNC_MASK_RESULT for the "token_value" view of the job started
# with explicit "Text Token" / "Person Name" arguments.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the mask cell above - replace it with the id of your own run.
protecto_async_mask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_MASK_RESULT",
    "afafb76f-07f6-4018-8662-c132c47e22dc02082024125354",
    "token_value",
)

# First flatten: one row per element of the UDF result; keep only VALUE.
# token_df is reused by the cell that writes the masked values to a table.
token_df = aggregated_df.select(flatten(protecto_async_mask_result)).select(col("VALUE"))

# Second flatten: one row per KEY/VALUE entry inside each element.
# The value column is named MASKED_VALUE, matching the column name used when
# the same data is written to MASKED_PII_NAME_ASYNC.
# The bare expression is left last so the notebook displays it.
token_df.select(flatten(col("VALUE"))).select(
    col("KEY").alias("Name"),
    col("VALUE").alias("MASKED_VALUE"),
)

## WRITING ASYNC MASKED TO TABLE

In [None]:
# Flatten the token results into KEY/VALUE rows and rename them to the
# Name / MASKED_VALUE columns of the output table.
flattened_tokens = token_df.select(flatten(col("VALUE")))
write_df = flattened_tokens.select(
    col("KEY").alias("Name"),
    col("VALUE").alias("MASKED_VALUE"),
)

# Persist the result; "overwrite" replaces any existing MASKED_PII_NAME_ASYNC.
table_writer = write_df.write.mode("overwrite")
table_writer.save_as_table("MASKED_PII_NAME_ASYNC")

## 3. PROTECTO ASYNC UNMASK

In [None]:
from snowflake.snowpark.functions import col, call_udf, array_agg,flatten


# Read back the MASKED_PII_NAME_ASYNC table written by the masking step.
masked_pii_name = session.table("MASKED_PII_NAME_ASYNC")

# Collapse every MASKED_VALUE into a single ARRAY column ("masked_array") so
# the whole batch can be passed to the unmask UDF in one call.
masked_values_as_array = array_agg(col("MASKED_VALUE")).alias("masked_array")
aggregated_masked = masked_pii_name.agg(masked_values_as_array)


In [None]:
# Build the PROTECTO_ASYNC_UNMASK UDF call over the aggregated masked values.
protecto_async_unmask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_UNMASK",
    col("masked_array"),
)

# Run the unmask expression against the DataFrame that actually holds
# "masked_array". The earlier version re-ran the *mask* expression on the
# original names and never used the unmask call at all.
# NOTE(review): the printed output is presumably the tracking id that the
# unmask result cell needs - confirm.
tracking_id = aggregated_masked.select(protecto_async_unmask)
tracking_id.show()

## 3.1  PROTECTO ASYNC UNMASK RESULT

In [None]:
# Ask PROTECTO_ASYNC_UNMASK_RESULT for the "value" view of an async unmask job.
# NOTE(review): the tracking id is hard-coded; presumably it was copied from
# the output of the unmask cell - replace it with the id of your own run.
protecto_async_unmask_result = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_ASYNC_UNMASK_RESULT",
    "1f9b85d0-0026-4d45-9cf5-53bc6d7f0c7a02082024130449",
    "value",
)

# Flatten the UDF result into rows and keep only the VALUE column.
flattened_result = aggregated_masked.select(flatten(protecto_async_unmask_result))
value_df = flattened_result.select(col("VALUE"))

# Pull both sides into pandas and place them side by side for comparison.
# NOTE(review): concat with axis=1 pairs rows by position, so this assumes
# both queries return rows in the same order - confirm.
masked_pdf = masked_pii_name.to_pandas()
original_pdf = value_df.select(col("VALUE").alias("ORIGINAL_NAME")).to_pandas()
pd.concat([masked_pdf, original_pdf], axis=1).head(10)