# PROTECTO SYNC

## LOADING IN NECESSARY TABLE

In [None]:
from snowflake.snowpark.functions import col, call_udf, array_agg,flatten
from snowflake.snowpark.context import get_active_session
import pandas as pd
session = get_active_session()

# Load 10 names from the PII_DATA table. `df` is lazy and is re-executed by
# every later `df.to_pandas()` call, so sort before limiting: without an
# ORDER BY, Snowflake may return different rows (or a different row order) on
# each execution, and the side-by-side comparisons further down would pair
# names with the wrong masked values.
df = session.table("PII_DATA").select("NAME").sort(col("NAME")).limit(10)

# Aggregate the selected rows into a single array. WITHIN GROUP pins the
# element order to the same ordering as `df`; ARRAY_AGG on its own makes no
# ordering guarantee.
# NOTE(review): ARRAY_AGG omits NULLs, so a NULL NAME among the selected rows
# would still shift the alignment - confirm NAME is never NULL.
aggregated_df = df.agg(
    array_agg(col("NAME")).within_group(col("NAME")).alias("names_array")
)

## 1. PROTECTO MASK - AUTO DETECT

In [None]:
# Invoke the PROTECTO_MASK UDF on the aggregated names array with the
# "token_value" option. The two "None" strings are passed as-is (presumably
# leaving token type/format to auto-detection - confirm against the UDF).
protecto_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_MASK",
    col("names_array"),
    "None",
    "None",
    "token_value",
)

# Expand the UDF result into rows and keep only the VALUE column.
flattened = aggregated_df.select(flatten(protecto_mask))
auto_df = flattened.select(col("VALUE"))

# Show the original names beside the UDF output (first 5 rows).
side_by_side = pd.concat([df.to_pandas(), auto_df.to_pandas()], axis=1)
side_by_side.head(5)

## 1.1 PROTECTO AUTO DETECT - TOXICITY ANALYSIS

In [None]:
# Invoke the PROTECTO_MASK UDF on the aggregated names array with the
# "toxicity_analysis" option; the two "None" strings are passed as-is.
protecto_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_MASK",
    col("names_array"),
    "None",
    "None",
    "toxicity_analysis",
)

# Expand the UDF result into rows and keep only the VALUE column.
flattened = aggregated_df.select(flatten(protecto_mask))
auto_df = flattened.select(col("VALUE"))

# Show the original names beside the UDF output (first 5 rows).
side_by_side = pd.concat([df.to_pandas(), auto_df.to_pandas()], axis=1)
side_by_side.head(5)

## 1.2 PROTECTO AUTO DETECT - TOXICITY ANALYSIS - REFINED

In [None]:
# Invoke the PROTECTO_MASK UDF on the aggregated names array with the
# "toxicity" option; the two "None" strings are passed as-is.
protecto_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_MASK",
    col("names_array"),
    "None",
    "None",
    "toxicity",
)

# Expand the UDF result into rows and keep only the VALUE column.
flattened = aggregated_df.select(flatten(protecto_mask))
auto_df = flattened.select(col("VALUE"))

# Show the original names beside the UDF output (first 5 rows).
side_by_side = pd.concat([df.to_pandas(), auto_df.to_pandas()], axis=1)
side_by_side.head(5)

## 1.3 PROTECTO AUTO DETECT - RAW JSON

In [None]:
# Invoke the PROTECTO_MASK UDF on the aggregated names array with the
# "raw_json" option; the two "None" strings are passed as-is.
protecto_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_MASK",
    col("names_array"),
    "None",
    "None",
    "raw_json",
)

# Unlike the other cells, the result is not flattened: the UDF output is
# selected as a single column and the DataFrame is displayed as-is.
auto_df = aggregated_df.select(protecto_mask)
auto_df

## 2. PROTECTO MASK - FORMAT & TOKEN TYPE

In [None]:
# Invoke the PROTECTO_MASK UDF on the aggregated names array, this time passing
# "Text Token" and "Person Name" instead of the "None" placeholders
# (presumably the token type and format - confirm against the UDF).
protecto_mask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_MASK",
    col("names_array"),
    "Text Token",
    "Person Name",
    "token_value",
)

# Expand the UDF result into rows and keep only the VALUE column.
flattened = aggregated_df.select(flatten(protecto_mask))
token_df = flattened.select(col("VALUE"))

# Show the original names beside the UDF output (pandas default: first 5 rows).
side_by_side = pd.concat([df.to_pandas(), token_df.to_pandas()], axis=1)
side_by_side.head()


## WRITING MASKED RESULTS TO TABLE

In [None]:
# Pull both result sets into pandas and join them column-wise (by position),
# giving one frame with the NAME and VALUE columns.
names_pd = df.to_pandas()
tokens_pd = token_df.to_pandas()
masked_df = pd.concat([names_pd, tokens_pd], axis=1)

# Convert back to a Snowpark DataFrame and persist it, replacing any existing
# MASKED_PII_NAME table.
df_snowpark = session.create_dataframe(masked_df)
table_writer = df_snowpark.write.mode("overwrite")
table_writer.save_as_table("MASKED_PII_NAME")

## 3. PROTECTO UNMASK

In [None]:
from snowflake.snowpark.functions import col, call_udf, array_agg,flatten


# Load every row of the MASKED_PII_NAME table (written by the previous
# section). Sort by VALUE so that each `masked_pii_name.to_pandas()` call
# returns rows in a stable order; an unordered table scan has no guaranteed
# row order, which would break the positional comparison with the unmasked
# values.
masked_pii_name = session.table("MASKED_PII_NAME").sort(col("VALUE"))

# Aggregate the masked values into a single array. WITHIN GROUP keeps the
# array elements in the same VALUE order as the rows above; ARRAY_AGG on its
# own makes no ordering guarantee.
aggregated_masked = masked_pii_name.agg(
    array_agg(col("VALUE")).within_group(col("VALUE")).alias("masked_array")
)
aggregated_masked.show()

In [None]:
# Invoke the PROTECTO_UNMASK UDF on the aggregated array of masked values.
protecto_unmask = call_udf(
    "PROTECTO_VAULT.VAULT_SCHEMA.PROTECTO_UNMASK",
    col("masked_array"),
)

# Expand the UDF result into rows and keep only the VALUE column.
flattened = aggregated_masked.select(flatten(protecto_unmask))
value_df = flattened.select(col("VALUE"))

# Show the stored NAME/VALUE rows beside the unmasked output, which is renamed
# to ORIGINAL_NAME so it does not clash with the table's VALUE column.
masked_pd = masked_pii_name.to_pandas()
unmasked_pd = value_df.select(col("VALUE").alias("ORIGINAL_NAME")).to_pandas()
pd.concat([masked_pd, unmasked_pd], axis=1).head(10)