In [3]:
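# SQL-expression form of the encryption, kept for reference only (not executed).
# NOTE(review): the key below is 20 characters long; AES keys must be 16, 24 or 32 bytes.
# expr("base64(aes_encrypt(cpf, '1234567890asdfghjklç', 'ECB', 'PKCS'))")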

In [93]:
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import expr, aes_decrypt, base64, unbase64, unhex, lit, aes_encrypt

In [35]:
# Create (or reuse) the SparkSession used by the encryption examples.
# Extra JARs are requested through the `spark.jars.packages` property using
# Maven coordinates (groupId:artifactId:version). The previous bare "packages"
# key is not a Spark property, and its value was not a valid coordinate, so the
# MySQL connector was never actually loaded.
# NOTE: getOrCreate() returns an already-running session unchanged, so restart
# the kernel for the package setting to take effect; resolving the package
# requires access to a Maven repository at start-up.
spark = (
    SparkSession.builder
    .appName('Criptografia')
    .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.13")
    .getOrCreate()
)

In [46]:
# AES-CBC example: a single row carrying the base64 ciphertext, a 32-character
# key, the cipher mode and the padding scheme as ordinary columns.
df = spark.createDataFrame(
    [
        (
            "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=",
            "abcdefghijklmnop12345678ABCDEFGH",
            "CBC",
            "DEFAULT",
        )
    ],
    ["input", "key", "mode", "padding"],
)

# Base64-decode the ciphertext, then decrypt it with the row's key/mode/padding.
df.select(
    aes_decrypt(
        unbase64(df["input"]), df["key"], df["mode"], df["padding"]
    ).alias("r")
).collect()
# Expected: [Row(r=bytearray(b'Spark'))]

[Row(r=bytearray(b'Spark'))]

In [47]:
# Same decryption as the previous cell, but without passing a padding argument.
df.select(
    aes_decrypt(unbase64(df["input"]), df["key"], df["mode"]).alias("r")
).collect()
# Expected: [Row(r=bytearray(b'Spark'))]

[Row(r=bytearray(b'Spark'))]

In [None]:
# Default-mode example: hex-encoded ciphertext plus a 16-character key; mode and
# padding are not passed, so aes_decrypt falls back to its own defaults.
df = spark.createDataFrame(
    [
        (
            "83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94",
            "0000111122223333",
        )
    ],
    ["input", "key"],
)

# Hex-decode the ciphertext and decrypt it with the row's key.
df.select(
    aes_decrypt(unhex(df["input"]), df["key"]).alias("r")
).collect()
# Expected: [Row(r=bytearray(b'Spark'))]

In [103]:
# AES-GCM example with additional authenticated data (AAD): the row carries the
# base64 ciphertext, the key, the mode, the padding and the AAD string.
df = spark.createDataFrame(
    [
        (
            "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4",
            "abcdefghijklmnop12345678ABCDEFGH",
            "GCM",
            "DEFAULT",
            "This is an AAD mixed into the input",
        )
    ],
    ["input", "key", "mode", "padding", "aad"],
)

# Decrypt, passing the AAD column as the fifth argument.
df.select(
    aes_decrypt(
        unbase64(df["input"]), df["key"], df["mode"], df["padding"], df["aad"]
    ).alias("r")
).collect()
# Expected: [Row(r=bytearray(b'Spark'))]

[Row(r=bytearray(b'Spark'))]

In [116]:
# One sample employee record; the last field is the `cpf` value to be encrypted.
schema = ["employee_name", "department", "state", "salary", "age", "bonus", "cpf"]
simpleData = [("James", "Sales", "NY", 90000, 34, 10000, "111.222.333-44")]

# Build df_pessoas and add a "cpf_criptografado" column: AES-encrypt `cpf`
# (ECB mode, PKCS padding, literal 16-character key) and base64-encode the
# binary result so it displays as text.
# Equivalent SQL-expression form:
#   expr("base64(aes_encrypt(cpf, 'minhachavede128b', 'ECB', 'PKCS'))")
# NOTE: the key is hard-coded here for demonstration purposes only.
df_pessoas = spark.createDataFrame(data=simpleData, schema=schema).withColumn(
    "cpf_criptografado",
    base64(
        aes_encrypt("cpf", lit("minhachavede128b"), lit("ECB"), lit("PKCS"))
    ),
)
df_pessoas.show(truncate=False)

+-------------+----------+-----+------+---+-----+--------------+------------------------+
|employee_name|department|state|salary|age|bonus|cpf           |cpf_criptografado       |
+-------------+----------+-----+------+---+-----+--------------+------------------------+
|James        |Sales     |NY   |90000 |34 |10000|111.222.333-44|CX4CAhPGbwAcB3KpNkHlwQ==|
+-------------+----------+-----+------+---+-----+--------------+------------------------+



In [120]:
# Reverse the encryption: base64-decode "cpf_criptografado", AES-decrypt it with
# the same key / ECB / PKCS settings, and cast the binary result to a string.
df_decript = df_pessoas.select(
    aes_decrypt(
        unbase64("cpf_criptografado"),
        lit("minhachavede128b"),
        lit("ECB"),
        lit("PKCS"),
    )
    .cast("string")
    .alias("r")
)
df_decript.show()

+--------------+
|             r|
+--------------+
|111.222.333-44|
+--------------+



In [None]:
from pyspark.sql.functions import expr

# Side-by-side check of the CPF encryption round trip. Resulting columns:
#   cpf               - original plaintext
#   cpf_criptografado - base64 ciphertext produced by the earlier cell (kept in
#                       the selection so this cell can be re-run, since
#                       df_pessoas is reassigned here)
#   cript             - ciphertext recomputed here (AES/ECB/PKCS, base64-encoded)
#   descript          - plaintext recovered by decrypting cpf_criptografado
# Mode and padding are wrapped in lit(): bare Python strings passed to these
# functions are interpreted as column names, not as literal values.
# Previously `cript` base64-encoded the plaintext and `descript` only
# base64-decoded the ciphertext, so neither column showed what its name implies.
df_pessoas = (
    df_pessoas.select(
        'cpf',
        'cpf_criptografado',
        base64(
            aes_encrypt('cpf', lit('minhachavede128b'), lit('ECB'), lit('PKCS'))
        ).alias('cript'),
        aes_decrypt(
            unbase64('cpf_criptografado'),
            lit('minhachavede128b'), lit('ECB'), lit('PKCS'),
        ).cast('string').alias('descript'),
    )
)
# Show the result: with ECB, `cript` should match `cpf_criptografado`, and
# `descript` should match `cpf`.
df_pessoas.show(truncate=False)