# DATA ENCRIPTION

## Create Delta Table

In [0]:
from delta.tables import *

In [0]:
dbutils.fs.rm('dbfs:/user/hive/warehouse/ssn_table/', True)

Out[14]: True

In [0]:
DeltaTable.createOrReplace(spark) \
    .tableName("ssn_table") \
    .addColumn("id", "INT") \
    .addColumn("name", "STRING") \
    .addColumn("gender", "STRING") \
    .addColumn("ssn", "STRING") \
    .addColumn("dept", "STRING") \
    .execute()

Out[15]: <delta.tables.DeltaTable at 0x7fa133d78b80>

In [0]:
%sql

INSERT INTO ssn_table VALUES (1, "Carl Mike", "m", "170000", "Support");
INSERT INTO ssn_table VALUES (2, "Mikel Clark", "m", "254300", "IT");
INSERT INTO ssn_table VALUES (3, "Bob Smith", "m", "220000", "IT");
INSERT INTO ssn_table VALUES (4, "Mary Scala", "f", "230000", "Sales");;
INSERT INTO ssn_table VALUES (5, "Susan Liam", "f", "150000", "Sales");
INSERT INTO ssn_table VALUES (6, "Xi Wuan", "f", "150000", "IT");
INSERT INTO ssn_table VALUES (7, "Luis Fuentes", "m", "257100", "IT");
INSERT INTO ssn_table VALUES (8, "Pedro Jose", "m", "254300", "IT");

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql

SELECT * FROM ssn_table;

id,name,gender,ssn,dept
1,Carl Mike,m,170000,Support
4,Mary Scala,f,230000,Sales
5,Susan Liam,f,150000,Sales
7,Luis Fuentes,m,257100,IT
2,Mikel Clark,m,254300,IT
8,Pedro Jose,m,254300,IT
3,Bob Smith,m,220000,IT
6,Xi Wuan,f,150000,IT


## Pip install cryptography

In [0]:
!pip install cryptography

You should consider upgrading via the '/local_disk0/.ephemeral_nfs/envs/pythonEnv-218d0b2b-b715-4e22-b4c2-21abdc5753fa/bin/python -m pip install --upgrade pip' command.[0m


## Generate key

In [0]:
from cryptography.fernet import Fernet

key = Fernet.generate_key()
f = Fernet(key)

## Encript Sample Data

In [0]:
sensitive_data = b"sensitive_email@gmail.com"
encript_test_data = f.encrypt(sensitive_data)
print(encript_test_data)

b'gAAAAABm0g_0mHQdnoQNpq_Ddd3FdUPlcP2QXkPyZdh9dlvZQkQwF5QJhVT1Man6CZuk4445lpg-HuD4dT2NkNaJwTDmE5KwAljzlgMtYMKnmdj3dEWiCxY='


## Decript Sample Data

In [0]:
print(f.decrypt(encript_test_data))

b'sensitive_email@gmail.com'


## Define UDFs

In [0]:
def encrypt_data(data, key):
    from cryptography.fernet import Fernet
    f = Fernet(key)
    data_b = bytes(data, 'utf-8')
    return str(f.encrypt(data_b).decode('ascii'))

In [0]:
def decrypt_data(data, key):
    from cryptography.fernet import Fernet
    f = Fernet(key)
    return f.decrypt(data.encode()).decode()

## Register UDFs

In [0]:
from pyspark.sql.functions import udf, lit, md5
from pyspark.sql.types import StringType

In [0]:
encryption = udf(encrypt_data, StringType())
decryption = udf(decrypt_data, StringType())

## Encrypt data using UDF

In [0]:
df = spark.table("ssn_table")

df_encrypt = df.withColumn("ssn_encript", encryption("ssn", lit(key)))

df_encrypt.show(truncate=False)

+---+------------+------+------+-------+----------------------------------------------------------------------------------------------------+
|id |name        |gender|ssn   |dept   |ssn_encript                                                                                         |
+---+------------+------+------+-------+----------------------------------------------------------------------------------------------------+
|1  |Carl Mike   |m     |170000|Support|gAAAAABm0hCqlTZeS1oJdYRI0leRLLuZmAOHYCkneuXGAJjmMwuv69oZEQEvkjjApygPG4K12enHBINNkKE45vciVxKomliWqA==|
|4  |Mary Scala  |f     |230000|Sales  |gAAAAABm0hCrxz2BkTeg5VKvBFE7dFhzEhT2c8C5GOpXcoy0kEeykC6h-xTUzLy2DzRn90Zf9l3-IODEj8weoHczH_1haZgLmg==|
|5  |Susan Liam  |f     |150000|Sales  |gAAAAABm0hCrgJM_qd850V6IQSSKOAPj_2zFWcAwO_8NJby2Gf27e1iNC-KqkgaothRN-Tz51n8vNJQXPrTESxUeFlFAZTEiZA==|
|7  |Luis Fuentes|m     |257100|IT     |gAAAAABm0hCrkOfVb0FybPXo6mqJSDVQKZUznbl7X0TSrctbgnvjxSzvlBc3xesECEeCowekYWlj3b3qkMsYhyW_gYVe6RhVaQ==|
|2  |M

## Decrypt data using UDF

In [0]:
df_decrypt = df_encrypt.withColumn("ssn_decript", decryption("ssn_encript", lit(key))).drop("ssn_encript")

df_decrypt.show(truncate=False)

+---+------------+------+------+-------+-----------+
|id |name        |gender|ssn   |dept   |ssn_decript|
+---+------------+------+------+-------+-----------+
|1  |Carl Mike   |m     |170000|Support|170000     |
|4  |Mary Scala  |f     |230000|Sales  |230000     |
|5  |Susan Liam  |f     |150000|Sales  |150000     |
|7  |Luis Fuentes|m     |257100|IT     |257100     |
|2  |Mikel Clark |m     |254300|IT     |254300     |
|8  |Pedro Jose  |m     |254300|IT     |254300     |
|3  |Bob Smith   |m     |220000|IT     |220000     |
|6  |Xi Wuan     |f     |150000|IT     |150000     |
+---+------------+------+------+-------+-----------+

