In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType

from Crypto.Cipher import AES
import base64
from pyspark.sql.functions import col, lit, when, regexp_replace, sha2, concat_ws
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession, SQLContext
import os
import sys
from pyspark.sql.functions import *
import psycopg2

In [2]:
# Fallback JDK location, used only when JAVA_HOME is not already defined in the
# environment, so the notebook also runs on machines with a different install path.
DEFAULT_JAVA_HOME = "C:/Users/User/AppData/Local/Programs/Eclipse Adoptium/jdk-11.0.25.9-hotspot"
os.environ.setdefault("JAVA_HOME", DEFAULT_JAVA_HOME)

# Point PySpark at the interpreter that is running this kernel instead of a
# hardcoded path, so the driver and the Python workers use the same Python.
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable


In [3]:
# Spark configuration for this notebook: application name, a "local" master,
# and the extra driver classpath entry "C:/jars/*".
conf = (
    SparkConf()
    .setAppName("Encryption Example")
    .setMaster("local")
    .set("spark.driver.extraClassPath", "C:/jars/*")
)

In [4]:
# Obtain the session through the builder so that re-running this cell reuses
# the already active session instead of constructing a new SparkSession object
# around the context each time.
spark = SparkSession.builder.config(conf=conf).getOrCreate()

# Keep the SparkContext handle available under its original name.
sc = spark.sparkContext

In [5]:
# Bare expression at the end of the cell: the notebook displays the session object.
spark

In [6]:
from Crypto.Cipher import AES
import base64

# AES key used by encrypt()/decrypt().
# NOTE(security): a key embedded in a notebook is visible to anyone who can read
# the file. Set the NOTEBOOK_AES_KEY environment variable to supply a real key;
# the literal below is a demo-only fallback (16 bytes -> AES-128).
key = os.environ.get("NOTEBOOK_AES_KEY", "ThisIsASecretKey").encode("utf-8")

# AES accepts only 16-, 24- or 32-byte keys; fail here with a clear message
# rather than later inside every encrypt/decrypt call.
if len(key) not in (16, 24, 32):
    raise ValueError(
        f"AES key must be 16, 24 or 32 bytes long, got {len(key)} bytes"
    )

def pad(text):
    """Extend ``text`` so that its UTF-8 encoding is a whole number of 16-byte blocks.

    The number of fill characters is derived from the encoded byte length, and
    each fill character is ``chr(n)`` where ``n`` is that number (1..16). Input
    whose encoded length is already a multiple of 16 receives a full extra
    block of 16 fill characters.

    Args:
        text: The string to pad.

    Returns:
        ``text`` followed by the fill characters.
    """
    encoded_size = len(text.encode('utf-8'))
    overflow = encoded_size % 16
    fill_count = 16 - overflow
    filler = chr(fill_count)
    return text + filler * fill_count

def unpad(text):
    """Remove the trailing fill characters from a padded string.

    The last character encodes how many fill characters were appended
    (1..16), and every fill character must be identical to it. Malformed
    padding is rejected instead of being silently stripped, because slicing
    on an unchecked length returns garbage (or an empty string when the last
    character is ``chr(0)``).

    Args:
        text: Padded string, typically the decoded result of an AES decryption.

    Returns:
        ``text`` without its trailing fill characters.

    Raises:
        ValueError: If ``text`` is empty or its padding is malformed.
    """
    if not text:
        raise ValueError("Cannot unpad empty text")

    pad_len = ord(text[-1])

    # A valid pad length lies between 1 and the 16-byte block size and can
    # never exceed the length of the text itself.
    if not 1 <= pad_len <= 16 or pad_len > len(text):
        raise ValueError(f"Invalid padding length: {pad_len}")

    # All fill characters must match the final one.
    if text[-pad_len:] != text[-1] * pad_len:
        raise ValueError("Invalid padding characters")

    return text[:-pad_len]

def encrypt(plain_text):
    """Encrypt a string with AES and return the ciphertext as Base64 text.

    The text is padded with ``pad``, encoded as UTF-8, encrypted with the
    module-level ``key`` and Base64-encoded.

    Args:
        plain_text: The string to encrypt, or ``None``.

    Returns:
        The Base64-encoded ciphertext as ``str``; ``None`` when the input is
        ``None`` or when encryption fails (the error is printed, not raised).
    """
    # Nullable columns hand None to the UDF; pass it through quietly instead
    # of failing inside pad() and printing a misleading error for every null.
    if plain_text is None:
        return None

    try:
        # NOTE(security): ECB produces identical ciphertext for identical
        # plaintext. The mode is left unchanged here because switching it
        # would alter the ciphertext format that decrypt() expects.
        cipher = AES.new(key, AES.MODE_ECB)
        padded_text = pad(plain_text)
        encrypted_bytes = cipher.encrypt(padded_text.encode('utf-8'))
        return base64.b64encode(encrypted_bytes).decode('utf-8')
    except Exception as e:
        # Best effort: report the problem and yield a null value.
        print(f"Encryption error: {e}")
        return None

def decrypt(encrypted_text):
    """Decrypt Base64 text produced by ``encrypt`` back into the original string.

    The input is Base64-decoded, decrypted with the module-level ``key``,
    decoded as UTF-8 and stripped of its padding with ``unpad``.

    Args:
        encrypted_text: Base64-encoded ciphertext, or ``None``.

    Returns:
        The decrypted string; ``None`` when the input is ``None`` or when
        decryption fails (the error is printed, not raised).
    """
    # encrypt() yields None for null or failed rows; pass those through
    # quietly instead of letting b64decode fail and print a spurious error.
    if encrypted_text is None:
        return None

    try:
        cipher = AES.new(key, AES.MODE_ECB)
        decoded_encrypted_bytes = base64.b64decode(encrypted_text)
        decrypted_bytes = cipher.decrypt(decoded_encrypted_bytes)
        decrypted_text = decrypted_bytes.decode('utf-8')
        return unpad(decrypted_text)
    except Exception as e:
        # Best effort: report the problem and yield a null value.
        print(f"Decryption error: {e}")
        return None


In [7]:
# Wrap the plain Python helpers as Spark UDFs that return strings, so they can
# be applied to DataFrame columns.
encrypt_udf = udf(f=encrypt, returnType=StringType())
decrypt_udf = udf(f=decrypt, returnType=StringType())

# Column names for the sample rows below.
columns = ["id", "email", "phone_number"]

# Two sample records: (id, email, phone_number).
data = [
    ("1", "john.doe@example.com", "+359812345678"),
    ("2", "maria.ivanova@example.com", "+359876543210"),
]

# Build the demo DataFrame from the sample rows.
df = spark.createDataFrame(data, schema=columns)

In [8]:
# Sanity-check the helpers on the driver before using them as Spark UDFs.
sample_email = "john.doe@example.com"

enc_email = encrypt(sample_email)
print(f"Encrypted: {enc_email}")

dec_email = decrypt(enc_email)
print(f"Decrypted: {dec_email}")

# Fail loudly if the round trip does not restore the input, rather than
# relying on someone comparing the two printed lines by eye.
assert dec_email == sample_email, "AES round trip did not restore the original value"


Encrypted: pkCut/tcKWXNPY34Z+xoOfDrSlfSrhJHhSih+HZ8v30=
Decrypted: john.doe@example.com


In [9]:
# Print the column names and types of the sample DataFrame.
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- email: string (nullable = true)
 |-- phone_number: string (nullable = true)



In [13]:
# Add an encrypted copy of each sensitive column.
encrypted_df = (
    df
    .withColumn("email_encrypted", encrypt_udf(col("email")))
    .withColumn("phone_encrypted", encrypt_udf(col("phone_number")))
)
encrypted_df.show(truncate=False)

# Decrypt the encrypted columns again so the result can be compared with the
# original values side by side.
decrypted_df = (
    encrypted_df
    .withColumn("email_decrypted", decrypt_udf(col("email_encrypted")))
    .withColumn("phone_decrypted", decrypt_udf(col("phone_encrypted")))
)
decrypted_df.show(truncate=False)


+---+-------------------------+-------------+--------------------------------------------+------------------------+
|id |email                    |phone_number |email_encrypted                             |phone_encrypted         |
+---+-------------------------+-------------+--------------------------------------------+------------------------+
|1  |john.doe@example.com     |+359812345678|pkCut/tcKWXNPY34Z+xoOfDrSlfSrhJHhSih+HZ8v30=|XuoGshQ3AiKxpgTHJy0tUw==|
|2  |maria.ivanova@example.com|+359876543210|YG2ZJCkdlDG1Xhcly57jg1UO543p/y8qgl0KrJ6n7lk=|5lzMhndBrspezM/svaRn4w==|
+---+-------------------------+-------------+--------------------------------------------+------------------------+

+---+-------------------------+-------------+--------------------------------------------+------------------------+-------------------------+---------------+
|id |email                    |phone_number |email_encrypted                             |phone_encrypted         |email_decrypted          |phon