In [25]:
import base64
from dotenv import load_dotenv

from constants import *
from utility_functions import *
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa

In [26]:
# Load settings from a local .env file into the process environment before any
# AWS helper runs; the recorded run returned True.
load_dotenv()

True

In [47]:
# Obtain credentials for each demo role, in ENGINEER -> HR -> ADMIN order
# (the helper prints one confirmation line per role).
# Despite the `_s3_` in the names, these same credentials are reused further
# down to build the KMS clients as well.
engineer_s3_credentials, hr_s3_credentials, admin_s3_credentials = [
    get_aws_credentials(role)
    for role in (ROLE_ENGINEER, ROLE_HR, ROLE_ADMIN)
]

Assumed ENGINEER role successfully.
Assumed HR role successfully.
Assumed ADMIN role successfully.


In [48]:
# One S3 client per role, each built from that role's credentials.
engineer_s3_client, hr_s3_client, admin_s3_client = [
    create_aws_client_for(role_credentials, "s3")
    for role_credentials in (
        engineer_s3_credentials,
        hr_s3_credentials,
        admin_s3_credentials,
    )
]

In [49]:
# Print the bucket listing as seen by each role (ENGINEER, HR, ADMIN) so the
# three views can be compared side by side.
for role_s3_client in (engineer_s3_client, hr_s3_client, admin_s3_client):
    list_s3_bucket_objects(role_s3_client)

Objects in bucket 's3-rbac-in-data-lakes-experiments':
 - employee_data_encrypted.parquet
 - employee_data_raw.parquet
 - hejMedDigTest.parquet
Objects in bucket 's3-rbac-in-data-lakes-experiments':
 - employee_data_encrypted.parquet
 - employee_data_raw.parquet
 - hejMedDigTest.parquet
Objects in bucket 's3-rbac-in-data-lakes-experiments':
 - employee_data_encrypted.parquet
 - employee_data_raw.parquet
 - hejMedDigTest.parquet


In [50]:
# One KMS client per role. The `*_s3_credentials` objects are the per-role
# credentials obtained earlier; they are not S3-specific.
engineer_kms_client, hr_kms_client, admin_kms_client = [
    create_aws_client_for(role_credentials, "kms")
    for role_credentials in (
        engineer_s3_credentials,
        hr_s3_credentials,
        admin_s3_credentials,
    )
]

In [51]:
# List the KMS keys visible to the ADMIN role (raw API response).
# NOTE(review): the displayed response contains the AWS account ID inside each
# KeyArn plus request metadata, and a 'Truncated' flag for paging; consider
# showing only the key IDs and clearing this output before sharing the notebook.
admin_kms_client.list_keys()

{'Keys': [{'KeyId': '5722129f-2136-4ef4-8b53-5a242b553f34',
   'KeyArn': 'arn:aws:kms:eu-north-1:501994300007:key/5722129f-2136-4ef4-8b53-5a242b553f34'},
  {'KeyId': '86c41c3f-fc21-4730-a20b-b755e5b63ebb',
   'KeyArn': 'arn:aws:kms:eu-north-1:501994300007:key/86c41c3f-fc21-4730-a20b-b755e5b63ebb'},
  {'KeyId': 'd229ff0a-b839-4732-9dd8-602c38a4487b',
   'KeyArn': 'arn:aws:kms:eu-north-1:501994300007:key/d229ff0a-b839-4732-9dd8-602c38a4487b'}],
 'Truncated': False,
 'ResponseMetadata': {'RequestId': '458f73bb-4e9c-4780-be63-53571b2623f9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '458f73bb-4e9c-4780-be63-53571b2623f9',
   'cache-control': 'no-cache, no-store, must-revalidate, private',
   'expires': '0',
   'pragma': 'no-cache',
   'date': 'Wed, 10 Dec 2025 22:00:34 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '452',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [52]:
# Key IDs the project helper reports as usable with the ENGINEER role's KMS
# client; the recorded run returned a single key. What "usable" checks is
# defined in the helper and not visible here.
usable_keys(engineer_kms_client)

['d229ff0a-b839-4732-9dd8-602c38a4487b']

In [53]:
# Same check for the HR role's KMS client; the recorded run returned two keys,
# including the one reported for ENGINEER.
usable_keys(hr_kms_client)

['86c41c3f-fc21-4730-a20b-b755e5b63ebb',
 'd229ff0a-b839-4732-9dd8-602c38a4487b']

In [54]:
# Same check for the ADMIN role's KMS client; the recorded run returned all
# three keys that list_keys() reported.
usable_keys(admin_kms_client)

['5722129f-2136-4ef4-8b53-5a242b553f34',
 '86c41c3f-fc21-4730-a20b-b755e5b63ebb',
 'd229ff0a-b839-4732-9dd8-602c38a4487b']

## Create a Parquet file with encrypted columns

In [55]:
# Build the crypto factory from the ADMIN role's KMS client using the project
# helper. How the key names used below are resolved to actual KMS keys is
# decided inside that helper and is not visible in this notebook.
admin_crypto_factory = make_crypto_factory_for_kms(admin_kms_client)

In [56]:
# Encryption layout for the output file:
#   - the footer is encrypted (plaintext_footer=False) under "file-access-key";
#   - the Salary and Password columns each get their own key name.
# Columns not listed under column_keys (ID, Name, Email, Department) are not
# assigned a column key here.
encryption_config = pe.EncryptionConfiguration(
    footer_key="file-access-key",
    plaintext_footer=False,
    column_keys={"salary-key": ["Salary"], "password-key": ["Password"]},
)

In [57]:
# Connection settings handed to the crypto factory: the regional KMS endpoint
# URL and an instance identifier, both derived from REGION.
kms_conn_config = pe.KmsConnectionConfig(
    kms_instance_url=f"https://kms.{REGION}.amazonaws.com",
    kms_instance_id=f"aws-kms-{REGION}",
)

In [58]:
# Combine the KMS connection settings with the encryption layout into the
# properties object that the Parquet writer consumes.
file_encryption_props = admin_crypto_factory.file_encryption_properties(kms_conn_config, encryption_config)

In [59]:
# Load the raw (unencrypted) employee records from the CSV path constant.
df = pd.read_csv(EMPLOYEE_DATA_RAW_CSV_PATH)
# NOTE(review): this displays every row, including the Salary and Password
# columns that are encrypted below; with non-dummy data prefer df.head() on a
# redacted view and clear the output before sharing.
df

Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,55000,DummyPassword1
1,2,Bob,bob@example.com,Engineering,72000,DummyPassword2
2,3,Charlie,charlie@example.com,Marketing,63000,DummyPassword3
3,4,David,david@example.com,Finance,80000,DummyPassword4
4,5,Eva,eva@example.com,Engineering,75000,DummyPassword5
5,6,Frank,frank@example.com,HR,70000,DummyPassword6
6,7,Grace,grace@example.com,Sales,85000,DummyPassword7
7,8,Hannah,hannah@example.com,Finance,65000,DummyPassword8
8,9,Ian,ian@example.com,Marketing,54800,DummyPassword9
9,10,Julia,julia@example.com,Sales,74500,DummyPassword10


In [60]:
# Convert the DataFrame to an Arrow table, then write it to the local
# encrypted-Parquet path using the encryption properties built above.
table = pa.Table.from_pandas(df)

with pq.ParquetWriter(
    where=EMPLOYEE_DATA_ENCRYPTED_PATH,
    schema=table.schema,
    encryption_properties=file_encryption_props,
) as encrypted_writer:
    encrypted_writer.write_table(table)


In [61]:
# Push the locally written encrypted Parquet file to the bucket using the
# ADMIN role's S3 client, then report the destination URI.
admin_s3_client.upload_file(
    Filename=EMPLOYEE_DATA_ENCRYPTED_PATH,
    Bucket=BUCKET_NAME,
    Key=EMPLOYEE_DATA_ENCRYPTED_KEY,
)
print(f"\nUploaded encrypted Parquet to s3://{BUCKET_NAME}/{EMPLOYEE_DATA_ENCRYPTED_KEY}")


Uploaded encrypted Parquet to s3://s3-rbac-in-data-lakes-experiments/employee_data_encrypted.parquet
