# Experiment 2 â€“ Key Management Service + RBAC

**Goal:** Test Role-Based Access Control (RBAC) through having an external Key Management Service (KMS) manage keys for each role. Use the keys to decrypt the data if possible.

In [1]:
# Imports
import os
from dotenv import load_dotenv
from constants import *
from utility_functions import *
from constants import *
import pandas as pd

In [2]:
# Load environment variables
load_dotenv()

True

In [None]:
# Setup for connection towards AWS KMS
kms_conn_config = pe.KmsConnectionConfig(
    kms_instance_id=f"aws-kms-{REGION}",
    kms_instance_url=f"https://kms.{REGION}.amazonaws.com"
)

In [None]:
def read_parquet_best_effort_for_role(
    kms_client,
    parquet_path
):
    """
    Try to read an encrypted Parquet file for a given role (KMS client).

    - If the role can decrypt the footer but not all columns:
        Try each column individually and keep only those that can be read.
      * Else:
          Raise as soon as any column fails.

    - If the role cannot decrypt the footer at all (no footer key access),
      opening ParquetFile will raise immediately.
    """
    print(f"\n=== Reading {parquet_path} with role KMS client {kms_client} ===")

    crypto_factory = make_crypto_factory_for_kms(kms_client)
    decryption_props = crypto_factory.file_decryption_properties(kms_conn_config)

    try:
        pf = pq.ParquetFile(
            parquet_path,
            decryption_properties=decryption_props
        )
    except Exception as e:
        print(f"Failed to open Parquet file: {e}")
        return None

    schema = pf.schema
    print("File columns:", schema.names)

    if not skip_unreadable_columns:
        # Try reading everything in one go; will raise if any column cannot be decrypted
        try:
            table = pf.read()
            return table.to_pandas()
        except Exception as e:
            print(f"Failed to read full table: {e}")
            return None
        
    # Try reading columns one by one
    readable_cols = []
    for name in schema.names:
        try:
            pf.read(columns=[name])  # attempt to read single column
            readable_cols.append(name)
        except Exception as e:
            print(f"Skipping column {name} due to decryption error: {e}")

    if not readable_cols:
        print("No readable columns for this role.")
        return None

    # Finally read only the columns that worked
    table = pf.read(columns=readable_cols)
    df = table.to_pandas()
    print(f"Readable columns for this role: {readable_cols}")
    return df

In [None]:
def decrypt_parquet(parquet_file, keys):
    # Decrypt the file content
    fernet = Fernet(fernet_key)
    decrypted_data = fernet.decrypt(file)

    # Load decrypted data into a DataFrame
    df = pd.read_csv(BytesIO(decrypted_data))

    # Display the specified column
    print(df[column_name])

In [9]:
keys = get_keys_for_role(ROLE_ENGINEER)
print("Retrieved keys for ENGINEER:", keys)

Assumed ENGINEER role successfully.
Retrieved keys for ENGINEER: ['d229ff0a-b839-4732-9dd8-602c38a4487b']


In [11]:
keys = get_keys_for_role(ROLE_HR)
print("Retrieved keys for HR:", keys)
file = retrieve_data(BUCKET_NAME, EMPLOYEE_DATA_RAW_KEY)

Assumed HR role successfully.
Retrieved keys for HR: ['86c41c3f-fc21-4730-a20b-b755e5b63ebb', 'd229ff0a-b839-4732-9dd8-602c38a4487b']
Failed to retrieve 'employee_data_raw.parquet' from S3: 'str' object has no attribute 'get_object'


In [15]:
key = get_key(ADMIN)
print("Retrieved key for HR:", key)
file = retrieve_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME)
df = decrypt_salary_with_key(key["fernet_key"], file, "Salary")
decrypt_password_with_key(key["admin_key"], df, "Password")

Retrieved key for HR: {'fernet_key': 'U1eIY6p4bKjOaMycX1VyMshD0tRmfWqC7xJ0MMT8oO0=', 'admin_key': None}

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.
No key provided, cannot decrypt.


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,55000,gAAAAABpLrCMtGSy6lBrX4Ge4B_tADTvGpQkWCZ9MzGOi2...
1,2,Bob,bob@example.com,Engineering,72000,gAAAAABpLrCMqaffpffNlnWpyXigdcRQs35FD0evnJoPoV...
2,3,Charlie,charlie@example.com,Marketing,63000,gAAAAABpLrCMIeIebdmsd7i0hXysvI9OfMakQ_aSQys72G...
3,4,David,david@example.com,Finance,80000,gAAAAABpLrCMtj98pR40ir08dPPe10vTiJcJMu8eVpDDLy...
4,5,Eva,eva@example.com,Engineering,75000,gAAAAABpLrCMau4X6IX1fNkwVhznV7ncx_dOuB7sQqq2Px...
5,6,Frank,frank@example.com,HR,50679,gAAAAABpLrCM8nF4MU55PAZKcbCsm0iq_BmbgY44a3RTsw...
6,7,Grace,grace@example.com,Sales,98115,gAAAAABpLrCMt2n2Qz8PUvMNzDze3xA8catvbqhXacJ4lw...
7,8,Hannah,hannah@example.com,Finance,87984,gAAAAABpLrCM_JVNJnll5BE_4cEEc5KojU2FBrdM0usQe8...
8,9,Ian,ian@example.com,Marketing,118777,gAAAAABpLrCMT0zpUl-kD3td_FCJOqDyNQHajggsZmN06v...
9,10,Julia,julia@example.com,Sales,69355,gAAAAABpLrCMgKWCdV9nVhIQ1PkL4mDMt3Ih6f7zpcqmAD...


In [None]:
key = get_key(HR)
print("Retrieved key for HR:", key)
file = retrieve_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME)
decrypt_salary_with_key(key["fernet_key"], file, "Salary")


Retrieved key for HR: {'fernet_key': 'U1eIY6p4bKjOaMycX1VyMshD0tRmfWqC7xJ0MMT8oO0='}

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,55000,gAAAAABpLrCMtGSy6lBrX4Ge4B_tADTvGpQkWCZ9MzGOi2...
1,2,Bob,bob@example.com,Engineering,72000,gAAAAABpLrCMqaffpffNlnWpyXigdcRQs35FD0evnJoPoV...
2,3,Charlie,charlie@example.com,Marketing,63000,gAAAAABpLrCMIeIebdmsd7i0hXysvI9OfMakQ_aSQys72G...
3,4,David,david@example.com,Finance,80000,gAAAAABpLrCMtj98pR40ir08dPPe10vTiJcJMu8eVpDDLy...
4,5,Eva,eva@example.com,Engineering,75000,gAAAAABpLrCMau4X6IX1fNkwVhznV7ncx_dOuB7sQqq2Px...
5,6,Frank,frank@example.com,HR,50679,gAAAAABpLrCM8nF4MU55PAZKcbCsm0iq_BmbgY44a3RTsw...
6,7,Grace,grace@example.com,Sales,98115,gAAAAABpLrCMt2n2Qz8PUvMNzDze3xA8catvbqhXacJ4lw...
7,8,Hannah,hannah@example.com,Finance,87984,gAAAAABpLrCM_JVNJnll5BE_4cEEc5KojU2FBrdM0usQe8...
8,9,Ian,ian@example.com,Marketing,118777,gAAAAABpLrCMT0zpUl-kD3td_FCJOqDyNQHajggsZmN06v...
9,10,Julia,julia@example.com,Sales,69355,gAAAAABpLrCMgKWCdV9nVhIQ1PkL4mDMt3Ih6f7zpcqmAD...
