# Experiment 3 – Simulate Data Access Layer (DAL)

In this experiment, we simulate a Data Access Layer (DAL) that enforces Role-Based Access Control (RBAC) and key management while decrypting Parquet columns based on user roles.

In [None]:
from utility_functions import retrieve_data, decrypt_salary_with_key, decrypt_password_with_key, get_key
import os

# Bucket and object names come from the environment; each is None when unset.
BUCKET_NAME = os.environ.get("BUCKET_NAME")
SALARY_ENCRYPTED_FILE_NAME = os.environ.get("SALARY_ENCRYPTED_FILE_NAME")

ImportError: cannot import name 'DataAccessLayerClient' from 'utility_functions' (c:\Users\Rasmus\ITU\RBAC_IN_DATA_LAKES\src\experiments\utility_functions.py)

# Experiment 3 – Simulate Data Access Layer (DAL)

This experiment simulates a Data Access Layer by:

- Creating a dummy class accessed by: `dal://...`
- The class should:
  - Uniformly enforce RBAC + key management
  - Decrypt Parquet columns based on role
  - Return the Parquet files with columns either decrypted or encrypted

We simulate what a DAL *would* look like in Python,
and then show why that doesn't qualify as a real, shared DAL.

        #Token = get_token
        #client.get_data(PATH, TOKEN)
        #role = data_access_layer.authenticate_user(TOKEN)
        #key = kms.getKey(role)
        #data = retrieve_data(PATH)
        #decryptdata = decryptParquet(key, data)
        #dal.return to client

In [None]:
class DataAccessLayerClient:
    """
    Hypothetical DAL client.

    In the ideal world, both Snowflake and Python would effectively
    call this logic whenever they read data from S3.

    This is a simulation: tokens are mapped to hard-coded users and roles
    instead of being verified against a real identity provider.
    """

    # Role names handed out by `authenticate_user` and checked in `get_data`.
    ROLE_READ_ONLY = "RBAC_IN_DATA_LAKES_ROLE_READ_ONLY"
    ROLE_SECURE_ANALYST = "RBAC_IN_DATA_LAKES_ROLE_SECURE_ANALYST"
    ROLE_ADMIN = "RBAC_IN_DATA_LAKES_ROLE_ADMIN"

    # Simulated identity store: token -> (user_id, role).
    _TOKEN_DIRECTORY = {
        "Engineer_token": ("engineer_user", ROLE_READ_ONLY),
        "HR_token": ("hr_user", ROLE_SECURE_ANALYST),
        "Admin_token": ("admin_user", ROLE_ADMIN),
    }

    def __init__(self, base_url):
        """Store the DAL endpoint; it is kept for illustration and not used for I/O here."""
        self.base_url = base_url

    def authenticate_user(self, token):
        """
        Simulates user authentication.
        In a real DAL, this would verify the token and return user info.

        Args:
            token: One of the simulated tokens ("Engineer_token", "HR_token",
                "Admin_token").

        Returns:
            dict with "user_id" (str) and "roles" (single-element list of role names).

        Raises:
            Exception: If the token is missing/empty or not a known token.
        """
        # NOTE: echoing the token is acceptable only because this is a demo;
        # a real DAL must never log credentials.
        print("Authenticating user with token:", token)

        if not token:
            raise Exception("Authentication failed: No token provided")

        # The isinstance guard keeps unhashable inputs on the "Invalid token" path.
        identity = self._TOKEN_DIRECTORY.get(token) if isinstance(token, str) else None
        if identity is None:
            raise Exception("Authentication failed: Invalid token")

        user_id, role = identity
        return {"user_id": user_id, "roles": [role]}

    def get_data(self, bucket_path, file_name, Token):
        """
        Simulates reading a Parquet file with DAL logic:
        - authenticate the user and take their first role
        - ask key management for the key material of that role
        - fetch the Parquet file from the underlying bucket and file name
        - decrypt the protected columns the role is allowed to see
        - return the resulting DataFrame

        Args:
            bucket_path: Bucket to read from (passed through to `retrieve_data`).
            file_name: Object name of the encrypted Parquet file.
            Token: Simulated access token identifying the caller.

        Returns:
            The object returned by `retrieve_data`, with "Salary" decrypted when
            key material exists for the role and "Password" additionally
            decrypted for the admin role.

        Raises:
            Exception: If authentication fails (see `authenticate_user`).
        """
        role = self.authenticate_user(Token)["roles"][0]

        # get_key returns the key material for the role, or None when the role
        # may not decrypt anything.
        key = get_key(role)

        df = retrieve_data(bucket_path, file_name)

        # No key material: hand back the data exactly as stored (still encrypted).
        if key is None:
            return df

        df = decrypt_salary_with_key(key["fernet_key"], df, "Salary")

        # Only the admin role may additionally see the password column.
        if role == self.ROLE_ADMIN:
            df = decrypt_password_with_key(key["admin_key"], df, "Password")

        return df
        

In [None]:
# Single client instance shared by the role-specific reads below.
dal = DataAccessLayerClient("dal://rbac-sensitive/")

In [None]:
# Load the file directly, bypassing the DAL, so the stored (encrypted) columns
# can be compared with the role-based results returned by the DAL.
encrypted_data_df = retrieve_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME)

print("Raw dataframe read directly from S3:")
display(encrypted_data_df.head())

Raw dataframe read directly from S3:


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,gAAAAABpLrCMmHguX0r0xqGo92_ACuSCBKuA5vtWeQzLWH...,gAAAAABpLrCMtGSy6lBrX4Ge4B_tADTvGpQkWCZ9MzGOi2...
1,2,Bob,bob@example.com,Engineering,gAAAAABpLrCMHnO3jeaL2o1_rUru3c-R75ZcIIwMbcnFO9...,gAAAAABpLrCMqaffpffNlnWpyXigdcRQs35FD0evnJoPoV...
2,3,Charlie,charlie@example.com,Marketing,gAAAAABpLrCMRUQBH41jbiG0bFtmzXordV8vZvPXuHjkhs...,gAAAAABpLrCMIeIebdmsd7i0hXysvI9OfMakQ_aSQys72G...
3,4,David,david@example.com,Finance,gAAAAABpLrCM4S53NvA6aQpgA8Z1d17dAIEpQvp_nNO1in...,gAAAAABpLrCMtj98pR40ir08dPPe10vTiJcJMu8eVpDDLy...
4,5,Eva,eva@example.com,Engineering,gAAAAABpLrCMRSI_KPTpK02SBff78xRmhqPPoE-_0r0jOR...,gAAAAABpLrCMau4X6IX1fNkwVhznV7ncx_dOuB7sQqq2Px...


In [None]:
# Engineer token -> read-only role: Salary and Password are expected to stay encrypted.
Token = "Engineer_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())

Authenticating user with token: Engineer_token

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,gAAAAABpLrCMmHguX0r0xqGo92_ACuSCBKuA5vtWeQzLWH...,gAAAAABpLrCMtGSy6lBrX4Ge4B_tADTvGpQkWCZ9MzGOi2...
1,2,Bob,bob@example.com,Engineering,gAAAAABpLrCMHnO3jeaL2o1_rUru3c-R75ZcIIwMbcnFO9...,gAAAAABpLrCMqaffpffNlnWpyXigdcRQs35FD0evnJoPoV...
2,3,Charlie,charlie@example.com,Marketing,gAAAAABpLrCMRUQBH41jbiG0bFtmzXordV8vZvPXuHjkhs...,gAAAAABpLrCMIeIebdmsd7i0hXysvI9OfMakQ_aSQys72G...
3,4,David,david@example.com,Finance,gAAAAABpLrCM4S53NvA6aQpgA8Z1d17dAIEpQvp_nNO1in...,gAAAAABpLrCMtj98pR40ir08dPPe10vTiJcJMu8eVpDDLy...
4,5,Eva,eva@example.com,Engineering,gAAAAABpLrCMRSI_KPTpK02SBff78xRmhqPPoE-_0r0jOR...,gAAAAABpLrCMau4X6IX1fNkwVhznV7ncx_dOuB7sQqq2Px...


In [None]:
# HR token -> secure-analyst role: Salary is decrypted, Password stays encrypted.
Token = "HR_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())


Authenticating user with token: HR_token

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,55000,gAAAAABpLrCMtGSy6lBrX4Ge4B_tADTvGpQkWCZ9MzGOi2...
1,2,Bob,bob@example.com,Engineering,72000,gAAAAABpLrCMqaffpffNlnWpyXigdcRQs35FD0evnJoPoV...
2,3,Charlie,charlie@example.com,Marketing,63000,gAAAAABpLrCMIeIebdmsd7i0hXysvI9OfMakQ_aSQys72G...
3,4,David,david@example.com,Finance,80000,gAAAAABpLrCMtj98pR40ir08dPPe10vTiJcJMu8eVpDDLy...
4,5,Eva,eva@example.com,Engineering,75000,gAAAAABpLrCMau4X6IX1fNkwVhznV7ncx_dOuB7sQqq2Px...


In [None]:
# Admin token -> admin role: both Salary and Password are decrypted.
Token = "Admin_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())

Authenticating user with token: Admin_token

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.


Unnamed: 0,ID,Name,Email,Department,Salary,Password
0,1,Alice,alice@example.com,HR,55000,2o1e*OBZWW^7Kd
1,2,Bob,bob@example.com,Engineering,72000,GSxZX$zD5Qlf8^
2,3,Charlie,charlie@example.com,Marketing,63000,02QChWYlypvP#i
3,4,David,david@example.com,Finance,80000,m5AKDGR88&T*A8
4,5,Eva,eva@example.com,Engineering,75000,Yt%hUAUT2RX5$A


In [None]:
# Fresh DAL client for the reads that follow.
dal = DataAccessLayerClient("dal://rbac-sensitive/")

In [None]:
# Read through the DAL as the engineer (read-only role).
Token = "Engineer_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())

In [None]:
# Read through the DAL as HR (secure-analyst role).
Token = "HR_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())

In [None]:
# Read through the DAL as the admin (admin role).
Token = "Admin_token"
data = dal.get_data(BUCKET_NAME, SALARY_ENCRYPTED_FILE_NAME, Token)
display(data.head())