# Experiment 1 – Show Encrypted Data Cannot Be Read

**Goal:** Demonstrate that encrypted data stored in an AWS S3 bucket cannot be read
without the appropriate decryption key / permissions.

We will:

1. List objects in the test S3 bucket.
2. Attempt to read a Parquet file with an *encrypted* column.
3. Attempt to read the same encrypted data via Snowflake external stage.


In [1]:
# Imports
import getpass
import io
import os
from io import BytesIO

import boto3
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import snowflake.connector
from botocore.exceptions import NoCredentialsError, ClientError
from cryptography.fernet import Fernet
from dotenv import load_dotenv

In [2]:
# Load environment variables (python-dotenv reads them from a .env file) so the
# os.getenv(...) lookups in later cells can find FERNET_KEY and the Snowflake settings.
load_dotenv()

True

In [3]:
# S3 bucket that holds both the plaintext input and the encrypted output.
BUCKET_NAME = "rher-s3-test-bucket"
# Object key of the unencrypted source Parquet file.
INPUT_FILE = "sample_sensitive_data.parquet"
# Object key the encrypted copy is uploaded to (same bucket).
OUTPUT_FILE_LOCATION = "sample_sensitive_data_encrypted.parquet"
# Column whose values are replaced by Fernet tokens.
COLUMN_TO_ENCRYPT = "salary" 
# Fernet key read from the environment; this is None when FERNET_KEY is not set.
FERNET_KEY = os.getenv("FERNET_KEY")

## Utility Functions

In [4]:
def list_s3_objects(bucket_name):
    """List every object in an S3 bucket, print a summary, and return the keys.

    The listing goes through the ``list_objects_v2`` paginator so that buckets
    whose contents span more than one response page are listed in full rather
    than being cut off after the first page.

    Args:
        bucket_name: Name of the S3 bucket to list.

    Returns:
        A list of object keys. The list is empty when the bucket holds no
        objects or when the listing fails; failures are printed instead of
        raised so the notebook can keep running.
    """
    try:
        s3 = boto3.client("s3")
        paginator = s3.get_paginator("list_objects_v2")
        # A page for an empty bucket/prefix carries no 'Contents' entry.
        objects = [
            obj
            for page in paginator.paginate(Bucket=bucket_name)
            for obj in page.get('Contents', [])
        ]
        if not objects:
            print(f"No objects found in bucket: {bucket_name}")
            return []
        print(f"Objects in bucket '{bucket_name}':")
        for obj in objects:
            print(f" - {obj['Key']} (LastModified: {obj['LastModified']}, Size: {obj['Size']} bytes)")
        return [obj['Key'] for obj in objects]

    except NoCredentialsError:
        print("AWS credentials not found. Please configure them first.")
        return []
    except ClientError as e:
        print(f" AWS Client Error: {e}")
        return []
    except Exception as e:
        # Deliberate best-effort: report and return an empty listing.
        print(f" Unexpected error: {e}")
        return []

def read_s3_parquet(bucket_name, object_key):
    """Download one Parquet object from S3 and return it as a DataFrame.

    The whole object body is read into memory and parsed by pandas with the
    pyarrow engine. A confirmation line is printed once parsing succeeds.
    """
    client = boto3.client("s3")
    body = client.get_object(Bucket=bucket_name, Key=object_key)['Body']
    frame = pd.read_parquet(BytesIO(body.read()), engine="pyarrow")
    print(f"\n successfully loaded '{object_key}' into DataFrame.")
    return frame

def encrypt_column_in_parquet(bucket_name, input_file, output_file_location, column_to_encrypt, fernet_key):
    """Encrypt one column of a Parquet file stored in S3 and upload the result.

    The input object is downloaded into memory, each value of the target
    column is converted with ``astype(str)`` and replaced by its Fernet token
    (stored as text), and the resulting table is written back to the same
    bucket under ``output_file_location``. Progress is printed along the way.

    Args:
        bucket_name: S3 bucket holding the input and receiving the output.
        input_file: Object key of the Parquet file to read.
        output_file_location: Object key to upload the encrypted file to.
        column_to_encrypt: Name of the column whose values are encrypted.
        fernet_key: Fernet key used for encryption.

    Raises:
        ValueError: If ``fernet_key`` is missing or empty, or if
            ``column_to_encrypt`` is not a column of the input file.
    """
    # The key usually comes from os.getenv and is None when the variable is
    # unset; fail with a clear message before making any S3 call.
    if not fernet_key:
        raise ValueError(
            "fernet_key is missing or empty; set FERNET_KEY in the environment"
        )
    fernet = Fernet(fernet_key)

    s3 = boto3.client("s3")
    buffer = io.BytesIO()
    s3.download_fileobj(bucket_name, input_file, buffer)
    buffer.seek(0)  # rewind so the Parquet reader starts at the first byte
    df = pq.read_table(buffer).to_pandas()

    # Validate before announcing progress so a bad column name does not
    # produce a misleading "Encrypting column" line.
    if column_to_encrypt not in df.columns:
        raise ValueError(f"Column '{column_to_encrypt}' not found in Parquet file")

    print(f"Encrypting column: {column_to_encrypt}")
    df[column_to_encrypt] = df[column_to_encrypt].astype(str).apply(
        lambda x: fernet.encrypt(x.encode()).decode()
    )

    output_buffer = io.BytesIO()
    pq.write_table(pa.Table.from_pandas(df), output_buffer)
    output_buffer.seek(0)  # rewind so the upload sends the whole file
    print("Uploading encrypted Parquet file to S3...")
    s3.upload_fileobj(output_buffer, bucket_name, output_file_location)

    print("Done!")
    print(f"Encrypted file uploaded to s3://{bucket_name}/{output_file_location}")


## Encrypt salary column in file

In [5]:

# Write the encrypted copy of the input file to S3 using the configured settings.
encrypt_column_in_parquet(
    bucket_name=BUCKET_NAME,
    input_file=INPUT_FILE,
    output_file_location=OUTPUT_FILE_LOCATION,
    column_to_encrypt=COLUMN_TO_ENCRYPT,
    fernet_key=FERNET_KEY,
)

Encrypting column: salary
Uploading encrypted Parquet file to S3...
Done!
Encrypted file uploaded to s3://rher-s3-test-bucket/sample_sensitive_data_encrypted.parquet


## List all .parquet files in bucket

In [6]:
# Every key in the bucket, narrowed first to Parquet objects and then to those
# whose name contains "encrypted" (case-insensitive).
all_keys = list_s3_objects(BUCKET_NAME)
all_parquet_files = [key for key in all_keys if key.endswith(".parquet")]
encrypted_candidates = [key for key in all_parquet_files if "encrypted" in key.lower()]

# Print each group under its own heading.
for heading, keys in (
    ("\nParquet objects found:", all_parquet_files),
    ("\nEncrypted candidates:", encrypted_candidates),
):
    print(heading)
    for key in keys:
        print(" -", key)


Objects in bucket 'rher-s3-test-bucket':
 - sample_sensitive_data.parquet (LastModified: 2025-11-03 11:16:22+00:00, Size: 2152 bytes)
 - sample_sensitive_data_encrypted.parquet (LastModified: 2025-11-17 23:30:41+00:00, Size: 5306 bytes)

Parquet objects found:
 - sample_sensitive_data.parquet
 - sample_sensitive_data_encrypted.parquet

Encrypted candidates:
 - sample_sensitive_data_encrypted.parquet


In [21]:
# Load both copies by their configured object keys rather than by position in
# the bucket listing: positional lookup picks the wrong object if the bucket
# contents change, and raises IndexError if the listing came back empty.
raw_data_df = read_s3_parquet(BUCKET_NAME, INPUT_FILE)
encrypted_data_df = read_s3_parquet(BUCKET_NAME, OUTPUT_FILE_LOCATION)

# Encryption rewrites values in one column only, so the shapes must agree.
assert raw_data_df.shape == encrypted_data_df.shape, "raw and encrypted frames differ in shape"

print(f"DataFrame shape: {raw_data_df.shape}")
print(f"DataFrame shape: {encrypted_data_df.shape}")


 successfully loaded 'sample_sensitive_data.parquet' into DataFrame.

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.
DataFrame shape: (10, 5)
DataFrame shape: (10, 5)


In [22]:
# Baseline: preview the first rows of the unencrypted file for comparison with the encrypted copy.
raw_data_df.head()

Unnamed: 0,id,name,email,department,salary
0,1,Alice,alice@example.com,HR,55000
1,2,Bob,bob@example.com,Engineering,72000
2,3,Charlie,charlie@example.com,Marketing,63000
3,4,David,david@example.com,Finance,80000
4,5,Eva,eva@example.com,Engineering,75000


In [23]:
# Same preview for the encrypted copy; the salary values are expected to show up as Fernet tokens.
encrypted_data_df.head()

Unnamed: 0,id,name,email,department,salary
0,1,Alice,alice@example.com,HR,gAAAAABpG7AgLIKxoISF9yhJJ6UisxsVbDdFUPPEc_P6PC...
1,2,Bob,bob@example.com,Engineering,gAAAAABpG7AgDo6H9xZlK6x217uoj3KUilpFO8nmD52E_b...
2,3,Charlie,charlie@example.com,Marketing,gAAAAABpG7AgDA1tum3KzValXNiUovBk5xOFn1KNz9TcXS...
3,4,David,david@example.com,Finance,gAAAAABpG7Aga5J-KjKu9XiImAzQofxep-M4pB4b62u41j...
4,5,Eva,eva@example.com,Engineering,gAAAAABpG7Agb_mU3ELaSomvOGFnbKQxdJGKyijL3jLYPY...


## Try to run with Snowflake integration

In [24]:
# Snowflake connection settings, read from the environment.
# os.getenv returns None for any variable that is not set.
SNOWFLAKE_ACCOUNT   = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_USER      = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD  = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ROLE      = os.getenv("SNOWFLAKE_ROLE")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE  = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA    = os.getenv("SNOWFLAKE_SCHEMA")

In [11]:
STAGE_NAME = "s3_test_stage"  # not referenced in this cell
EXT_TABLE  = "EMPLOYEES_SALARY_ENCRYPTED"

print("=== Experiment 1 (Snowflake) – Attempt to query encrypted data ===")

# MFA passcodes are short-lived secrets, so take one from the environment or
# prompt for it instead of committing it to the notebook. Keeping it a string
# also preserves leading zeros, which an integer literal cannot represent.
mfa_passcode = os.getenv("SNOWFLAKE_MFA_PASSCODE") or getpass.getpass("Snowflake MFA passcode: ")

conn = snowflake.connector.connect(
    account=SNOWFLAKE_ACCOUNT,
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    passcode=mfa_passcode,
    role=SNOWFLAKE_ROLE,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
)

try:
    with conn.cursor() as cur:
        # EXT_TABLE is a constant defined above, not user input.
        sql = f"SELECT * FROM {EXT_TABLE} LIMIT 10"
        print("Running:", sql)
        cur.execute(sql)
        rows = cur.fetchall()
        print("\nQuery returned rows:")
        for r in rows:
            print(r)

        if not rows:
            print("\nRESULT: No rows returned. Check if the external table could not "
                  "access or parse the encrypted S3 objects.")
        else:
            print("\nRESULT: Rows returned. Inspect if data is readable vs encrypted.")
except Exception as e:
    # A failing query is one of the outcomes this experiment is looking for,
    # so report it instead of letting the cell raise.
    print("\nSnowflake query failed – this is expected if encryption / KMS permissions prevent access to the S3 objects.")
    print("Error:", e)
finally:
    # Always release the connection, whether the query succeeded or not.
    conn.close()


=== Experiment 1 (Snowflake) – Attempt to query encrypted data ===
Running: SELECT * FROM EMPLOYEES_SALARY_ENCRYPTED LIMIT 10

Query returned rows:
(1, 'Alice', 'alice@example.com', 'HR', 'gAAAAABpG4i9PqxfyVUSN5fShPPsEE_d_AU--SIMTATsA740C7VnGhw239g8FtaKSFVp4j02CPxwk0CUdeRuLLt4g2hP99tSvw==')
(2, 'Bob', 'bob@example.com', 'Engineering', 'gAAAAABpG4i93EP4KYErsubr2EaqHz08fKmKpROq5XkQ90gKCaRRf4DJOUb2ocvMp1vMNW4lzCjOhbq7WEaFR4jIYjFLYzH0vA==')
(3, 'Charlie', 'charlie@example.com', 'Marketing', 'gAAAAABpG4i9w4Jm5leymKvDlksha3dcjk-1umQX5RHXo4kdsVZdFd-XFD0PhIsDPtd5CckE-TA2mFlGmEl6IvOvaj5YBvmqvw==')
(4, 'David', 'david@example.com', 'Finance', 'gAAAAABpG4i9P5zV5_T_aCNrx3yGCoje9lxAT4zlI-w2hvkZZdad6mL0pzdMLfmVbj_pTkLSIFzUCaLCM1B1PcbMWqjswhV7xg==')
(5, 'Eva', 'eva@example.com', 'Engineering', 'gAAAAABpG4i92ui1EDcCDhMpBqft8e1K9L44czjZ7aAoDGAfZJtz-ug9yA8mlRJ5_esTEa9lCCc2mV6OOOeD7z2iJikImuRqUw==')
(6, 'Frank', 'frank@example.com', 'HR', 'gAAAAABpG4i9a2upLEBspBc7msC0GTW13wW5catAlEg4V0EMO-4o3bhoGbGBo7apX

# Experiment 2 – Simulate Key Management Service + RBAC

In [25]:
# Role identifiers that drive the simulated access check in simulate_kms.
READ_ONLY_ROLE = "RBAC_IN_DATA_LAKES_ROLE_READ_ONLY"       # gets the ciphertext back unchanged
SECURE_ROLE    = "RBAC_IN_DATA_LAKES_ROLE_SECURE_ANALYST"  # the only role simulate_kms decrypts for

def decrypt_salary(enc_salary):
    """Decrypt one Fernet-encrypted salary token and return it as an int.

    Stands in for the decrypt primitive of a key management service. The key
    is taken from the module-level ``FERNET_KEY``.
    """
    cipher = Fernet(FERNET_KEY)
    plaintext = cipher.decrypt(enc_salary.encode()).decode()
    return int(plaintext)

def simulate_kms(enc_salary, role):
    """Return the decrypted salary for SECURE_ROLE and the ciphertext for any other role."""
    if role != SECURE_ROLE:
        # Unprivileged caller: hand back the token untouched.
        return enc_salary
    return decrypt_salary(enc_salary)

## Test with Read-only Role

In [27]:
# Run every encrypted salary through the simulated KMS as the read-only role;
# assign() builds a new frame, so encrypted_data_df itself is left unchanged.
df_test_ro = encrypted_data_df.assign(
    salary_kms_output=encrypted_data_df["salary"].apply(simulate_kms, role=READ_ONLY_ROLE)
)

df_test_ro.head()

Unnamed: 0,id,name,email,department,salary,salary_kms_output
0,1,Alice,alice@example.com,HR,gAAAAABpG7AgLIKxoISF9yhJJ6UisxsVbDdFUPPEc_P6PC...,gAAAAABpG7AgLIKxoISF9yhJJ6UisxsVbDdFUPPEc_P6PC...
1,2,Bob,bob@example.com,Engineering,gAAAAABpG7AgDo6H9xZlK6x217uoj3KUilpFO8nmD52E_b...,gAAAAABpG7AgDo6H9xZlK6x217uoj3KUilpFO8nmD52E_b...
2,3,Charlie,charlie@example.com,Marketing,gAAAAABpG7AgDA1tum3KzValXNiUovBk5xOFn1KNz9TcXS...,gAAAAABpG7AgDA1tum3KzValXNiUovBk5xOFn1KNz9TcXS...
3,4,David,david@example.com,Finance,gAAAAABpG7Aga5J-KjKu9XiImAzQofxep-M4pB4b62u41j...,gAAAAABpG7Aga5J-KjKu9XiImAzQofxep-M4pB4b62u41j...
4,5,Eva,eva@example.com,Engineering,gAAAAABpG7Agb_mU3ELaSomvOGFnbKQxdJGKyijL3jLYPY...,gAAAAABpG7Agb_mU3ELaSomvOGFnbKQxdJGKyijL3jLYPY...


## Test with Secure Analyst Role

In [28]:
# Run every encrypted salary through the simulated KMS as the secure-analyst role;
# assign() builds a new frame, so encrypted_data_df itself is left unchanged.
df_test_sa = encrypted_data_df.assign(
    salary_kms_output=encrypted_data_df["salary"].apply(simulate_kms, role=SECURE_ROLE)
)

df_test_sa.head()

Unnamed: 0,id,name,email,department,salary,salary_kms_output
0,1,Alice,alice@example.com,HR,gAAAAABpG7AgLIKxoISF9yhJJ6UisxsVbDdFUPPEc_P6PC...,55000
1,2,Bob,bob@example.com,Engineering,gAAAAABpG7AgDo6H9xZlK6x217uoj3KUilpFO8nmD52E_b...,72000
2,3,Charlie,charlie@example.com,Marketing,gAAAAABpG7AgDA1tum3KzValXNiUovBk5xOFn1KNz9TcXS...,63000
3,4,David,david@example.com,Finance,gAAAAABpG7Aga5J-KjKu9XiImAzQofxep-M4pB4b62u41j...,80000
4,5,Eva,eva@example.com,Engineering,gAAAAABpG7Agb_mU3ELaSomvOGFnbKQxdJGKyijL3jLYPY...,75000
