# Experiment 1 – Show Encrypted Data Cannot Be Read

**Goal:** Demonstrate that encrypted data stored in an AWS S3 bucket cannot be read
without the appropriate decryption key / permissions.

We will:

1. List objects in the test S3 bucket.
2. Attempt to read a Parquet file with and *encrypted* column
3. Attempt to read the same encrypted data via Snowflake external stage.


In [1]:
# Imports
import io
import boto3
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
from cryptography.fernet import Fernet
from io import BytesIO
from botocore.exceptions import NoCredentialsError, ClientError
from dotenv import load_dotenv
import snowflake.connector
import os

In [2]:
# Load environment variables
load_dotenv()

True

In [3]:
BUCKET_NAME = "rher-s3-test-bucket"
INPUT_FILE = "sample_sensitive_data.parquet"
OUTPUT_FILE_LOCATION = "sample_sensitive_data_encrypted.parquet"
COLUMN_TO_ENCRYPT = "salary" 

## Utility Functions

In [4]:
def list_s3_objects(bucket_name):
    try:
        s3 = boto3.client("s3")
        response = s3.list_objects_v2(Bucket=bucket_name)
        if 'Contents' not in response:
            print(f"No objects found in bucket: {bucket_name}")
            return []
        print(f"Objects in bucket '{bucket_name}':")
        for obj in response['Contents']:
            print(f" - {obj['Key']} (LastModified: {obj['LastModified']}, Size: {obj['Size']} bytes)")
        return [obj['Key'] for obj in response['Contents']]

    except NoCredentialsError:
        print("AWS credentials not found. Please configure them first.")
        return []
    except ClientError as e:
        print(f" AWS Client Error: {e}")
        return []
    except Exception as e:
        print(f" Unexpected error: {e}")
        return []

def read_s3_parquet(bucket_name, object_key):
    """Read a Parquet file from S3 and print a preview."""
    try:
        s3 = boto3.client("s3")
        response = s3.get_object(Bucket=bucket_name, Key=object_key)
        data = BytesIO(response['Body'].read())
        df = pd.read_parquet(data, engine="pyarrow")
        print(f"\n successfully loaded '{object_key}' into DataFrame.")
        print(" Data Preview:")
        print(df.head()) 
        print(f"DataFrame shape: {df.shape}")
        
    except Exception as e:
        print(f"Unexpected error reading Parquet: {e}")

def encrypt_column_in_parquet(input_file, output_file_location, column_to_encrypt, fernet_key):
    """Encrypt a specific column in a Parquet file and upload the result to S3."""
    fernet = Fernet(fernet_key)
    s3 = boto3.client("s3")
    buffer = io.BytesIO()
    s3.download_fileobj(BUCKET_NAME, input_file, buffer)
    buffer.seek(0)
    table = pq.read_table(buffer)
    df = table.to_pandas()
    print(f"Encrypting column: {column_to_encrypt}")
    if column_to_encrypt in df.columns:
        df[column_to_encrypt] = df[column_to_encrypt].astype(str).apply(
            lambda x: fernet.encrypt(x.encode()).decode()
        )
    else:
        raise ValueError(f"Column '{column_to_encrypt}' not found in Parquet file")

    output_buffer = io.BytesIO()
    pq.write_table(pa.Table.from_pandas(df), output_buffer)
    output_buffer.seek(0)
    print("Uploading encrypted Parquet file to S3...")
    s3.upload_fileobj(output_buffer, BUCKET_NAME, output_file_location)

    print("Done!")
    print(f"Encrypted file uploaded to s3://{BUCKET_NAME}/{output_file_location}")


## Encrypt salary column in file

In [8]:
encrypt_column_in_parquet(INPUT_FILE, OUTPUT_FILE_LOCATION, COLUMN_TO_ENCRYPT, os.getenv("FERNET_KEY"))

Encrypting column: salary
Uploading encrypted Parquet file to S3...
Done!
Encrypted file uploaded to s3://rher-s3-test-bucket/sample_sensitive_data_encrypted.parquet


## List all .parquet fiels in bucket

In [9]:
all_keys = list_s3_objects(BUCKET_NAME)

all_parquet_files = [k for k in all_keys if k.endswith(".parquet")]
print("\nParquet objects found:")
for k in all_parquet_files:
    print(" -", k)

encrypted_candidates = [k for k in all_parquet_files if "encrypted" in k.lower()]

print("\nEncrypted candidates:")
for k in encrypted_candidates:
    print(" -", k)


Objects in bucket 'rher-s3-test-bucket':
 - sample_sensitive_data.parquet (LastModified: 2025-11-03 11:16:22+00:00, Size: 2152 bytes)
 - sample_sensitive_data_encrypted.parquet (LastModified: 2025-11-17 22:16:00+00:00, Size: 5303 bytes)

Parquet objects found:
 - sample_sensitive_data.parquet
 - sample_sensitive_data_encrypted.parquet

Encrypted candidates:
 - sample_sensitive_data_encrypted.parquet


In [10]:
print("=== Read unencrypted file ===")

read_s3_parquet(BUCKET_NAME, all_parquet_files[0])
read_s3_parquet(BUCKET_NAME, all_parquet_files[1])


=== Read unencrypted file ===

 successfully loaded 'sample_sensitive_data.parquet' into DataFrame.
 Data Preview:
   id     name                email   department  salary
0   1    Alice    alice@example.com           HR   55000
1   2      Bob      bob@example.com  Engineering   72000
2   3  Charlie  charlie@example.com    Marketing   63000
3   4    David    david@example.com      Finance   80000
4   5      Eva      eva@example.com  Engineering   75000
DataFrame shape: (10, 5)

 successfully loaded 'sample_sensitive_data_encrypted.parquet' into DataFrame.
 Data Preview:
   id     name                email   department  \
0   1    Alice    alice@example.com           HR   
1   2      Bob      bob@example.com  Engineering   
2   3  Charlie  charlie@example.com    Marketing   
3   4    David    david@example.com      Finance   
4   5      Eva      eva@example.com  Engineering   

                                              salary  
0  gAAAAABpG56aQbUFPj7UZoOqFk0wBUNzwAnlSiMX3E3S5y...  


In [11]:
SNOWFLAKE_ACCOUNT   = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_USER      = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD  = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ROLE      = os.getenv("SNOWFLAKE_ROLE")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE  = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA    = os.getenv("SNOWFLAKE_SCHEMA")

In [12]:
STAGE_NAME = "s3_test_stage"          
EXT_TABLE  = "EMPLOYEES_SALARY_ENCRYPTED" 

print("=== Experiment 1 (Snowflake) – Attempt to query encrypted data ===")

conn = snowflake.connector.connect(
    account=SNOWFLAKE_ACCOUNT,
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    passcode=797264, ## UPDATE THIS!!
    role=SNOWFLAKE_ROLE,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
)

try:
    with conn.cursor() as cur:
        sql = f"SELECT * FROM {EXT_TABLE} LIMIT 10"
        print("Running:", sql)
        cur.execute(sql)
        rows = cur.fetchall()
        print("\nQuery returned rows:")
        for r in rows:
            print(r)

        if not rows:
            print("\nRESULT: No rows returned. Check if the external table could not "
                  "access or parse the encrypted S3 objects.")
        else:
            print("\nRESULT: Rows returned. Inspect if data is readable vs encrypted.")
except Exception as e:
    print("\nSnowflake query failed – this is expected if encryption / KMS permissions prevent access to the S3 objects.")
    print("Error:", e)
finally:
    conn.close()


=== Experiment 1 (Snowflake) – Attempt to query encrypted data ===


DatabaseError: 250001 (08001): Failed to connect to DB: XBB95219.snowflakecomputing.com:443. TOTP Invalid. You have 3 attempts remaining before the user will be blocked from using authenticator apps for a few minutes.