In [1]:
# Cell 1: Install required packages
!pip install cryptography pandas faker numpy

Collecting faker
  Downloading faker-37.4.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.4.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.4.0


Smart Consent Engine

In [4]:
from datetime import datetime, timedelta
import json
import hashlib
from IPython.display import display, JSON

class ConsentEngine:
    def __init__(self, patient_id):
        self.patient_id = patient_id
        self.consent_log = []

    def grant_consent(self, recipient, purpose, ttl_hours=24, fields=None):
        """Generate a time-limited consent record"""
        consent = {
            "patient_id": self.patient_id,
            "recipient": recipient,
            "purpose": purpose,
            "granted_at": datetime.utcnow().isoformat(),
            "expires_at": (datetime.utcnow() + timedelta(hours=ttl_hours)).isoformat(),
            "fields": fields or ["allergies", "medications"],
            "signature": None
        }
        consent["signature"] = self._hash_consent(consent)
        self.consent_log.append(consent)
        return consent

    def _hash_consent(self, consent):
        """Create tamper-proof hash of consent terms"""
        data = json.dumps(consent, sort_keys=True).encode()
        return hashlib.sha256(data).hexdigest()

    def validate_consent(self, recipient, field):
        """Check if consent is valid"""
        now = datetime.utcnow()
        for consent in self.consent_log:
            if (consent["recipient"] == recipient and
                field in consent["fields"] and
                datetime.fromisoformat(consent["expires_at"]) > now):
                return consent["signature"]
        return False

# Demo
engine = ConsentEngine("patient-123")
consent = engine.grant_consent(
    recipient="kampala_hospital",
    purpose="emergency_treatment",
    ttl_hours=168,
    fields=["allergies", "blood_type"]
)

print(" Consent Engine Ready")
display(JSON(consent))

 Consent Engine Ready


<IPython.core.display.JSON object>

Field-Level Encryption

In [5]:
from cryptography.fernet import Fernet
import pandas as pd
import base64

class HealthDataEncryptor:
    def __init__(self, key=None):
        self.key = key or Fernet.generate_key()
        self.cipher = Fernet(self.key)

    def encrypt_field(self, value):
        if pd.isna(value): return value
        return self.cipher.encrypt(str(value).encode()).decode()

    def decrypt_field(self, encrypted):
        if pd.isna(encrypted): return encrypted
        return self.cipher.decrypt(encrypted.encode()).decode()

    def encrypt_dataframe(self, df, sensitive_fields):
        encrypted_df = df.copy()
        for field in sensitive_fields:
            if field in encrypted_df.columns:
                encrypted_df[field] = encrypted_df[field].apply(self.encrypt_field)
        return encrypted_df

# Demo with sample data
encryptor = HealthDataEncryptor()
sample_data = pd.DataFrame({
    "patient_id": ["101", "102"],
    "hiv_status": ["positive", "negative"],
    "mental_health": ["depression", None]
})

encrypted_df = encryptor.encrypt_dataframe(sample_data, ["hiv_status", "mental_health"])

print(" Encryption Ready")
print("Original Data:")
display(sample_data)
print("\nEncrypted Data:")
display(encrypted_df)

 Encryption Ready
Original Data:


Unnamed: 0,patient_id,hiv_status,mental_health
0,101,positive,depression
1,102,negative,



Encrypted Data:


Unnamed: 0,patient_id,hiv_status,mental_health
0,101,gAAAAABoVZFXwlAJLwomq3vbCMHprlxkY4vbyduHe1EXI4...,gAAAAABoVZFXwHO1GbWBwOdFcMTBHv7dGcALazriWYJknU...
1,102,gAAAAABoVZFXHLaJg2Ruo2ehFzfNQJoYx8neuSiacZtJ1V...,


K - Anonymization (k=5)

In [6]:
import numpy as np
from faker import Faker

def anonymize_dataset(df, quasi_ids, sensitive_attrs, k=5):
    """
    params:
        quasi_ids: ["age", "zip_code"] - columns to generalize
        sensitive_attrs: ["diagnosis"] - columns to protect
    """
    # 1. Generalize quasi-identifiers
    if "age" in quasi_ids:
        df["age"] = (df["age"] // 10) * 10  # Age brackets of 10 years

    if "zip_code" in quasi_ids:
        df["zip_code"] = df["zip_code"].str[:3] + "XX"

    # 2. Add noise to continuous sensitive attributes
    for attr in sensitive_attrs:
        if df[attr].dtype in [np.int64, np.float64]:
            df[attr] = df[attr] + np.random.normal(0, 2, len(df))

    # 3. Remove groups smaller than k
    group_counts = df.groupby(quasi_ids).size()
    valid_groups = group_counts[group_counts >= k].index
    mask = df.set_index(quasi_ids).index.isin(valid_groups)
    return df[mask]

# Generate sample data
fake = Faker()
data = {
    "patient_id": [str(i) for i in range(100)],
    "age": [fake.random_int(18, 90) for _ in range(100)],
    "zip_code": [fake.zipcode() for _ in range(100)],
    "diagnosis": np.random.choice(["Malaria", "HIV", "Diabetes", "Hypertension"], 100)
}
df = pd.DataFrame(data)

# Anonymize
anon_df = anonymize_dataset(
    df.copy(),
    quasi_ids=["age", "zip_code"],
    sensitive_attrs=["age", "diagnosis"],
    k=5
)

print(" k-Anonymization Ready (k=5)")
print(f"Original records: {len(df)}")
print(f"Anonymized records: {len(anon_df)}")
display(anon_df.head(3))

 k-Anonymization Ready (k=5)
Original records: 100
Anonymized records: 0


Unnamed: 0,patient_id,age,zip_code,diagnosis


Integration Test

In [7]:
# Cell 5: Integration Test
# Create synthetic patient
patient_data = pd.DataFrame({
    "patient_id": ["123"],
    "name": ["John Doe"],
    "blood_type": ["A+"],
    "allergies": ["peanuts"],
    "hiv_status": ["negative"],
    "age": [35],
    "zip_code": ["12345"]
})

# Step 1: Get consent
engine = ConsentEngine("123")
consent = engine.grant_consent(
    recipient="emergency_team",
    purpose="ER_admission",
    fields=["blood_type", "allergies"]
)

# Step 2: Encrypt sensitive data
encrypted_data = encryptor.encrypt_dataframe(
    patient_data,
    sensitive_fields=["hiv_status"]
)

# Step 3: Anonymize for research
research_data = anonymize_dataset(
    patient_data.copy(),
    quasi_ids=["age", "zip_code"],
    sensitive_attrs=["age"],
    k=1
)

print(" Integrated Pipeline Results")
print("\n1. Consent Record:")
display(JSON(consent))

print("\n2. Encrypted Clinical Data:")
display(encrypted_data)

print("\n3. Research-Ready Anonymized Data:")
display(research_data)

 Integrated Pipeline Results

1. Consent Record:


<IPython.core.display.JSON object>


2. Encrypted Clinical Data:


Unnamed: 0,patient_id,name,blood_type,allergies,hiv_status,age,zip_code
0,123,John Doe,A+,peanuts,gAAAAABoVZJOtupxgzyuIpOPnICo93b8obKr3DXUmIYP47...,35,12345



3. Research-Ready Anonymized Data:


Unnamed: 0,patient_id,name,blood_type,allergies,hiv_status,age,zip_code
0,123,John Doe,A+,peanuts,negative,29.912072,123XX


Save encrypted data

In [8]:
# Save encrypted data
encrypted_data.to_csv("encrypted_patients.csv", index=False)

# Save consent logs
import json
with open("consent_logs.json", "w") as f:
    json.dump(engine.consent_log, f)