In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import classification_report
import joblib
import os
from cryptography.fernet import Fernet
import logging

# --- Security setup ---
# Configure the audit log first so the key-handling warning below is recorded.
logging.basicConfig(filename='audit.log', level=logging.INFO)

# Key management: a key generated at start-up is lost when the session ends,
# which makes every file encrypted with it permanently unreadable. Prefer a
# persistent key supplied through the FERNET_KEY environment variable (the
# URL-safe base64 value produced by Fernet.generate_key()); in production the
# variable should be populated from a vault or secret manager.
# The key itself is never written to the log.
_env_key = os.environ.get('FERNET_KEY')
if _env_key:
    key = _env_key.encode()
else:
    # Fall back to the original behaviour: a fresh, session-only key.
    key = Fernet.generate_key()
    logging.warning(
        "FERNET_KEY is not set; using an ephemeral key. Files encrypted in "
        "this session cannot be decrypted after it ends.")
# Fernet raises ValueError here if the supplied key is malformed.
cipher = Fernet(key)

def log_action(action):
    """Record *action* as an INFO-level entry via the ``logging`` module.

    Args:
        action: Message describing the step that was performed.
    """
    logging.log(logging.INFO, action)

def encrypt_file(input_filename, output_filename, cipher):
    """Encrypt one file and write the result to another path.

    The whole input file is read into memory, passed to ``cipher.encrypt`` in
    a single call, and the returned bytes are written to *output_filename* in
    binary mode. The operation is then recorded through ``log_action``.

    Args:
        input_filename: Path of the file to read.
        output_filename: Path the encrypted bytes are written to.
        cipher: Object exposing ``encrypt(bytes) -> bytes``.
    """
    with open(input_filename, 'rb') as source:
        plaintext = source.read()
    token = cipher.encrypt(plaintext)

    with open(output_filename, 'wb') as sink:
        sink.write(token)

    log_action(f"Encrypted {input_filename} to {output_filename}")

def decrypt_file(input_filename, output_filename, cipher):
    """Decrypt one file and write the recovered bytes to another path.

    The whole input file is read into memory, passed to ``cipher.decrypt`` in
    a single call, and the returned bytes are written to *output_filename* in
    binary mode. The operation is then recorded through ``log_action``.

    Args:
        input_filename: Path of the encrypted file to read.
        output_filename: Path the decrypted bytes are written to.
        cipher: Object exposing ``decrypt(bytes) -> bytes``.
    """
    with open(input_filename, 'rb') as source:
        token = source.read()
    plaintext = cipher.decrypt(token)

    with open(output_filename, 'wb') as sink:
        sink.write(plaintext)

    log_action(f"Decrypted {input_filename} to {output_filename}")

def save_model_secure(model, filename, cipher):
    """Serialize *model* with joblib and store it encrypted at *filename*.

    The model is first dumped to a private temporary file, which is then
    encrypted into *filename* via ``encrypt_file``. The plaintext temporary
    file is removed even when dumping or encrypting fails, so an unencrypted
    copy of the model is not left behind on error.

    Args:
        model: Object to serialize with ``joblib.dump``.
        filename: Destination path for the encrypted model.
        cipher: Cipher object forwarded to ``encrypt_file``.
    """
    import tempfile

    # mkstemp gives a unique name (no collision between concurrent runs) and
    # creates the file readable only by the current user.
    fd, temp = tempfile.mkstemp(suffix='.pkl')
    os.close(fd)
    try:
        joblib.dump(model, temp)
        encrypt_file(temp, filename, cipher)
    finally:
        # Always delete the plaintext copy, including on failure.
        os.remove(temp)
    log_action(f"Model saved securely as {filename}")

def load_model_secure(filename, cipher):
    """Decrypt the model stored at *filename* and return the loaded object.

    The encrypted file is decrypted via ``decrypt_file`` into a private
    temporary file, loaded with ``joblib.load``, and the plaintext temporary
    file is removed even when decryption or loading fails.

    Args:
        filename: Path of the encrypted model file.
        cipher: Cipher object forwarded to ``decrypt_file``.

    Returns:
        The object returned by ``joblib.load``.
    """
    import tempfile

    # mkstemp gives a unique name (no collision between concurrent runs) and
    # creates the file readable only by the current user.
    fd, temp = tempfile.mkstemp(suffix='.pkl')
    os.close(fd)
    try:
        decrypt_file(filename, temp, cipher)
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load files encrypted with a key you control.
        model = joblib.load(temp)
    finally:
        # Always delete the plaintext copy, including on failure.
        os.remove(temp)
    log_action(f"Model loaded securely from {filename}")
    return model

# --- 1. Data Load Security (PII check, logging) ---
DATA_PATH = '/content/balanced_fraud_data.csv'
# Log the path that is actually read so the audit trail matches the code.
log_action(f"Loading data from {DATA_PATH}")
df = pd.read_csv(DATA_PATH)

# PII Check (example columns, adapt as needed)
pii_columns = ['Name', 'SSN', 'Address', 'Phone', 'Email', 'DOB']
pii_found = [col for col in pii_columns if col in df.columns]
if pii_found:
    log_action(f"PII columns found and flagged: {pii_found}")
    # Flagging alone would still pass PII into the feature matrix and the
    # stored splits, so remove the columns before any further processing.
    df = df.drop(columns=pii_found)
    log_action(f"PII columns dropped before modeling: {pii_found}")
else:
    log_action("No PII columns found")

# --- 2. Preprocessing & Feature Engineering ---
df = df.dropna()

# Decide train/test membership BEFORE fitting the scaler so that test rows do
# not influence the scaling statistics (data leakage). Splitting row positions
# with the same test_size / random_state / stratify arguments is intended to
# select the same rows as splitting X and y directly.
row_positions = np.arange(len(df))
train_pos, test_pos = train_test_split(
    row_positions, test_size=0.2, random_state=42, stratify=df['Class'])

scaler = StandardScaler()
scaler.fit(df.iloc[train_pos][['Amount']])  # statistics from training rows only
df['Amount_Norm'] = scaler.transform(df[['Amount']])
# Hour of day, assuming 'Time' is in seconds - TODO confirm against the dataset.
df['Hour'] = (df['Time'] // 3600) % 24

X = df.drop(['Class', 'Time', 'Amount'], axis=1)
y = df['Class']

# --- 3. Data Split Security ---
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]


def _store_encrypted_array(array, stem, cipher):
    """Write *array* to '<stem>_encrypted.npy', never leaving '<stem>.npy' behind."""
    plain_path = f"{stem}.npy"
    try:
        np.save(plain_path, array)
        encrypt_file(plain_path, f"{stem}_encrypted.npy", cipher)
    finally:
        # Remove the plaintext copy even if saving or encrypting failed.
        if os.path.exists(plain_path):
            os.remove(plain_path)


def _load_encrypted_array(stem, cipher):
    """Return the array stored in '<stem>_encrypted.npy', cleaning up the plaintext file."""
    plain_path = f"{stem}.npy"
    try:
        decrypt_file(f"{stem}_encrypted.npy", plain_path, cipher)
        # NOTE(security): allow_pickle=True lets np.load unpickle object arrays,
        # which can execute code. Kept for compatibility with the original
        # behaviour; switch to False once the arrays are confirmed numeric.
        return np.load(plain_path, allow_pickle=True)
    finally:
        # Remove the plaintext copy even if decrypting or loading failed.
        if os.path.exists(plain_path):
            os.remove(plain_path)


_store_encrypted_array(X_train, 'X_train', cipher)
_store_encrypted_array(X_test, 'X_test', cipher)
_store_encrypted_array(y_train, 'y_train', cipher)
_store_encrypted_array(y_test, 'y_test', cipher)

log_action("Data split and securely stored as encrypted files.")

# --- 4. Data Decryption for Training (simulate secure environment) ---
X_train = _load_encrypted_array('X_train', cipher)
y_train = _load_encrypted_array('y_train', cipher)
log_action("Training data decrypted for model training.")

# --- 5. Model Training Security (logging only, secure env assumed) ---
log_action("Training XGBoost model...")
# 'use_label_encoder' was dropped: the installed XGBoost reports it as unused.
xgb_model = xgb.XGBClassifier(eval_metric='logloss')
xgb_model.fit(X_train, y_train)
log_action("Model training complete.")

# --- 6. Model Save Security ---
save_model_secure(xgb_model, 'xgboost_best_model_encrypted.pkl', cipher)

# --- 7. Model Load Security & Evaluation ---
X_test = _load_encrypted_array('X_test', cipher)
y_test = _load_encrypted_array('y_test', cipher)

xgb_loaded = load_model_secure('xgboost_best_model_encrypted.pkl', cipher)
y_pred = xgb_loaded.predict(X_test)
print(classification_report(y_test, y_pred))
log_action("Model loaded and evaluated securely on test data.")

Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     13005
         1.0       0.93      0.79      0.86        34

    accuracy                           1.00     13039
   macro avg       0.97      0.90      0.93     13039
weighted avg       1.00      1.00      1.00     13039

