# Role Mining with Clustering

Discover implicit roles through access pattern clustering.

In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

from src.models.role_miner import RoleMiner
from src.data.preprocessors import IAMDataPreprocessor
import pandas as pd

# ---------------------------------------------------------------------------
# 1. Load the sample IAM log export and run the project's training-time
#    preprocessing over it. `df` is rebound to the preprocessed frame.
# ---------------------------------------------------------------------------
SAMPLE_LOGS_CSV = '../data/sample_iam_logs.csv'

df = pd.read_csv(SAMPLE_LOGS_CSV)
preprocessor = IAMDataPreprocessor()
df = preprocessor.preprocess_for_training(df)

# ---------------------------------------------------------------------------
# 2. Fit the role miner on the preprocessed frame.
#    NOTE(review): auto_tune_clusters=False presumably keeps the cluster
#    count fixed at N_ROLE_CLUSTERS -- confirm against RoleMiner.train().
# ---------------------------------------------------------------------------
N_ROLE_CLUSTERS = 8

print("Discovering roles...")
miner = RoleMiner(n_clusters=N_ROLE_CLUSTERS)
miner.train(df, auto_tune_clusters=False)

# ---------------------------------------------------------------------------
# 3. Report: summary of the discovered roles, then the role-health metrics.
# ---------------------------------------------------------------------------
role_summary = miner.get_role_summary()
# One print call with a newline separator emits the heading and the summary
# on consecutive lines, exactly like two back-to-back print() calls.
print("\nDiscovered Roles:", role_summary, sep="\n")

# detect_role_explosion() returns a mapping; print one indented line per entry.
health = miner.detect_role_explosion()
print("\nRole Health:")
for metric, reading in health.items():
    print(f"  {metric}: {reading}")

# Save model
# Ensure the output directory exists before saving: on a fresh checkout
# '../models/trained' may be missing, and it is not visible from this cell
# whether RoleMiner.save() creates parent directories itself. With
# exist_ok=True this is a no-op when the directory is already there.
model_path = '../models/trained/role_miner.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
miner.save(model_path)