<a href="https://colab.research.google.com/github/TOM-BOHN/SFDC-User-Permissions-AI/blob/main/Notebooks/SFDC_User_Permission_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

### Install Repo and Packages


In [1]:
import os

repo_url = "https://github.com/TOM-BOHN/SFDC-User-Permissions-AI.git"
repo_dir = "SFDC-User-Permissions-AI"  # Directory name for the cloned repo

if os.path.exists(repo_dir):
  # If the directory exists, pull the latest changes
  print(f"Repository '{repo_dir}' already exists. Pulling latest changes...")
  !git pull

else:
  # If the directory doesn't exist, clone the repo
  print(f"Cloning repository '{repo_dir}'...")
  !git clone {repo_url}

Cloning repository 'SFDC-User-Permissions-AI'...
Cloning into 'SFDC-User-Permissions-AI'...
remote: Enumerating objects: 408, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 408 (delta 40), reused 65 (delta 26), pack-reused 324 (from 1)[K
Receiving objects: 100% (408/408), 1.38 MiB | 5.32 MiB/s, done.
Resolving deltas: 100% (210/210), done.


In [2]:
# Install the Python SDK for google gen ai
!pip install -Uq "google-genai==1.7.0"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/144.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h

### Add Required Libraries and Tools

In [3]:
from google import genai
from google.genai import types

from IPython.display import Markdown, display

# A bare expression in the middle of a cell is not displayed, so print the
# SDK version explicitly to record it in the cell output.
print(f"google-genai version: {genai.__version__}")

###################################

import sys
import os
import io
from pprint import pprint
import glob
import time
from datetime import datetime

import enum
import json

import pandas as pd

###################################

# Make the cloned repo the working directory.
# NOTE(review): presumably this is what lets the `src.*` imports below and the
# relative `data/...` paths used later in the notebook resolve — confirm.
os.chdir('/content/SFDC-User-Permissions-AI')

###################################

# Import the processing functions
from src.processing import extract_json_fields
from src.utils.data_utils import save_data
from src.llms import (
    # Chat session management
    create_chat_session,

    # Category evaluation
    category_eval_summary,
    CategoryRating,
    CategoryLabel,
    classify_category,

    # Risk evaluation
    risk_eval_summary,
    RiskRating,
    classify_risk_rating,

    # Cloud evaluation
    cloud_eval_summary,
    CloudRating,
    classify_cloud,

    # Description evaluation
    description_eval_summary,
    QualityRating,
    classify_description
)



###################################

# Import the scraping functions
from src.scraping import (
    extract_permission_data,
    clean_permission_data,
    save_permission_data,
    scrape_permissions_from_file
)

### Setup the API key and Client

To run the following cell, your API key must be stored in a Google secret named `GOOGLE_API_KEY`.

If you don't already have an API key, you can grab one from [AI Studio](https://aistudio.google.com/app/apikey). You can find [detailed instructions in the docs](https://ai.google.dev/gemini-api/docs/api-key).

In [4]:
# The API key is read from the Colab secret named `GOOGLE_API_KEY`;
# it must be stored there before this cell is run.
from google.colab import userdata

# Build the Gen AI client used by every evaluation call below
client = genai.Client(
    api_key=userdata.get('GOOGLE_API_KEY'),
)

### Automated Retry Functionality

In [5]:
# The notebook issues many requests, so generate_content is wrapped with an
# automatic retry that kicks in when the per-minute quota is reached.
from google.api_core import retry


def is_retriable(e):
  """Return True when `e` is a genai APIError whose code is 429 or 503."""
  return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Guard on `__wrapped__` so that re-running this cell does not wrap the
# method a second time.
if not hasattr(genai.models.Models.generate_content, '__wrapped__'):
  retry_on_quota = retry.Retry(predicate=is_retriable)
  genai.models.Models.generate_content = retry_on_quota(
      genai.models.Models.generate_content)

# Scrape the Permissions

In [6]:
# Saved permission-set pages (app and system permissions) to scrape.
html_file_paths = [
    '/content/SFDC-User-Permissions-AI/data/raw/perm_sets_app_perms_salesforce_00DgK000001iK7J.mhtml',
    '/content/SFDC-User-Permissions-AI/data/raw/perm_sets_system_perms_salesforce_00DgK000001iK7J.mhtml',
    '/content/SFDC-User-Permissions-AI/data/raw/perm_sets_app_perms_salesforce_00DHu000002irdp.mhtml',
    '/content/SFDC-User-Permissions-AI/data/raw/perm_sets_system_perms_salesforce_00DHu000002irdp.mhtml',
]

# Parse every file into a single DataFrame; no output path is supplied here.
df = scrape_permissions_from_file(
    html_file_paths=html_file_paths,
    output_path=None,
)

Processing /content/SFDC-User-Permissions-AI/data/raw/perm_sets_app_perms_salesforce_00DgK000001iK7J.mhtml...
Processing /content/SFDC-User-Permissions-AI/data/raw/perm_sets_system_perms_salesforce_00DgK000001iK7J.mhtml...
Processing /content/SFDC-User-Permissions-AI/data/raw/perm_sets_app_perms_salesforce_00DHu000002irdp.mhtml...
Processing /content/SFDC-User-Permissions-AI/data/raw/perm_sets_system_perms_salesforce_00DHu000002irdp.mhtml...

Found total of 866 permissions across 4 files.
Number of blank Descriptions: 9
Number of blank API Names: 0

First few permissions:
                                     Permission Name  \
0                        Access Conversation Entries   
1  Access Virtual Desktop Infrastructure for Voic...   
2                      Agentforce Service Agent User   
3                 Agent Initiated Outbound Messaging   
4                                Configure Messaging   

                      API Name Permission Requirement  \
0                  CanAcces

In [7]:
# Show the first 100 scraped permission rows as a visual sanity check
df.head(100)

Unnamed: 0,Permission Name,API Name,Permission Requirement,Description
0,Access Conversation Entries,CanAccessCE,,Grants users access to Conversation Entries
1,Access Virtual Desktop Infrastructure for Voic...,CitrixVDIPlatform,,Grant users virtual desktop (VDI) support to c...
2,Agentforce Service Agent User,AgentforceServiceAgentUser,,Analyze topics and perform actions as an auton...
3,Agent Initiated Outbound Messaging,LMOutboundMessagingUserPerm,,Initiate messaging with customers.
4,Configure Messaging,ConfigureLiveMessage,,
...,...,...,...,...
95,Access to manage the template framework apps,AppFrameworkManageApp,,"Allows access to create, update, and manage ap..."
96,Access to manage the template framework templates,AppFrameworkManageTemplate,,"Allows access to create, update, and manage ap..."
97,Access to View-Only Licensed Templates and Apps,ViewOnlyEmbeddedAppUser,,View apps based on View-Only License.
98,Access to view templated apps and assets,AppFrameworkViewApp,,"Allows access to app, analytics, data, and oth..."


# Import Source Data

In [None]:
# Read the permission reference table straight from the GitHub repo.
# Swap in the commented-out URL to work with the smaller sample file instead.
#url = "https://raw.githubusercontent.com/TOM-BOHN/SFDC-User-Permissions-AI/refs/heads/main/data/input/User_Permission_Reference_Data__Sample.csv"
url = "https://raw.githubusercontent.com/TOM-BOHN/SFDC-User-Permissions-AI/refs/heads/main/data/input/user_permission_reference_data__full_list.csv"
perm_list_df = pd.read_csv(url)

# Later cells read perm_list_df['Expanded Description'], so make sure the
# column exists even when the source file does not provide it.
if "Expanded Description" not in perm_list_df.columns:
    perm_list_df["Expanded Description"] = ""

print(f"Number of records: {perm_list_df.shape[0]}")
perm_list_df.head()

: 

# Expanded Description Creation

In [None]:
# Load the prompt template from the source file.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
with open('/content/SFDC-User-Permissions-AI/src/prompts/templates/prompt_user_perm_description.md', 'r', encoding='utf-8') as f:
    PROMPT_USER_PERM_DESCRIPTION = f.read()
# Display the prompt
print(PROMPT_USER_PERM_DESCRIPTION)

In [None]:
# Create a new chat session
#description_chat_session = create_chat_session(client = client, model_name='gemini-2.0-flash')

# Evaluate a single permission (the first row of the table) before running the batch.
# The call yields three values: the text evaluation, the structured rating, and the
# full-fidelity markdown output.
description_text_eval, description_struct_eval, full_fidelity_eval = description_eval_summary(
    prompt=PROMPT_USER_PERM_DESCRIPTION,
    name=perm_list_df['Permission Name'][0],
    api_name=perm_list_df['API Name'][0],
    description=perm_list_df['Description'][0],
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,  # Create a new session
    debug=True
)

# Display the result
print(description_text_eval)
print(f"Description: {description_struct_eval.name} ({description_struct_eval.value})")
display(Markdown(full_fidelity_eval))

In [None]:
# Run the description prompt over the permission table in batch mode.
# NOTE(review): total_records=5 presumably caps this run at five rows — confirm.
description_results_df = classify_description(
    input_df=perm_list_df,
    prompt=PROMPT_USER_PERM_DESCRIPTION,
    checkpoint_dir="data/checkpoints",
    job_id=None,
    resume_from_checkpoint=False,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,
    total_records=5,
    checkin_interval=60,
    checkpoint_interval=20,
    debug=True,
    verbose=False,
)
description_results_df

In [None]:
# Look at the raw model output stored for the row labelled 1
description_results_df.loc[1, 'Evaluation']

In [None]:
# Expand the JSON text in the 'Evaluation' column into separate columns.
# `fields` maps each JSON key to the column name it is given in the output.
description_df = extract_json_fields(
    description_results_df,
    json_column='Evaluation',
    fields={
        'expanded_description': 'Expanded Description',
        'salesforce_feature': 'Salesforce Feature',
        'salesforce_cloud': 'Salesforce Cloud',
        'quality_score_label': 'Quality Score Label',
        'quality_score_value': 'Quality Score Value',
        'weighted_quality_score': 'Weighted Quality Score',
        'scores': 'Scores',
        'rationale': 'Rationale',
        'confidence': 'Confidence',
        'top_urls': 'Top URLs',
    },
    debug=True,
)

In [None]:
# Preview the first record of the parsed output table as a dict,
# so that each column's value is listed next to its name
description_df.iloc[0].to_dict()

In [None]:
# Persist the parsed description results under a timestamped file name
save_data(
    data=description_df,
    filename='description_output_' + datetime.now().strftime("%Y%m%d_%H%M%S"),
    data_type='output',  # This will save to data/output/
    format='csv',
    index=False,
)

In [None]:
# Reload the most recent description output from disk and fold its
# description columns back into perm_list_df for the later classification steps.

# Find all matching files
output_files = glob.glob('data/output/description_output_*.csv')
if output_files:
    # Get the most recent file (by modification time)
    latest_file = max(output_files, key=os.path.getmtime)
    print(f"Loading: {latest_file}")
    description_df = pd.read_csv(latest_file)
else:
    print("No description_output file found.")
    description_df = None


if description_df is not None:
    # Keep a single row per API Name on the right-hand side: with duplicate
    # keys a left merge would emit one output row per match and inflate the
    # number of rows in perm_list_df.
    description_lookup = description_df[
        ["API Name", "Expanded Description", "Salesforce Feature", "Salesforce Cloud"]
    ].drop_duplicates(subset="API Name", keep="last")

    merged_df = perm_list_df.merge(
        description_lookup,
        on="API Name",
        how="left",
        suffixes=('', '_desc')
    )
    # Overwrite the columns with the merged values (if present).
    # A `_desc` column only appears when perm_list_df already had a column of
    # that name; the merged value wins and the existing value fills the gaps.
    for col in ["Expanded Description", "Salesforce Feature", "Salesforce Cloud"]:
        desc_col = f"{col}_desc"
        if desc_col in merged_df.columns:
            merged_df[col] = merged_df[desc_col].combine_first(merged_df[col])
            merged_df = merged_df.drop(columns=[desc_col])
    perm_list_df = merged_df
else:
    print("Descriptions not loaded; perm_list_df unchanged.")

print(f"Number of records: {len(perm_list_df)}")
perm_list_df.head()

# Permission Risk Rating

In [9]:
# Load the prompt template from the source file.
# The encoding is given explicitly because the template contains non-ASCII
# characters, so the read must not depend on the platform's default encoding.
with open('/content/SFDC-User-Permissions-AI/src/prompts/templates/prompt_user_perm_risk_rating.md', 'r', encoding='utf-8') as f:
    PROMPT_USER_PERM_RISK_RATING = f.read()
# Display the prompt
print(PROMPT_USER_PERM_RISK_RATING)

<!---
# Permission Risk Evaluation Prompt Template  
# --------------------------------------------------
# This template can be imported and formatted with the specific
# `permission_name` and `permission_api_name` and `permission_description` variables to create
# a concrete evaluation prompt for any Salesforce permission.
# --------------------------------------------------
-->

# Instruction

You are a **Salesforce security risk assessor**.
Your task is to evaluate the **inherent risk level** of a Salesforce permission (or capability) when granted to a user.
We will provide you with the permission name and a short description of what it allows.
Analyze the permission against the **Evaluation Criteria** below and assign one of the five **Risk Levels** defined in the Rating Rubric.
Give step‑by‑step reasoning for your decision, citing the specific criteria that most influenced your rating.

# Evaluation

## Metric Definition

- **Permission Risk** [aka weighted_score] measures the po

In [10]:
# Size of the risk-rating prompt template: total characters and
# whitespace-separated words
chars = len(PROMPT_USER_PERM_RISK_RATING)
words = len(PROMPT_USER_PERM_RISK_RATING.split())

print("\nPrompt Template Statistics:")
print(f"Total words: {words}")
print(f"Total characters: {chars}")


Prompt Template Statistics:
Total words: 897
Total characters: 7473


In [11]:
# Optional: create a dedicated chat session instead of passing None below
#risk_chat_session = create_chat_session(client = client, model_name='gemini-2.0-flash')

# Fields of the first permission in the table, used as a single trial input
risk_sample = {
    'name': perm_list_df['Permission Name'][0],
    'api_name': perm_list_df['API Name'][0],
    'description': perm_list_df['Description'][0],
    'expanded_description': perm_list_df['Expanded Description'][0],
}

# Evaluate that single permission
risk_text_eval, risk_struct_eval = risk_eval_summary(
    prompt=PROMPT_USER_PERM_RISK_RATING,
    **risk_sample,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,  # Create a new session
)

# Show the text evaluation followed by the structured rating
print(risk_text_eval)
print(f"Risk Rating: {risk_struct_eval.name} ({risk_struct_eval.value})")

```json
{
  "risk_rating_tier": "Controlled",
  "risk_rating_score": "2",
  "weighted_score": 2.0,
  "scores": {
    "Data_Sensitivity": 2,
    "Scope_of_Impact": 2,
    "Configurational_Authority": 2,
    "External_Data_Exposure": 1,
    "Regulatory_Obligation": 2,
    "Segregation_of_Duties": 2,
    "Auditability": 2,
    "Reversibility": 2
  },
  "rationale": "Granting user access activations introduces a moderate level of risk, primarily related to data sensitivity and scope of impact. Activating user access inherently involves sensitive data, as it pertains to who can access what within the system. The scope of impact is moderate, affecting user permissions and potentially the data they can access. While there's potential for misuse, the limited configurational authority and external data exposure reduce the overall risk, making it suitable for a controlled environment with active monitoring.",
  "confidence": "High"
}
```
Risk Rating: CONTROLLED (2)


In [12]:
# Run the risk-rating prompt over the permission table in batch mode.
# With total_records=5 the recorded run log reports 5 records processed.
risk_results_df = classify_risk_rating(
    input_df=perm_list_df,
    prompt=PROMPT_USER_PERM_RISK_RATING,
    checkpoint_dir="data/checkpoints",
    job_id=None,
    resume_from_checkpoint=False,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,
    total_records=5,
    checkin_interval=60,
    checkpoint_interval=20,
    debug=True,
    verbose=False,
)
risk_results_df

Starting job 20250502_203930 to process 5 records.
####################



  results_df = pd.concat([results_df, new_row], ignore_index=True)



####################
Total time taken: 11.86 seconds to process 5 records.
Average time per record: 2.37 seconds


Unnamed: 0,Permission Name,API Name,Description,Risk Rating,Evaluation,Processing Time
0,Allows user access activations,AccessActivation,Allows user access activations.,RiskRating.GENERAL,"```json\n{\n ""risk_rating_tier"": ""General"",\n...",2.33
1,Access Application Form-Related Functionality,AccessApplicationFormObjects,Grants users access to application form-relate...,RiskRating.CONTROLLED,"```json\n{\n ""risk_rating_tier"": ""Controlled""...",2.31
2,Manage Budgets,AccessBudgetManagement,Lets users view and manage budget management r...,RiskRating.GENERAL,"```json\n{\n ""risk_rating_tier"": ""Controlled""...",2.26
3,Access Data Cloud Data Explorer,AccessCdpDataExplorer,Allows user access Data Cloud Data Explorer.,RiskRating.SENSITIVE,"```json\n{\n ""risk_rating_tier"": ""Sensitive"",...",2.48
4,Access Data Cloud Profile Explorer,AccessCdpProfileExplorer,Allows user access Data Cloud Profile Explorer.,RiskRating.SENSITIVE,"```json\n{\n ""risk_rating_tier"": ""Sensitive"",...",2.47


In [13]:
# Look at the raw model output stored for the row labelled 1
risk_results_df.loc[1, 'Evaluation']

'```json\n{\n  "risk_rating_tier": "Controlled",\n  "risk_rating_score": "2",\n  "weighted_score": 2.2,\n  "scores": {\n    "Data_Sensitivity": 2,\n    "Scope_of_Impact": 3,\n    "Configurational_Authority": 1,\n    "External_Data_Exposure": 2,\n    "Regulatory_Obligation": 2,\n    "Segregation_of_Duties": 2,\n    "Auditability": 3,\n    "Reversibility": 2\n  },\n  "rationale": "The \'Access Application Form-Related Functionality\' permission grants access to potentially sensitive application form data, impacting the scope of records a user can view or modify. There\'s a moderate risk of violating regulatory obligations (like data privacy) if the application forms contain PII. However, the configurational authority is low, and data exposure is limited, suggesting a controlled risk level. Good auditability and reversibility further mitigate the risk.",\n  "confidence": "High"\n}\n```'

In [14]:
# Expand the JSON text in the 'Evaluation' column into separate columns.
# No `fields` mapping is passed here, so the helper's default applies.
risk_rating_df = extract_json_fields(
    risk_results_df,
    json_column='Evaluation',
    debug=True,
)


First 5 rows of processed data:


Unnamed: 0,Permission Name,API Name,Description,Risk Rating,Evaluation,Processing Time,Risk Rating Tier,Risk Rating Score,Weighted Score,Scores,Rationale,Confidence
0,Allows user access activations,AccessActivation,Allows user access activations.,RiskRating.GENERAL,"{ ""risk_rating_tier"": ""General"", ""risk_ratin...",2.33,General,1,1.0,"{'Data_Sensitivity': 1, 'Scope_of_Impact': 1, ...",The permission 'Allows user access activations...,High
1,Access Application Form-Related Functionality,AccessApplicationFormObjects,Grants users access to application form-relate...,RiskRating.CONTROLLED,"{ ""risk_rating_tier"": ""Controlled"", ""risk_ra...",2.31,Controlled,2,2.2,"{'Data_Sensitivity': 2, 'Scope_of_Impact': 3, ...",The 'Access Application Form-Related Functiona...,High
2,Manage Budgets,AccessBudgetManagement,Lets users view and manage budget management r...,RiskRating.GENERAL,"{ ""risk_rating_tier"": ""Controlled"", ""risk_ra...",2.26,Controlled,2,2.2,"{'Data_Sensitivity': 3, 'Scope_of_Impact': 2, ...",The `Manage Budgets` permission allows users t...,High
3,Access Data Cloud Data Explorer,AccessCdpDataExplorer,Allows user access Data Cloud Data Explorer.,RiskRating.SENSITIVE,"{ ""risk_rating_tier"": ""Sensitive"", ""risk_rat...",2.48,Sensitive,3,2.7,"{'Data_Sensitivity': 3, 'Scope_of_Impact': 3, ...",Access to Data Cloud Data Explorer could expos...,High
4,Access Data Cloud Profile Explorer,AccessCdpProfileExplorer,Allows user access Data Cloud Profile Explorer.,RiskRating.SENSITIVE,"{ ""risk_rating_tier"": ""Sensitive"", ""risk_rat...",2.47,Sensitive,3,2.6,"{'Data_Sensitivity': 3, 'Scope_of_Impact': 2, ...",Accessing Data Cloud Profile Explorer poses a ...,High



Columns added: ['Risk Rating Tier', 'Risk Rating Score', 'Weighted Score', 'Scores', 'Rationale', 'Confidence']


In [15]:
# Preview the first record of the parsed output table as a dict,
# so that each column's value is listed next to its name
risk_rating_df.iloc[0].to_dict()

{'Permission Name': 'Allows user access activations',
 'API Name': 'AccessActivation',
 'Description': 'Allows user access activations.',
 'Risk Rating': <RiskRating.GENERAL: '1'>,
 'Evaluation': '{  "risk_rating_tier": "General",  "risk_rating_score": "1",  "weighted_score": 1.0,  "scores": {    "Data_Sensitivity": 1,    "Scope_of_Impact": 1,    "Configurational_Authority": 1,    "External_Data_Exposure": 1,    "Regulatory_Obligation": 1,    "Segregation_of_Duties": 1,    "Auditability": 1,    "Reversibility": 1  },  "rationale": "The permission \'Allows user access activations\' seems to grant basic access, likely related to activating user accounts or certain features. It doesn\'t inherently expose sensitive data, grant broad configurational authority, or create external data exposure. Regulatory and segregation of duties risks appear minimal, as does the potential for irreversible actions. Overall, the potential impact seems low.",  "confidence": "High"}',
 'Processing Time': 2.33,

In [16]:
# Persist the parsed risk-rating results under a timestamped file name
save_data(
    data=risk_rating_df,
    filename='risk_rating_output_' + datetime.now().strftime("%Y%m%d_%H%M%S"),
    data_type='output',  # This will save to data/output/
    format='csv',
    index=False,
)



'data/output/risk_rating_output_20250502_203942.csv'

# Permission Category Classification

In [17]:
# Load the prompt template from the source file.
# The encoding is given explicitly because the template contains non-ASCII
# characters, so the read must not depend on the platform's default encoding.
with open('/content/SFDC-User-Permissions-AI/src/prompts/templates/prompt_user_perm_category.md', 'r', encoding='utf-8') as f:
    PROMPT_USER_PERM_CATEGORY = f.read()
# Display the prompt
print(PROMPT_USER_PERM_CATEGORY)

<!---
# Permission Category Evaluation Prompt Template  
# --------------------------------------------------
# This template can be imported and formatted with the specific
# `permission_name` and `permission_api_name` and `permission_description` variables to create
# a concrete evaluation prompt for any Salesforce permission.
# --------------------------------------------------
-->

# Instruction

You are a **Salesforce security risk assessor**.
Your task is to categorize user security permission int **permission categories**.
We will provide you with the permission name and a short description of what the Salesforce user permission (or capability) grants to a user.
Analyze the permission against the **Evaluation Criteria** below and assign one of the twenty **Permission Categories** defined based on similarity of the category and the permission.
Give step‑by‑step reasoning for your decision, citing the specific criteria that most influenced your categorization.

# Evaluation

## Me

In [18]:
# Optional: create a dedicated chat session instead of passing None below
#category_chat_session = create_chat_session(client = client, model_name='gemini-2.0-flash')

# Fields of the first permission in the table, used as a single trial input
category_sample = {
    'name': perm_list_df['Permission Name'][0],
    'api_name': perm_list_df['API Name'][0],
    'description': perm_list_df['Description'][0],
    'expanded_description': perm_list_df['Expanded Description'][0],
}

# Evaluate that single permission
cat_text_eval, structured_cat_rating, structured_cat_label = category_eval_summary(
    prompt=PROMPT_USER_PERM_CATEGORY,
    **category_sample,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,  # Create a new session
)

# Show the text evaluation followed by the structured rating and label
print(cat_text_eval)
print(f"Category Rating: {structured_cat_rating.name} ({structured_cat_rating.value})")
print(f"Category Label: {structured_cat_label.name} ({structured_cat_label.value})")

```json
{
  "permission_category_label": "User Management Admin",
  "permission_category_order": "3",
  "match_rating_tier": "High Match",
  "match_rating_score": "4",
  "weighted_match_score": 4.2,
  "scores": {
    "Primary Product or Feature Anchor": 4,
    "Administrative vs End-User Function": 5,
    "Data Interaction Pattern": 2,
    "Platform Layer or Add-On Alignment": 2,
    "Intended User Persona or Business Process": 5
  },
  "rationale": "The permission to 'Allows user access activations' strongly aligns with User Management Admin, as it directly relates to controlling user access within the Salesforce environment. This category includes managing user lifecycles and access control, and activation clearly falls within that scope. The permission is administrative in nature, not end-user, since it is involved in managing user accounts and their access. The other categories do not align closely with the concept of user activation.",
  "confidence": "High"
}
```
Category Rating:

In [19]:
# Run the category prompt over the permission table in batch mode.
# With total_records=5 the recorded run log reports 5 records processed.
category_results_df = classify_category(
    input_df=perm_list_df,
    prompt=PROMPT_USER_PERM_CATEGORY,
    checkpoint_dir="data/checkpoints",
    job_id=None,
    resume_from_checkpoint=False,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,
    total_records=5,
    checkin_interval=60,
    checkpoint_interval=20,
    debug=True,
    verbose=False,
)
category_results_df

Starting job 20250502_203945 to process 5 records.
####################



  results_df = pd.concat([results_df, new_row], ignore_index=True)



####################
Total time taken: 14.71 seconds to process 5 records.
Average time per record: 2.94 seconds


Unnamed: 0,Permission Name,API Name,Description,Category Rating,Category Label,Evaluation,Processing Time
0,Allows user access activations,AccessActivation,Allows user access activations.,CategoryRating.EXACT_MATCH,CategoryLabel.GENERAL_ADMIN,"```json\n{\n ""permission_category_label"": ""Us...",3.04
1,Access Application Form-Related Functionality,AccessApplicationFormObjects,Grants users access to application form-relate...,CategoryRating.UNKNOWN,CategoryLabel.USER_INTERFACE,"```json\n{\n ""permission_category_label"": ""Ge...",3.04
2,Manage Budgets,AccessBudgetManagement,Lets users view and manage budget management r...,CategoryRating.EXACT_MATCH,CategoryLabel.GENERAL_ADMIN,"```json\n{\n ""permission_category_label"": ""Fi...",3.0
3,Access Data Cloud Data Explorer,AccessCdpDataExplorer,Allows user access Data Cloud Data Explorer.,CategoryRating.HIGH_MATCH,CategoryLabel.GENERAL_ADMIN,"```json\n{\n ""permission_category_label"": ""Da...",2.75
4,Access Data Cloud Profile Explorer,AccessCdpProfileExplorer,Allows user access Data Cloud Profile Explorer.,CategoryRating.UNKNOWN,CategoryLabel.GENERAL_INDUSTRIES_CLOUD,"```json\n{\n ""permission_category_label"": ""Da...",2.87


In [20]:
# Look at the raw model output stored for the row labelled 1
category_results_df.loc[1, 'Evaluation']

'```json\n{\n  "permission_category_label": "General Admin",\n  "permission_category_order": "1",\n  "match_rating_tier": "Low Match",\n  "match_rating_score": "2",\n  "weighted_match_score": 2.6,\n  "scores": {\n    "Primary Product or Feature Anchor": 3,\n    "Administrative vs End-User Function": 3,\n    "Data Interaction Pattern": 2,\n    "Platform Layer or Add-On Alignment": 2,\n    "Intended User Persona or Business Process": 3\n  },\n  "rationale": "The permission name \\"Access Application Form-Related Functionality\\" hints at broader administrative functionality within Salesforce. While \'application form-related objects\' could exist in multiple areas, the broad access suggests some org-wide settings or configuration. The administrative versus end-user function and primary feature alignment criteria are the most influential here, suggesting some administrative control over application forms.",\n  "confidence": "Low"\n}\n```'

In [21]:
# Expand the JSON text in the 'Evaluation' column into separate columns.
# `fields` maps each JSON key to the column name it is given in the output.
category_df = extract_json_fields(
    category_results_df,
    json_column='Evaluation',
    fields={
        'permission_category_label': 'Permission Category Label',
        'permission_category_order': 'Permission Category Order',
        'match_rating_tier': 'Match Rating Tier',
        'match_rating_score': 'Match Rating Score',
        'weighted_match_score': 'Weighted Match Score',
        'scores': 'Scores',
        'rationale': 'Rationale',
        'confidence': 'Confidence',
    },
    debug=True,
)


First 5 rows of processed data:


Unnamed: 0,Permission Name,API Name,Description,Category Rating,Category Label,Evaluation,Processing Time,Permission Category Label,Permission Category Order,Match Rating Tier,Match Rating Score,Weighted Match Score,Scores,Rationale,Confidence
0,Allows user access activations,AccessActivation,Allows user access activations.,CategoryRating.EXACT_MATCH,CategoryLabel.GENERAL_ADMIN,"{ ""permission_category_label"": ""User Manageme...",3.04,User Management Admin,3,High Match,4,3.6,"{'Primary Product or Feature Anchor': 4, 'Admi...",The permission 'Allows user access activations...,High
1,Access Application Form-Related Functionality,AccessApplicationFormObjects,Grants users access to application form-relate...,CategoryRating.UNKNOWN,CategoryLabel.USER_INTERFACE,"{ ""permission_category_label"": ""General Admin...",3.04,General Admin,1,Low Match,2,2.6,"{'Primary Product or Feature Anchor': 3, 'Admi...","The permission name ""Access Application Form-R...",Low
2,Manage Budgets,AccessBudgetManagement,Lets users view and manage budget management r...,CategoryRating.EXACT_MATCH,CategoryLabel.GENERAL_ADMIN,"{ ""permission_category_label"": ""Financial Ser...",3.0,Financial Services Cloud,23,High Match,4,3.8,"{'Primary Product or Feature Anchor': 5, 'Admi...",The permission 'Manage Budgets' strongly align...,High
3,Access Data Cloud Data Explorer,AccessCdpDataExplorer,Allows user access Data Cloud Data Explorer.,CategoryRating.HIGH_MATCH,CategoryLabel.GENERAL_ADMIN,"{ ""permission_category_label"": ""Data Cloud"", ...",2.75,Data Cloud,12,Exact Match,5,5.0,"{'Primary Product or Feature Anchor': 5, 'Admi...",The permission name and description explicitly...,High
4,Access Data Cloud Profile Explorer,AccessCdpProfileExplorer,Allows user access Data Cloud Profile Explorer.,CategoryRating.UNKNOWN,CategoryLabel.GENERAL_INDUSTRIES_CLOUD,"{ ""permission_category_label"": ""Data Cloud"", ...",2.87,Data Cloud,12,Exact Match,5,5.0,"{'Primary Product or Feature Anchor': 5, 'Admi...",This permission directly references 'Data Clou...,High



Columns added: ['Permission Category Label', 'Permission Category Order', 'Match Rating Tier', 'Match Rating Score', 'Weighted Match Score', 'Scores', 'Rationale', 'Confidence']


In [22]:
# Preview the first record of the parsed output table as a dict,
# so that each column's value is listed next to its name
category_df.iloc[0].to_dict()

{'Permission Name': 'Allows user access activations',
 'API Name': 'AccessActivation',
 'Description': 'Allows user access activations.',
 'Category Rating': <CategoryRating.EXACT_MATCH: '5'>,
 'Category Label': <CategoryLabel.GENERAL_ADMIN: '1'>,
 'Evaluation': '{  "permission_category_label": "User Management Admin",  "permission_category_order": "3",  "match_rating_tier": "High Match",  "match_rating_score": "4",  "weighted_match_score": 3.6,  "scores": {    "Primary Product or Feature Anchor": 4,    "Administrative vs End-User Function": 5,    "Data Interaction Pattern": 2,    "Platform Layer or Add-On Alignment": 1,    "Intended User Persona or Business Process": 5  },  "rationale": "The permission \'Allows user access activations\' strongly aligns with \'User Management Admin\'. The permission clearly relates to managing user access, which is a core function of the User Management Admin category. The description focuses on enabling activations, fitting the persona of an admin res

In [23]:
# Persist the parsed category results under a timestamped file name
save_data(
    data=category_df,
    filename='category_results_' + datetime.now().strftime("%Y%m%d_%H%M%S"),
    data_type='output',  # This will save to data/output/
    format='csv',
    index=False,
)



'data/output/category_results_20250502_204000.csv'

# Permission Cloud Classification


In [None]:
# Load the prompt template from the source file.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
with open('/content/SFDC-User-Permissions-AI/src/prompts/templates/prompt_user_perm_cloud.md', 'r', encoding='utf-8') as f:
    PROMPT_USER_PERM_CLOUD = f.read()
# Display the prompt
print(PROMPT_USER_PERM_CLOUD)

In [None]:
# Create a new chat session
#cloud_chat_session = create_chat_session(client = client, model_name='gemini-2.0-flash')

# Evaluate a single permission (the first row of the table) before running the batch
cloud_text_eval, structured_cloud_rating, structured_cloud_label = cloud_eval_summary(
    prompt=PROMPT_USER_PERM_CLOUD,
    name=perm_list_df['Permission Name'][0],
    api_name=perm_list_df['API Name'][0],
    description=perm_list_df['Description'][0],
    expanded_description=perm_list_df['Expanded Description'][0],
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None  # Create a new session
)

# Display the result.
# Print this cell's own text evaluation (cloud_text_eval), not the
# cat_text_eval variable left over from the category section.
print(cloud_text_eval)
print(f"Cloud Rating: {structured_cloud_rating.name} ({structured_cloud_rating.value})")
print(f"Cloud Label: {structured_cloud_label.name} ({structured_cloud_label.value})")

In [None]:
# Run the cloud prompt over the permission table in batch mode.
# NOTE(review): total_records=5 presumably caps this run at five rows — confirm.
cloud_results_df = classify_cloud(
    input_df=perm_list_df,
    prompt=PROMPT_USER_PERM_CLOUD,
    checkpoint_dir="data/checkpoints",
    job_id=None,
    resume_from_checkpoint=False,
    model_name='gemini-2.0-flash',
    client=client,
    chat_session=None,
    total_records=5,
    checkin_interval=60,
    checkpoint_interval=20,
    debug=True,
    verbose=False,
)
cloud_results_df

In [None]:
# Look at the raw model output stored for the row labelled 1
cloud_results_df.loc[1, 'Evaluation']

In [None]:
# Expand the JSON text in the 'Evaluation' column into separate columns.
# `fields` maps each JSON key to the column name it is given in the output.
cloud_df = extract_json_fields(
    cloud_results_df,
    json_column='Evaluation',
    fields={
        'permission_cloud_label': 'Permission Cloud Label',
        'permission_cloud_order': 'Permission Cloud Order',
        'match_rating_tier': 'Match Rating Tier',
        'match_rating_score': 'Match Rating Score',
        'weighted_match_score': 'Weighted Match Score',
        'scores': 'Scores',
        'rationale': 'Rationale',
        'confidence': 'Confidence',
    },
    debug=True,
)

In [None]:
# Preview the first record of the parsed output table as a dict,
# so that each column's value is listed next to its name
cloud_df.iloc[0].to_dict()

In [None]:
# Persist the parsed cloud results under a timestamped file name
save_data(
    data=cloud_df,
    filename='cloud_results_' + datetime.now().strftime("%Y%m%d_%H%M%S"),
    data_type='output',  # This will save to data/output/
    format='csv',
    index=False,
)