In [1]:
pip install sentence-transformers pandas

Collecting sentence-transformersNote: you may need to restart the kernel to use updated packages.

  Downloading sentence_transformers-3.4.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
   ---------------------------------------- 0.0/275.9 kB ? eta -:--:--
   - -------------------------------------- 10.2/275.9 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/275.9 kB 445.2 kB/s eta 0:00:01
   ----------------- ---------------------- 122.9/275.9 kB 1.0 MB/s eta 0:00:01
   ---------------------------------------- 275.9/275.9 kB 1.7 MB/s eta 0:00:00
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-3.4.1


In [10]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "abort": "stop",
    "terminate": "cancel",
    "average user": "typical user",
    "black box": "opaque system",
    "white box": "transparent system",
    "black hat": "malicious actor",
    "white hat": "ethical hacker",
    "blacklist": "blocklist",
    "whitelist": "allowlist",
    "blind": "anonymous",
    "double blind": "double-anonymous",
    "male connector": "plug connector",
    "female connector": "socket connector",
    "she": "they",
    "her": "them",
    "hers": "theirs",
    "he": "they",
    "him": "them",
    "his": "their",
    "master": "primary",
    "slave": "replica",
    "quantum supremacy": "quantum advantage",
    "grandfathered": "exempted",
    "guys": "everyone",
    "man hours": "work hours",
    "sanity check": "quick check",
    "sanity test": "smoke test",
    "dummy value": "placeholder value",
    "scrum master": "scrum lead",
    "mob programming": "ensemble programming",
    "segregation": "separation",
    "blackout period": "restricted period",
    "gray hat": "ethical hacker",
    "native": "built-in",
    "red team": "offensive security team",
    "web master": "web administrator",
    "white space": "padding/margin",
    "white team": "oversight team",
    "yellow team": "optimization team",
    "aboriginal": "Indigenous",
    "brown bags": "lunch and learn sessions",
    "brown bag": "lunch and learn session",
    "first-class citizen": "core element",
    "first-class citizens": "core elements",
    "man-in-the-middle": "on-path attack",
    "master branch": "main branch",
    "minority": "underrepresented group",
    "normal": "standard",
    "handicapped": "people with disabilities",
    "crazy": "unconventional",
    "OCD": "perfectionist tendencies",
    "culture fit": "values alignment",
    "chairman": "chairperson",
    "foreman": "site supervisor",
    "man": "human",
    "mankind": "humanity",
    "mans": "crew",
    "salesman": "sales representative",
    "manmade": "artificial",
    "manpower": "workforce",
    "demilitarized zone (DMZ)": "perimeter zone",
    "demilitarized zone": "perimeter zone",
    "DMZ": "perimeter zone",
    "demilitarized zones": "perimeter zones",
    "hang": "freeze",
    "daughter board": "auxiliary board",
    "gender bender": "connector adapter",
    "orphaned object": "unused object"
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.", 
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to replace harmful terms using regular expressions
def replace_terms(sentence, replacements):
    for term, replacement in replacements.items():
        # Use case-insensitive matching and word boundaries
        sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
    return sentence
    
# Generate revised sentences
revised_sentences = [replace_terms(sentence, harmful_terms) for sentence in original_sentences]

# Compute SBERT similarity scores
similarity_scores = [
    util.pytorch_cos_sim(model.encode(orig, convert_to_tensor=True), 
                         model.encode(revised, convert_to_tensor=True)).item()
    for orig, revised in zip(original_sentences, revised_sentences)
]

# Create DataFrame
df = pd.DataFrame({
    "Original Sentence": original_sentences,
    "Revised Sentence": revised_sentences,
    "Similarity Score": similarity_scores
})

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Revised Sentence,Similarity Score
5,The company detected a black hat attempting to...,The company detected a malicious actor attempt...,0.610877
41,Functions are treated as first-class citizens ...,Functions are treated as core elements in many...,0.64498
12,The female connector allows for easy integrati...,The socket connector allows for easy integrati...,0.679458
40,The company hosted brown bag sessions to share...,The company hosted lunch and learn session ses...,0.682555
62,The adapter functions as a gender bender for c...,The adapter functions as a connector adapter f...,0.703491
48,His desk organization reflects a hint of OCD t...,their desk organization reflects a hint of per...,0.70853
29,The team opted for mob programming to tackle t...,The team opted for ensemble programming to tac...,0.721668
4,The white box testing method allows developers...,The transparent system testing method allows d...,0.722767
9,The paper underwent a blind review process to ...,The paper underwent a anonymous review process...,0.744736
51,The foreman supervised the construction site w...,The site supervisor supervised the constructio...,0.745977
