In [1]:
pip install sentence-transformers pandas

Collecting sentence-transformersNote: you may need to restart the kernel to use updated packages.

  Downloading sentence_transformers-3.4.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
   ---------------------------------------- 0.0/275.9 kB ? eta -:--:--
   - -------------------------------------- 10.2/275.9 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/275.9 kB 445.2 kB/s eta 0:00:01
   ----------------- ---------------------- 122.9/275.9 kB 1.0 MB/s eta 0:00:01
   ---------------------------------------- 275.9/275.9 kB 1.7 MB/s eta 0:00:00
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-3.4.1


In [2]:
# PROMPT 1:

from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)

harmful_terms = {
    "abort": ["cancel", "stop", "halt", "end", "discontinue"],
    "terminate": ["end", "close", "stop", "discontinue", "conclude"],

    # User References
    "average user": ["typical user", "general user", "most users", "mainstream user"],

    # Testing Terminology
    "black box": ["closed box", "opaque system", "hidden implementation"],
    "white box": ["clear box", "transparent box", "open box", "visible implementation"],
    "black hat": ["malicious actor", "unauthorized hacker", "adversarial hacker"],
    "white hat": ["ethical hacker", "security researcher", "authorized tester"],
    "blacklist": ["blocklist", "deny list", "exclusion list"],
    "whitelist": ["allowlist", "permit list", "inclusion list"],
    "blind": ["anonymous", "unnamed", "unidentified"],
    "double blind": ["double anonymous", "dual anonymous"],

    # Hardware Terms
    "male connector": ["plug", "outward connector", "protruding connector"],
    "female connector": ["socket", "inward connector", "receptacle"],

    # Pronouns
    "she": "they",
    "her": "their",
    "hers": "theirs",
    "he": "they",
    "him": "them",
    "his": "their",

    # System Relationships
    "master": ["primary", "main", "leader", "controller", "host"],
    "slave": ["secondary", "replica", "follower", "worker", "client"],

    # Other Technical Terms
    "quantum supremacy": ["quantum advantage", "quantum computational advantage"],
    "grandfathered": ["legacy-approved", "previously exempt", "historically allowed"],
    "guys": ["everyone", "team", "folks", "all", "people"],
    "man hours": ["person hours", "labor hours", "work hours", "staff hours"],
    "sanity check": ["quick check", "basic check", "coherence check", "logic check"],
    "sanity test": ["quick test", "basic test", "coherence test", "logic test"],
    "dummy value": ["placeholder", "sample value", "test value", "mock value"],
    "scrum master": ["scrum facilitator", "agile coach", "scrum coordinator"],
    "mob programming": ["ensemble programming", "team programming", "collaborative programming"],
    "segregation": ["separation", "division", "partition", "isolation"],
    "blackout period": ["restricted period", "maintenance window", "downtime period"],
    "gray hat": ["semi-ethical hacker", "boundary hacker", "conditional hacker"],
    "native": ["built-in", "integrated", "core", "inherent"],
    "red team": ["attack team", "adversary team", "offensive team"],
    "web master": ["web administrator", "website manager", "web coordinator"],
    "white space": ["negative space", "empty space", "blank space"],
    "white team": ["oversight team", "evaluation team", "monitoring team"],
    "yellow team": ["build security team", "security development team"],
    "aboriginal": ["indigenous", "first nations", "first peoples"],

    # Workplace & Learning Terms
    "brown bags": ["lunch and learn", "knowledge sharing session", "informal learning session"],
    "first-class citizen": ["first-class entity", "first-class object", "core feature"],
    "man-in-the-middle": ["person-in-the-middle", "interception attack", "MITM attack"],
    "master branch": ["main branch", "primary branch", "default branch"],
    "minority": ["underrepresented group", "marginalized community"],
    "normal": ["expected", "standard", "typical", "default", "baseline"],
    "handicapped": ["person with disabilities", "accessible", "disability-friendly"],
    "crazy": ["unexpected", "surprising", "unconventional", "bold", "remarkable"],
    "OCD": ["detail-oriented", "thorough", "meticulous", "precise"],
    "culture fit": ["values alignment", "team compatibility", "organizational fit"],

    # Gendered Role Terms
    "chairman": ["chair", "chairperson", "meeting leader"],
    "foreman": ["supervisor", "team leader", "site manager"],
    "man": ["person", "individual", "human", "people"],
    "mankind": ["humanity", "humankind", "people", "human species"],
    "mans": ["operates", "staffs", "runs", "works"],
    "salesman": ["salesperson", "sales representative", "sales associate"],
    "manmade": ["artificial", "manufactured", "synthetic", "human-made"],
    "manpower": ["workforce", "staff", "personnel", "human resources"],

    # Networking Terms
    "demilitarized zone": ["perimeter network", "edge network", "buffer zone"],
    "demilitarized zones": ["perimeter networks", "edge networks", "buffer zones"],
    "DMZ": ["perimeter network", "edge network", "buffer zone"],
    "hang": ["freeze", "become unresponsive", "stop responding"],

    # Other Hardware/Software Terms
    "daughter board": ["expansion board", "secondary board", "auxiliary board"],
    "gender bender": ["adapter", "connector converter", "interface adapter"],
    "orphaned object": ["unreferenced object", "disconnected object", "isolated object"]
}


# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
44,The female connector allows for easy integrati...,female connector,receptacle,The receptacle allows for easy integration wit...,0.51021
42,The female connector allows for easy integrati...,female connector,socket,The socket allows for easy integration with ot...,0.573467
22,The company detected a black hat attempting to...,black hat,unauthorized hacker,The company detected a unauthorized hacker att...,0.594909
21,The company detected a black hat attempting to...,black hat,malicious actor,The company detected a malicious actor attempt...,0.610877
23,The company detected a black hat attempting to...,black hat,adversarial hacker,The company detected a adversarial hacker atte...,0.611928
129,A blackout period was enforced during the syst...,blackout period,maintenance window,A maintenance window was enforced during the s...,0.658593
53,Her contribution to the discussion was insight...,her,r,r contribution to the discussion was insightfu...,0.691604
20,The white box testing method allows developers...,white box,visible implementation,The visible implementation testing method allo...,0.693767
235,The server operates within the DMZ for added s...,DMZ,edge network,The server operates within the edge network fo...,0.694725
43,The female connector allows for easy integrati...,female connector,inward connector,The inward connector allows for easy integrati...,0.701439


In [3]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/ClaudeSonnet3.7-1_replacements.csv", index=False)

In [6]:
# PROMPT 2:

from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "abort": ["cancel", "stop", "halt", "end", "discontinue"],
    "terminate": ["end", "close", "stop", "conclude", "exit"],
    "average user": ["typical user", "general user", "most users", "target user"],
    "black box": ["closed box", "opaque system", "hidden implementation"],
    "white box": ["clear box", "transparent box", "open box", "visible implementation"],
    "black hat": ["malicious actor", "unauthorized hacker", "threat actor"],
    "white hat": ["ethical hacker", "security researcher", "authorized tester"],
    "blacklist": ["blocklist", "deny list", "exclusion list"],
    "whitelist": ["allowlist", "permit list", "inclusion list"],
    "blind": ["anonymous", "unnamed", "unidentified"],
    "double blind": ["double anonymous", "dual anonymous"],
    "male connector": ["plug", "outward connector", "protruding connector"],
    "female connector": ["socket", "inward connector", "receptacle"],
    "she": "they",
    "her": "their",
    "hers": "theirs",
    "he": "they",
    "him": "them",
    "his": "their",
    "master": ["primary", "main", "leader", "controller", "host"],
    "slave": ["secondary", "follower", "worker", "client", "replica"],
    "quantum supremacy": ["quantum advantage", "quantum computational advantage"],
    "grandfathered": ["legacy-approved", "previously exempt", "historically allowed"],
    "guys": ["everyone", "team", "folks", "all", "people", "colleagues"],
    "man hours": ["person hours", "labor hours", "work hours", "staff hours"],
    "sanity check": ["quick check", "basic check", "coherence check", "logic check"],
    "sanity test": ["quick test", "basic test", "coherence test", "logic test"],
    "dummy value": ["placeholder", "sample value", "test value", "mock value"],
    "scrum master": ["scrum facilitator", "agile coach", "scrum coordinator"],
    "mob programming": ["ensemble programming", "team programming", "collaborative programming"],
    "segregation": ["separation", "division", "partition, Isolation"],
    "blackout period": ["restricted period", "maintenance window", "downtime period"],
    "gray hat": ["semi-ethical hacker", "boundary hacker"],
    "native": ["built-in", "integrated", "core", "inherent"],
    "red team": ["attack team", "adversary team", "offensive team"],
    "web master": ["web administrator", "site manager", "web coordinator"],
    "white space": ["negative space", "empty space", "blank space"],
    "white team": ["oversight team", "evaluation team", "monitoring team"],
    "yellow team": ["build security team", "security development team"],
    "aboriginal": ["indigenous", "first nations", "first peoples"],
    "brown bags": ["lunch and learn sessions", "knowledge sharing sessions"],
    "brown bag": ["lunch and learn session", "knowledge sharing session"],
    "first-class citizen": ["first-class entity", "first-class object", "core feature"],
    "first-class citizens": ["first-class entities", "first-class objects", "core features"],
    "man-in-the-middle": ["person-in-the-middle", "interception attack", "MITM attack"],
    "master branch": ["main branch", "primary branch", "default branch"],
    "minority": ["underrepresented group", "marginalized community"],
    "normal": ["expected", "standard", "typical", "default", "baseline"],
    "handicapped": ["person with disabilities", "accessible", "disability-friendly"],
    "crazy": ["unexpected", "surprising", "unconventional", "bold"],
    "OCD": ["detail-oriented", "thorough", "meticulous", "precise"],
    "culture fit": ["values alignment", "team compatibility", "organizational fit"],
    "chairman": ["chair", "chairperson", "meeting leader"],
    "foreman": ["supervisor", "team leader", "site manager"],
    "man": ["person", "individual", "human", "people"],
    "mankind": ["humanity", "humankind", "people"],
    "mans": ["operates", "staffs", "runs", "works"],
    "salesman": ["salesperson", "sales representative", "sales associate"],
    "manmade": ["artificial", "manufactured", "synthetic", "human-made"],
    "manpower": ["workforce", "staff", "personnel", "human resources"],
    "demilitarized zone": ["perimeter network", "edge network", "buffer zone"],
    "demilitarized zones": ["perimeter networks", "edge networks", "buffer zones"],
    "DMZ": ["perimeter network", "edge network", "buffer zone"],
    "hang": ["freeze", "become unresponsive", "stop responding"],
    "daughter board": ["expansion board", "secondary board", "auxiliary board"],
    "gender bender": ["adapter", "connector converter", "interface adapter"],
    "orphaned object": ["unreferenced object", "disconnected object", "isolated object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]


# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
44,The female connector allows for easy integrati...,female connector,receptacle,The receptacle allows for easy integration wit...,0.51021
42,The female connector allows for easy integrati...,female connector,socket,The socket allows for easy integration with ot...,0.573467
22,The company detected a black hat attempting to...,black hat,unauthorized hacker,The company detected a unauthorized hacker att...,0.594909
23,The company detected a black hat attempting to...,black hat,threat actor,The company detected a threat actor attempting...,0.606656
21,The company detected a black hat attempting to...,black hat,malicious actor,The company detected a malicious actor attempt...,0.610877
163,Functions are treated as first-class citizens ...,first-class citizens,core features,Functions are treated as core features in many...,0.641356
129,A blackout period was enforced during the syst...,blackout period,maintenance window,A maintenance window was enforced during the s...,0.658593
159,The company hosted brown bag sessions to share...,brown bag,lunch and learn session,The company hosted lunch and learn session ses...,0.682555
160,The company hosted brown bag sessions to share...,brown bag,knowledge sharing session,The company hosted knowledge sharing session s...,0.687225
53,Her contribution to the discussion was insight...,her,r,r contribution to the discussion was insightfu...,0.691604


In [22]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/ClaudeSonnet3.7-2_replacements.csv", index=False)

In [13]:
# No replacements from prompt 3, model recognized that prompt was the same.