In [1]:
pip install sentence-transformers pandas

Note: you may need to restart the kernel to use updated packages.


In [4]:
# PROMPT 1:

from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Halt", "Stop", "Exit"],
    "Terminate": ["End", "Cancel", "Stop"],
    "Average user": ["Typical user", "General user"],
    "Black box": ["Opaque system", "Unobservable system"],
    "White box": ["Transparent system", "Glass box testing"],
    "Black hat": ["Malicious hacker", "Unethical hacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Blocklist", "Denylist"],
    "Whitelist": ["Allowlist", "Approved list"],
    "Blind": ["Anonymous", "Unbiased"],
    "Double blind": ["Dual anonymous", "Impartial review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Socket connector", "Receptacle connector"],
    "She": ["They"],
    "Her": ["Their"],
    "Hers": ["Theirs"],
    "He": ["They"],
    "Him": ["Them"],
    "His": ["Their"],
    "Master": ["Primary", "Main", "Leader"],
    "Slave": ["Secondary", "Replica", "Follower"],
    "Quantum supremacy": ["Quantum advantage", "Quantum superiority"],
    "Grandfathered": ["Legacy status", "Exempted"],
    "Guys": ["Everyone", "Team", "Folks"],
    "Man hours": ["Work hours", "Person hours"],
    "Sanity check": ["Quick check", "Validation check"],
    "Sanity test": ["Initial test", "Smoke test"],
    "Dummy value": ["Placeholder value", "Test value"],
    "Scrum master": ["Scrum lead", "Agile facilitator"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Division"],
    "Blackout period": ["Restricted period", "Downtime period"],
    "Gray hat": ["Ethical hacker", "Security researcher"],
    "Native": ["Built-in", "Default", "Integrated"],
    "Red team": ["Offensive security team", "Penetration testing team"],
    "Web master": ["Web administrator", "Site manager"],
    "White space": ["Blank space", "Open space"],
    "White team": ["Control team", "Oversight team"],
    "Yellow team": ["Security optimization team"],
    "Aboriginal": ["Indigenous"],
    "Brown bags": ["Lunch-and-learn sessions", "Informal learning sessions"],
    "Brown bag": ["Lunch-and-learn session", "Informal learning session"],
    "First-class citizen": ["Primary entity", "Core feature"],
    "First-class citizens": ["Primary entities", "Core features"],
    "Man-in-the-middle": ["Interception attack", "Relay attack"],
    "Master branch": ["Main branch", "Primary branch"],
    "Minority": ["Underrepresented group", "Marginalized group"],
    "Normal": ["Typical", "Standard", "Expected"],
    "Handicapped": ["Accessible", "Person with a disability"],
    "Crazy": ["Unconventional", "Unexpected", "Innovative"],
    "OCD": ["Detail-oriented", "Highly organized"],
    "Culture fit": ["Culture add", "Value alignment"],
    "Chairman": ["Chair", "Chairperson"],
    "Foreman": ["Supervisor", "Team lead"],
    "Man": ["Human", "Person"],
    "Mankind": ["Humanity", "Humankind"],
    "Mans": ["Operates", "Staffs"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Artificial", "Human-made", "Constructed"],
    "Manpower": ["Workforce", "Personnel", "Staffing"],
    "Demilitarized zone": ["Buffer zone", "Perimeter network"],
    "Demilitarized zones": ["Buffer zones", "Perimeter networks"],
    "DMZ": ["Network perimeter", "Security zone"],
    "Hang": ["Freeze", "Become unresponsive"],
    "Daughter board": ["Expansion board", "Auxiliary board"],
    "Gender bender": ["Gender adapter", "Connector adapter"],
    "Orphaned object": ["Unlinked object", "Unreferenced object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
13,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
12,The company detected a black hat attempting to...,Black hat,Malicious hacker,The company detected a Malicious hacker attemp...,0.601477
27,The female connector allows for easy integrati...,Female connector,Receptacle connector,The Receptacle connector allows for easy integ...,0.627351
89,Functions are treated as first-class citizens ...,First-class citizens,Core features,Functions are treated as Core features in many...,0.641356
87,The company hosted brown bag sessions to share...,Brown bag,Informal learning session,The company hosted Informal learning session s...,0.674223
88,Functions are treated as first-class citizens ...,First-class citizens,Primary entities,Functions are treated as Primary entities in m...,0.678419
26,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458
86,The company hosted brown bag sessions to share...,Brown bag,Lunch-and-learn session,The company hosted Lunch-and-learn session ses...,0.695995
141,The adapter functions as a gender bender for c...,Gender bender,Connector adapter,The adapter functions as a Connector adapter f...,0.703491
62,The team opted for mob programming to tackle t...,Mob programming,Ensemble programming,The team opted for Ensemble programming to tac...,0.721668


In [5]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GPT4-1_replacements.csv", index=False)

In [6]:
# PROMPT 2:

from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Stop", "Halt", "Cancel"],
    "Terminate": ["End", "Cancel", "Stop"],
    "Average user": ["Typical user", "General user", "Standard user"],
    "Black box": ["Opaque system", "Closed system"],
    "White box": ["Transparent system", "Open system"],
    "Black hat": ["Malicious hacker", "Unethical hacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Blocklist", "Denylist"],
    "Whitelist": ["Allowlist", "Safelist"],
    "Blind": ["Anonymous", "Unbiased"],
    "Double blind": ["Double-anonymous", "Fully blinded"],
    "Male connector": ["Plug connector", "Insert connector"],
    "Female connector": ["Socket connector", "Receptacle connector"],
    "She/Her/Hers/He/Him/His": ["They/Them"],
    "Master": ["Primary", "Main", "Leader"],
    "Slave": ["Secondary", "Replica", "Worker"],
    "Quantum supremacy": ["Quantum advantage", "Quantum breakthrough"],
    "Grandfathered": ["Legacy status", "Exempt from new rules"],
    "Guys": ["Everyone", "Team", "Folks", "All"],
    "Man hours": ["Person hours", "Work hours", "Effort hours"],
    "Sanity check": ["Quick check", "Logic check", "Coherence check"],
    "Sanity test": ["Smoke test", "Preliminary test"],
    "Dummy value": ["Placeholder value", "Sample value"],
    "Scrum master": ["Scrum lead", "Agile facilitator"],
    "Mob programming": ["Collaborative programming", "Team programming"],
    "Segregation": ["Separation", "Division"],
    "Blackout period": ["Restricted period", "Downtime"],
    "Gray hat": ["Ethical hacker", "Security researcher"],
    "Native (in software)": ["Built-in", "Integrated", "First-party"],
    "Red team": ["Offensive security team", "Penetration testing team"],
    "Web master": ["Website administrator", "Web administrator"],
    "White space": ["Blank space", "Empty space"],
    "White team": ["Oversight team", "Neutral team"],
    "Yellow team": ["Security optimization team"],
    "Aboriginal": ["Indigenous"],
    "Brown bags": ["Lunch-and-learn sessions", "Informal learning sessions"],
    "First-class citizen": ["Core feature", "Fundamental entity"],
    "Man-in-the-middle": ["Intermediary attack", "Network interception attack"],
    "Master branch": ["Main branch", "Primary branch"],
    "Minority": ["Underrepresented group"],
    "Normal": ["Typical", "Expected", "Standard"],
    "Handicapped": ["Accessible", "Person with a disability"],
    "Crazy": ["Unconventional", "Wild", "Bold"],
    "OCD": ["Detail-oriented", "Meticulous"],
    "Culture fit": ["Culture add", "Value alignment"],
    "Chairman": ["Chair", "Chairperson"],
    "Foreman": ["Supervisor", "Lead worker"],
    "Man": ["Humanity", "People", "Humans"],
    "Mankind": ["Humankind", "Humanity"],
    "Mans": ["Operates", "Runs", "Staffs"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Human-made", "Artificial", "Constructed"],
    "Manpower": ["Workforce", "Staffing", "Labor"],
    "Demilitarized zone": ["Perimeter network", "Buffer zone"],
    "DMZ": ["Perimeter network", "Secure zone"],
    "Hang (application)": ["Freeze", "Become unresponsive"],
    "Daughter board": ["Expansion board", "Auxiliary board"],
    "Gender bender": ["Adapter", "Connector adapter"],
    "Orphaned object": ["Unlinked object", "Disconnected object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
14,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
13,The company detected a black hat attempting to...,Black hat,Malicious hacker,The company detected a Malicious hacker attemp...,0.601477
28,The female connector allows for easy integrati...,Female connector,Receptacle connector,The Receptacle connector allows for easy integ...,0.627351
27,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458
132,The adapter functions as a gender bender for c...,Gender bender,Connector adapter,The adapter functions as a Connector adapter f...,0.703491
108,The foreman supervised the construction site w...,Foreman,Lead worker,The Lead worker supervised the construction si...,0.712054
11,The white box testing method allows developers...,White box,Transparent system,The Transparent system testing method allows d...,0.722767
131,The adapter functions as a gender bender for c...,Gender bender,Adapter,The adapter functions as a Adapter for connect...,0.72736
127,The server operates within the DMZ for added s...,DMZ,Perimeter network,The server operates within the Perimeter netwo...,0.728124
66,A blackout period was enforced during the syst...,Blackout period,Downtime,A Downtime was enforced during the system upgr...,0.728261


In [7]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GPT4-2_replacements.csv", index=False)

In [8]:
# PROMPT 3:

from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Halt", "Stop", "Cancel"],
    "Terminate": ["End", "Stop", "Complete"],
    "Average user": ["Typical user", "General user"],
    "Black box": ["Opaque system", "Unobservable process"],
    "White box": ["Transparent system", "Open process"],
    "Black hat": ["Malicious hacker", "Unauthorized attacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Blocklist", "Denylist"],
    "Whitelist": ["Allowlist", "Approved list"],
    "Blind": ["Anonymous", "Unbiased"],
    "Double blind": ["Fully anonymous", "Unbiased testing"],
    "Male connector": ["Plug", "Insert connector"],
    "Female connector": ["Socket", "Receiving connector"],
    "She": ["They", "Use specific names"],
    "Her": ["Their", "Use specific names"],
    "Hers": ["Theirs", "Use specific names"],
    "He": ["They", "Use specific names"],
    "Him": ["Them", "Use specific names"],
    "His": ["Their", "Use specific names"],
    "Master": ["Primary", "Main"],
    "Slave": ["Secondary", "Replica", "Worker"],
    "Quantum supremacy": ["Quantum advantage", "Quantum breakthrough"],
    "Grandfathered": ["Legacy status", "Exempted"],
    "Guys": ["Everyone", "Team", "Folks"],
    "Man hours": ["Person hours", "Work hours"],
    "Sanity check": ["Quick check", "Validation check"],
    "Sanity test": ["Preliminary test", "Basic functionality test"],
    "Dummy value": ["Placeholder value", "Test value"],
    "Scrum master": ["Scrum lead", "Scrum facilitator"],
    "Mob programming": ["Ensemble programming", "Group programming"],
    "Segregation": ["Separation", "Division"],
    "Blackout period": ["Restricted period", "Downtime"],
    "Gray hat": ["Independent security researcher"],
    "Native": ["Built-in", "Default"],
    "Red team": ["Offensive security team", "Attack simulation team"],
    "Web master": ["Website administrator", "Web manager"],
    "White space": ["Empty space", "Spacing"],
    "White team": ["Neutral team", "Oversight team"],
    "Yellow team": ["Security optimization team"],
    "Aboriginal": ["Indigenous"],
    "Brown bags": ["Lunch-and-learn sessions"],
    "Brown bag": ["Lunch-and-learn session"],
    "First-class citizen": ["Core feature", "Primary element"],
    "First-class citizens": ["Core features", "Primary elements"],
    "Man-in-the-middle": ["Interception attack", "Relay attack"],
    "Master branch": ["Main branch", "Primary branch"],
    "Minority": ["Underrepresented group", "Marginalized group"],
    "Normal": ["Expected", "Standard"],
    "Handicapped": ["Accessible", "Person with a disability"],
    "Crazy": ["Unconventional", "Unexpected"],
    "OCD": ["Highly organized", "Detail-oriented"],
    "Culture fit": ["Culture add", "Value alignment"],
    "Chairman": ["Chair", "Chairperson"],
    "Foreman": ["Supervisor", "Team lead"],
    "Man": ["Humanity", "People"],
    "Mankind": ["Humankind", "Humanity"],
    "Mans": ["Staffs", "Operates"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Artificial", "Human-made"],
    "Manpower": ["Workforce", "Personnel"],
    "Demilitarized zone": ["Perimeter network", "Buffer zone"],
    "Demilitarized zones": ["Perimeter networks", "Buffer zones"],
    "DMZ": ["Perimeter network", "Isolated network"],
    "Hang": ["Freeze", "Become unresponsive"],
    "Daughter board": ["Auxiliary board", "Expansion board"],
    "Gender bender": ["Adapter", "Converter"],
    "Orphaned object": ["Unlinked object", "Unassociated object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
26,The female connector allows for easy integrati...,Female connector,Socket,The Socket allows for easy integration with ot...,0.573467
35,He led the presentation with confidence and cl...,He,Use specific names,Use specific names led the presentation with c...,0.573573
12,The company detected a black hat attempting to...,Black hat,Malicious hacker,The company detected a Malicious hacker attemp...,0.601477
13,The company detected a black hat attempting to...,Black hat,Unauthorized attacker,The company detected a Unauthorized attacker a...,0.601788
37,The team assigned the most critical task to him.,Him,Use specific names,The team assigned the most critical task to Us...,0.640173
88,Functions are treated as first-class citizens ...,First-class citizens,Core features,Functions are treated as Core features in many...,0.641356
39,His programming skills greatly improved the pr...,His,Use specific names,Use specific names programming skills greatly ...,0.659806
31,Her contribution to the discussion was insight...,Her,Use specific names,Use specific names contribution to the discuss...,0.670337
29,She was responsible for coordinating the team’...,She,Use specific names,Use specific names was responsible for coordin...,0.670666
89,Functions are treated as first-class citizens ...,First-class citizens,Primary elements,Functions are treated as Primary elements in m...,0.675208


In [9]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GPT4-3_replacements.csv", index=False)