In [1]:
pip install sentence-transformers pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
# PROMPT 1:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Terminate", "Halt", "Stop"],
    "Terminate": ["End", "Stop", "Conclude", "Finish"],
    "Average user": ["Typical user", "Standard user", "Common user"],
    "Black box": ["Opaque system", "Undocumented system", "System with limited visibility"],
    "White box": ["Transparent system", "System with visible internals"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Cybercriminal"],
    "White hat": ["Ethical hacker", "Security researcher", "Security specialist"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Safelist", "Inclusion list"],
    "Blind": ["Anonymous review", "Masked review"],
    "Double blind": ["Double-anonymous review", "Double-masked review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Socket connector", "Port connector"],
    "She": ["They", "The person", "The developer", "The engineer"],
    "Her": ["Their", "The person's", "The developer's"],
    "Hers": ["Theirs", "The person's", "The developer's"],
    "He": ["They", "The person", "The developer", "The engineer"],
    "Him": ["Them", "The person", "The developer"],
    "His": ["Their", "The person's", "The developer's"],
    "Master": ["Primary", "Main", "Principal", "Leading"],
    "Slave": ["Secondary", "Replica", "Follower", "Dependent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum superiority"],
    "Grandfathered": ["Legacy", "Previously approved", "Exempt"],
    "Guys": ["Everyone", "Folks", "Team", "People", "You all"],
    "Man hours": ["Person-hours", "Staff-hours", "Work-hours"],
    "Sanity check": ["Quick check", "Basic check", "Preliminary check", "Confidence check"],
    "Sanity test": ["Quick test", "Basic test", "Preliminary test", "Confidence test"],
    "Dummy value": ["Placeholder value", "Sample value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Team lead"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Division"],
    "Blackout period": ["Maintenance window", "Downtime", "Service interruption"],
    "Gray hat": ["Ambiguous hacker", "Security researcher with mixed methods"],
    "Native": ["Built-in", "Integrated", "Core"],
    "Red team": ["Attack simulation team", "Security testing team"],
    "Web master": ["Web developer", "Web administrator", "Web maintainer"],
    "White space": ["Empty space", "Padding", "Margin"],
    "White team": ["Oversight team", "Moderation team"],
    "Yellow team": ["Security optimization team", "Improvement team"],
    "Aboriginal": ["Indigenous", "Native", "Original"],
    "Brown bags": ["Informal learning sessions", "Lunch and learns"],
    "First-class citizen": ["First-class element", "Primary element"],
    "Man-in-the-middle": ["Interception attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch"],
    "Minority": ["Underrepresented group", "Smaller group"],
    "Normal": ["Usual", "Standard", "Typical", "Expected"],
    "Handicapped": ["Person with a disability", "Accessible", "Person with access needs"],
    "Crazy": ["Unconventional", "Innovative", "Bold", "Unique"],
    "OCD": ["Detail-oriented", "Highly organized", "Meticulous"],
    "Culture fit": ["Team compatibility", "Alignment with team values", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Moderator"],
    "Foreman": ["Supervisor", "Lead", "Site manager"],
    "Man": ["Humanity", "People", "Humankind", "Individuals"],
    "Mankind": ["Humanity", "People", "Humankind"],
    "Mans": ["Staffs", "Operates", "Works"],
    "Salesman": ["Salesperson", "Sales representative", "Sales associate"],
    "Manmade": ["Synthetic", "Artificial", "Engineered"],
    "Manpower": ["Workforce", "Personnel", "Staffing"],
    "Demilitarized zone": ["Network perimeter", "Protected network area"],
    "DMZ": ["Network perimeter", "Protected network area"],
    "Hang": ["Freeze", "Stall", "Become unresponsive"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Adapter", "Converter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Disconnected object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
17,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
16,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
18,The company detected a black hat attempting to...,Black hat,Cybercriminal,The company detected a Cybercriminal attemptin...,0.645379
103,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
34,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458
97,The scrum master facilitated the daily stand-u...,Scrum master,Team lead,The Team lead facilitated the daily stand-up m...,0.700074
15,The white box testing method allows developers...,White box,System with visible internals,The System with visible internals testing meth...,0.700742
161,The company prioritizes culture fit when hirin...,Culture fit,Team compatibility,The company prioritizes Team compatibility whe...,0.702953
201,The adapter functions as a gender bender for c...,Gender bender,Connector adapter,The adapter functions as a Connector adapter f...,0.703491
107,The gray hat hacker reported the vulnerabiliti...,Gray hat,Security researcher with mixed methods,The Security researcher with mixed methods hac...,0.708395


In [4]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GeminiFlash2-1_replacements.csv", index=False)

In [6]:
# PROMPT 1:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Terminate", "Halt", "Stop"],
    "Terminate": ["End", "Stop", "Conclude", "Finish"],
    "Average user": ["Typical user", "Standard user", "Common user"],
    "Black box": ["Opaque system", "Undocumented system", "System with limited visibility"],
    "White box": ["Transparent system", "System with visible internals"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Cybercriminal"],
    "White hat": ["Ethical hacker", "Security researcher", "Security specialist"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Safelist", "Inclusion list"],
    "Blind": ["Anonymous review", "Masked review"],
    "Double blind": ["Double-anonymous review", "Double-masked review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Socket connector", "Port connector"],
    "She": ["They", "The person", "The developer", "The engineer"],
    "Her": ["Their", "The person's", "The developer's"],
    "Hers": ["Theirs", "The person's", "The developer's"],
    "He": ["They", "The person", "The developer", "The engineer"],
    "Him": ["Them", "The person", "The developer"],
    "His": ["Their", "The person's", "The developer's"],
    "Master": ["Primary", "Main", "Principal", "Leading"],
    "Slave": ["Secondary", "Replica", "Follower", "Dependent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum superiority"],
    "Grandfathered": ["Legacy", "Previously approved", "Exempt"],
    "Guys": ["Everyone", "Folks", "Team", "People", "You all"],
    "Man hours": ["Person-hours", "Staff-hours", "Work-hours"],
    "Sanity check": ["Quick check", "Basic check", "Preliminary check", "Confidence check"],
    "Sanity test": ["Quick test", "Basic test", "Preliminary test", "Confidence test"],
    "Dummy value": ["Placeholder value", "Sample value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Team lead"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Division"],
    "Blackout period": ["Maintenance window", "Downtime", "Service interruption"],
    "Gray hat": ["Ambiguous hacker", "Security researcher with mixed methods"],
    "Native": ["Built-in", "Integrated", "Core"],
    "Red team": ["Attack simulation team", "Security testing team"],
    "Web master": ["Web developer", "Web administrator", "Web maintainer"],
    "White space": ["Empty space", "Padding", "Margin"],
    "White team": ["Oversight team", "Moderation team"],
    "Yellow team": ["Security optimization team", "Improvement team"],
    "Aboriginal": ["Indigenous", "Native", "Original"],
    "Brown bags": ["Informal learning sessions", "Lunch and learns"],
    "First-class citizen": ["First-class element", "Primary element"],
    "Man-in-the-middle": ["Interception attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch"],
    "Minority": ["Underrepresented group", "Smaller group"],
    "Normal": ["Usual", "Standard", "Typical", "Expected"],
    "Handicapped": ["Person with a disability", "Accessible", "Person with access needs"],
    "Crazy": ["Unconventional", "Innovative", "Bold", "Unique"],
    "OCD": ["Detail-oriented", "Highly organized", "Meticulous"],
    "Culture fit": ["Team compatibility", "Alignment with team values", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Moderator"],
    "Foreman": ["Supervisor", "Lead", "Site manager"],
    "Man": ["Humanity", "People", "Humankind", "Individuals"],
    "Mankind": ["Humanity", "People", "Humankind"],
    "Mans": ["Staffs", "Operates", "Works"],
    "Salesman": ["Salesperson", "Sales representative", "Sales associate"],
    "Manmade": ["Synthetic", "Artificial", "Engineered"],
    "Manpower": ["Workforce", "Personnel", "Staffing"],
    "Demilitarized zone": ["Network perimeter", "Protected network area"],
    "DMZ": ["Network perimeter", "Protected network area"],
    "Hang": ["Freeze", "Stall", "Become unresponsive"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Adapter", "Converter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Disconnected object"]
}


# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
17,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
16,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
18,The company detected a black hat attempting to...,Black hat,Cybercriminal,The company detected a Cybercriminal attemptin...,0.645379
103,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
34,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458
97,The scrum master facilitated the daily stand-u...,Scrum master,Team lead,The Team lead facilitated the daily stand-up m...,0.700074
15,The white box testing method allows developers...,White box,System with visible internals,The System with visible internals testing meth...,0.700742
161,The company prioritizes culture fit when hirin...,Culture fit,Team compatibility,The company prioritizes Team compatibility whe...,0.702953
201,The adapter functions as a gender bender for c...,Gender bender,Connector adapter,The adapter functions as a Connector adapter f...,0.703491
107,The gray hat hacker reported the vulnerabiliti...,Gray hat,Security researcher with mixed methods,The Security researcher with mixed methods hac...,0.708395


In [7]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GeminiFlash2-2_replacements.csv", index=False)

In [9]:
# PROMPT 3:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Terminate", "Halt", "Stop"],
    "Terminate": ["End", "Stop", "Conclude", "Finish"],
    "Average user": ["Typical user", "Standard user", "Common user"],
    "Black box": ["Opaque system", "Undocumented system", "System with limited visibility"],
    "White box": ["Transparent system", "System with visible internals"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Cybercriminal"],
    "White hat": ["Ethical hacker", "Security researcher", "Security specialist"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Safelist", "Inclusion list"],
    "Blind": ["Anonymous review", "Masked review"],
    "Double blind": ["Double-anonymous review", "Double-masked review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Socket connector", "Port connector"],
    "She": ["They", "The person", "The developer", "The engineer"],
    "Her": ["Their", "The person's", "The developer's"],
    "Hers": ["Theirs", "The person's", "The developer's"],
    "He": ["They", "The person", "The developer", "The engineer"],
    "Him": ["Them", "The person", "The developer"],
    "His": ["Their", "The person's", "The developer's"],
    "Master": ["Primary", "Main", "Principal", "Leading"],
    "Slave": ["Secondary", "Replica", "Follower", "Dependent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum superiority"],
    "Grandfathered": ["Legacy", "Previously approved", "Exempt"],
    "Guys": ["Everyone", "Folks", "Team", "People", "You all"],
    "Man hours": ["Person-hours", "Staff-hours", "Work-hours"],
    "Sanity check": ["Quick check", "Basic check", "Preliminary check", "Confidence check"],
    "Sanity test": ["Quick test", "Basic test", "Preliminary test", "Confidence test"],
    "Dummy value": ["Placeholder value", "Sample value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Team lead"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Division"],
    "Blackout period": ["Maintenance window", "Downtime", "Service interruption"],
    "Gray hat": ["Ambiguous hacker", "Security researcher with mixed methods"],
    "Native": ["Built-in", "Integrated", "Core"],
    "Red team": ["Attack simulation team", "Security testing team"],
    "Web master": ["Web developer", "Web administrator", "Web maintainer"],
    "White space": ["Empty space", "Padding", "Margin"],
    "White team": ["Oversight team", "Moderation team"],
    "Yellow team": ["Security optimization team", "Improvement team"],
    "Aboriginal": ["Indigenous", "Native", "Original"],
    "Brown bags": ["Informal learning sessions", "Lunch and learns"],
    "First-class citizen": ["First-class element", "Primary element"],
    "Man-in-the-middle": ["Interception attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch"],
    "Minority": ["Underrepresented group", "Smaller group"],
    "Normal": ["Usual", "Standard", "Typical", "Expected"],
    "Handicapped": ["Person with a disability", "Accessible", "Person with access needs"],
    "Crazy": ["Unconventional", "Innovative", "Bold", "Unique"],
    "OCD": ["Detail-oriented", "Highly organized", "Meticulous"],
    "Culture fit": ["Team compatibility", "Alignment with team values", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Moderator"],
    "Foreman": ["Supervisor", "Lead", "Site manager"],
    "Man": ["Humanity", "People", "Humankind", "Individuals"],
    "Mankind": ["Humanity", "People", "Humankind"],
    "Mans": ["Staffs", "Operates", "Works"],
    "Salesman": ["Salesperson", "Sales representative", "Sales associate"],
    "Manmade": ["Synthetic", "Artificial", "Engineered"],
    "Manpower": ["Workforce", "Personnel", "Staffing"],
    "Demilitarized zone": ["Network perimeter", "Protected network area"],
    "DMZ": ["Network perimeter", "Protected network area"],
    "Hang": ["Freeze", "Stall", "Become unresponsive"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Adapter", "Converter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Disconnected object"]
}

# Example sentences that include harmful terms
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    modified_sentences = []
    for replacement in replacements:
        modified_sentence = re.sub(rf'\b{re.escape(term)}\b', replacement, sentence, flags=re.IGNORECASE)
        modified_sentences.append((replacement, modified_sentence))
    return modified_sentences

# Compute similarity scores for each replacement
results = []
for sentence in original_sentences:
    for term, replacements in harmful_terms.items():
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            modified_versions = generate_replacements(sentence, term, replacements)
            for replacement, modified_sentence in modified_versions:
                score = util.pytorch_cos_sim(model.encode(sentence, convert_to_tensor=True), 
                                             model.encode(modified_sentence, convert_to_tensor=True)).item()
                results.append({
                    "Original Sentence": sentence,
                    "Harmful Term": term,
                    "Replacement": replacement,
                    "Modified Sentence": modified_sentence,
                    "Similarity Score": score
                })

# Convert results to a DataFrame and display
df = pd.DataFrame(results)

# Adjust display options to show all rows
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
17,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
16,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
18,The company detected a black hat attempting to...,Black hat,Cybercriminal,The company detected a Cybercriminal attemptin...,0.645379
103,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
34,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458
97,The scrum master facilitated the daily stand-u...,Scrum master,Team lead,The Team lead facilitated the daily stand-up m...,0.700074
15,The white box testing method allows developers...,White box,System with visible internals,The System with visible internals testing meth...,0.700742
161,The company prioritizes culture fit when hirin...,Culture fit,Team compatibility,The company prioritizes Team compatibility whe...,0.702953
201,The adapter functions as a gender bender for c...,Gender bender,Connector adapter,The adapter functions as a Connector adapter f...,0.703491
107,The gray hat hacker reported the vulnerabiliti...,Gray hat,Security researcher with mixed methods,The Security researcher with mixed methods hac...,0.708395


In [10]:
df[["Harmful Term", "Replacement", "Similarity Score"]].to_csv("C:/Users/hana_/Downloads/GeminiFlash2-3_replacements.csv", index=False)