In [3]:
# PROMPT 1:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Halt", "Stop", "Interrupt"],
    "Terminate": ["End", "Close", "Stop", "Finish"],
    "Average user": ["Typical user", "Standard user", "General user", "Intended user"],
    "Black box": ["Opaque box", "Non-transparent box", "Undocumented process"],
    "White box": ["Transparent box", "Glass box", "Open box"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Attacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Passlist", "Inclusion list"],
    "Blind": ["Blinded review", "Anonymized review"],
    "Double blind": ["Double-blinded review", "Fully anonymized review", "Independent review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Receptacle connector", "Socket connector"],
    "She": ["They"],
    "Her": ["Them"],
    "Hers": ["Theirs"],
    "He": ["They"],
    "Him": ["Them"],
    "His": ["Theirs"],
    "Master": ["Primary", "Main", "Leader", "Controller"],
    "Slave": ["Secondary", "Replica", "Follower", "Agent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum preeminence"],
    "Grandfathered": ["Legacy status", "Retained status", "Exempt under previous policy", "Previously approved"],
    "Guys": ["Everyone", "Folks", "Team", "People"],
    "Man hours": ["Person-hours", "Work hours", "Labor hours", "Staff hours"],
    "Sanity check": ["Quick check", "Basic check", "Initial check", "Confidence test", "Smoke test"],
    "Sanity test": ["Quick test", "Basic test", "Initial test", "Confidence test", "Smoke test"],
    "Dummy value": ["Placeholder value", "Sample value", "Example value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Project facilitator"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Compartmentalization", "Distinction"],
    "Blackout period": ["Maintenance window", "Downtime", "Outage period"],
    "Gray hat": ["Ambiguous actor", "Actor with unclear motives", "Semi-authorized actor"],
    "Native": ["Built-in", "Inherent", "Default", "Platform-specific"],
    "Red team": ["Security testing team", "Offensive security team", "Attack simulation team"],
    "Web master": ["Web administrator", "Web lead", "Website maintainer"],
    "White space": ["Spacing", "Negative space", "Padding"],
    "White team": ["Oversight team", "Moderation team", "Control team"],
    "Yellow team": ["Security optimization team", "Performance team", "Team [Specific Function]"],
    "Aboriginal": ["Indigenous", "First Peoples", "Original"],
    "Brown bags": ["Informal learning session", "Lunch and learn session", "Knowledge sharing session"],
    "First-class citizen": ["Primary citizen", "Essential citizen", "Core component", "Fundamental element"],
    "Man-in-the-middle": ["Intermediary attack", "Eavesdropping attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch", "Primary branch"],
    "Minority": ["Underrepresented group", "Marginalized group", "Specific demographic group"],
    "Normal": ["Typical", "Expected", "Standard", "Regular"],
    "Handicapped": ["Accessible", "People with disabilities", "Persons with disabilities", "Users with disabilities"],
    "Crazy": ["Unconventional", "Unexpected", "Surprising", "Innovative", "Remarkable"],
    "OCD": ["Meticulous", "Organized", "Detail-oriented", "Orderly", "Neat"],
    "Culture fit": ["Team contribution", "Values alignment", "Collaborative spirit", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Presiding officer", "Coordinator"],
    "Foreman": ["Supervisor", "Team lead", "Site supervisor"],
    "Man": ["Humankind", "People", "Humanity"],
    "Mankind": ["Humankind", "Humanity", "People"],
    "Mans": ["Crews", "Staffs", "Operates"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Synthetic", "Engineered", "Artificial", "Constructed"],
    "Manpower": ["Staffing", "Workforce", "Personnel", "Team members"],
    "Demilitarized zone": ["Perimeter network", "Screened subnet", "Isolated network"],
    "DMZ": ["Perimeter network", "Screened subnet", "Isolated network"],
    "Hang": ["Freeze", "Become unresponsive", "Stall", "Become blocked"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Gender changer", "Gender adapter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Unlinked object", "Detached object"]
}

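# Example sentences, one per harmful term, in the same order as `harmful_terms`.
# NOTE: three sentences use a variant of their term ("double-blind", "brown bag",
# "first-class citizens"), so the whole-word regex used below does not match the
# keys "Double blind", "Brown bags" and "First-class citizen" in them.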
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

def generate_replacements(sentence, term, replacements):
    """Build one rewritten sentence per candidate replacement.

    Every whole-word, case-insensitive occurrence of ``term`` in ``sentence``
    is substituted by each entry of ``replacements`` in turn. If ``term`` does
    not occur, the sentence is returned unchanged for every replacement.

    Args:
        sentence: Text to rewrite.
        term: Term to look for; it is regex-escaped, so it is matched literally.
        replacements: Iterable of replacement strings.

    Returns:
        A list of ``(replacement, modified_sentence)`` tuples, in the order of
        ``replacements``.
    """
    # The pattern does not depend on the replacement, so build it once.
    pattern = re.compile(rf'\b{re.escape(term)}\b', flags=re.IGNORECASE)
    # A callable replacement is inserted verbatim. A plain string would be
    # parsed as a regex template, so backslashes or group references inside a
    # replacement would be mangled or raise re.error.
    return [
        (replacement, pattern.sub(lambda _match: replacement, sentence))
        for replacement in replacements
    ]

# Compute similarity scores for each replacement.
# Each result row compares an original sentence with one rewritten variant via
# the cosine similarity of their SBERT embeddings.
# NOTE: terms are matched case-insensitively as whole words, so short keys such
# as "Man", "Master" or "Blind" also match inside longer phrases ("man hours",
# "scrum master", "double-blind") and produce additional rows for them.
results = []
for sentence in original_sentences:
    matching_terms = [
        (term, replacements)
        for term, replacements in harmful_terms.items()
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE)
    ]
    if not matching_terms:
        continue

    # The original sentence is the same for every replacement: encode it once
    # per sentence instead of once per (term, replacement) pair.
    sentence_embedding = model.encode(sentence, convert_to_tensor=True)

    for term, replacements in matching_terms:
        for replacement, modified_sentence in generate_replacements(sentence, term, replacements):
            modified_embedding = model.encode(modified_sentence, convert_to_tensor=True)
            score = util.pytorch_cos_sim(sentence_embedding, modified_embedding).item()
            results.append({
                "Original Sentence": sentence,
                "Harmful Term": term,
                "Replacement": replacement,
                "Modified Sentence": modified_sentence,
                "Similarity Score": score
            })

# Lift the row cap so the notebook renders the complete table.
pd.set_option("display.max_rows", None)

# One row per scored (sentence, term, replacement) combination.
df = pd.DataFrame(results)

# Show the lowest-similarity replacements first (ascending is the default order).
df_sorted = df.sort_values("Similarity Score")
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
19,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
161,The company prioritizes culture fit when hirin...,Culture fit,Team contribution,The company prioritizes Team contribution when...,0.606165
18,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
163,The company prioritizes culture fit when hirin...,Culture fit,Collaborative spirit,The company prioritizes Collaborative spirit w...,0.626185
35,The female connector allows for easy integrati...,Female connector,Receptacle connector,The Receptacle connector allows for easy integ...,0.627351
20,The company detected a black hat attempting to...,Black hat,Attacker,The company detected a Attacker attempting to ...,0.638441
87,The scrum master facilitated the daily stand-u...,Scrum master,Project facilitator,The Project facilitator facilitated the daily ...,0.642606
94,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
30,The paper underwent a blind review process to ...,Blind,Anonymized review,The paper underwent a Anonymized review review...,0.661169
36,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458


In [4]:
# Export only the term, replacement and score columns (unsorted, no index column).
df.to_csv(
    "C:/Users/hana_/Downloads/GeminiFlash2_Reasoning-1_replacements.csv",
    columns=["Harmful Term", "Replacement", "Similarity Score"],
    index=False,
)

In [5]:
# PROMPT 1:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Halt", "Stop", "Interrupt"],
    "Terminate": ["End", "Close", "Stop", "Finish"],
    "Average user": ["Typical user", "Standard user", "General user", "Intended user"],
    "Black box": ["Opaque box", "Non-transparent box", "Undocumented process"],
    "White box": ["Transparent box", "Glass box", "Open box"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Attacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Passlist", "Inclusion list"],
    "Blind": ["Blinded review", "Anonymized review"],
    "Double blind": ["Double-blinded review", "Fully anonymized review", "Independent review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Receptacle connector", "Socket connector"],
    "She": ["They"],
    "Her": ["They"],
    "Hers": ["Theirs"],
    "He": ["They"],
    "Him": ["Them"],
    "His": ["Theirs"],
    "Master": ["Primary", "Main", "Leader", "Controller"],
    "Slave": ["Secondary", "Replica", "Follower", "Agent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum preeminence"],
    "Grandfathered": ["Legacy status", "Retained status", "Exempt under previous policy", "Previously approved"],
    "Guys": ["Everyone", "Folks", "Team", "People"],
    "Man hours": ["Person-hours", "Work hours", "Labor hours", "Staff hours"],
    "Sanity check": ["Quick check", "Basic check", "Initial check", "Confidence test", "Smoke test"],
    "Sanity test": ["Quick test", "Basic test", "Initial test", "Confidence test", "Smoke test"],
    "Dummy value": ["Placeholder value", "Sample value", "Example value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Project facilitator"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Compartmentalization", "Distinction"],
    "Blackout period": ["Maintenance window", "Downtime", "Outage period"],
    "Gray hat": ["Ambiguous actor", "Actor with unclear motives", "Semi-authorized actor"],
    "Native": ["Built-in", "Inherent", "Default", "Platform-specific"],
    "Red team": ["Security testing team", "Offensive security team", "Attack simulation team"],
    "Web master": ["Web administrator", "Web lead", "Website maintainer"],
    "White space": ["Spacing", "Negative space", "Padding"],
    "White team": ["Oversight team", "Moderation team", "Control team"],
    "Yellow team": ["Security optimization team", "Performance team", "Team [Specific Function]"],
    "Aboriginal": ["Indigenous", "First Peoples", "Original"],
    "Brown bags": ["Informal learning session", "Lunch and learn session", "Knowledge sharing session"],
    "First-class citizen": ["Primary citizen", "Essential citizen", "Core component", "Fundamental element"],
    "Man-in-the-middle": ["Intermediary attack", "Eavesdropping attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch", "Primary branch"],
    "Minority": ["Underrepresented group", "Marginalized group", "Specific demographic group"],
    "Normal": ["Typical", "Expected", "Standard", "Regular"],
    "Handicapped": ["Accessible", "People with disabilities", "Persons with disabilities", "Users with disabilities"],
    "Crazy": ["Unconventional", "Unexpected", "Surprising", "Innovative", "Remarkable"],
    "OCD": ["Meticulous", "Organized", "Detail-oriented", "Orderly", "Neat"],
    "Culture fit": ["Team contribution", "Values alignment", "Collaborative spirit", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Presiding officer", "Coordinator"],
    "Foreman": ["Supervisor", "Team lead", "Site supervisor"],
    "Man": ["Humankind", "People", "Humanity"],
    "Mankind": ["Humankind", "Humanity", "People"],
    "Mans": ["Crews", "Staffs", "Operates"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Synthetic", "Engineered", "Artificial", "Constructed"],
    "Manpower": ["Staffing", "Workforce", "Personnel", "Team members"],
    "Demilitarized zone": ["Perimeter network", "Screened subnet", "Isolated network"],
    "DMZ": ["Perimeter network", "Screened subnet", "Isolated network"],
    "Hang": ["Freeze", "Become unresponsive", "Stall", "Become blocked"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Gender changer", "Gender adapter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Unlinked object", "Detached object"]
}

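# Example sentences, one per harmful term, in the same order as `harmful_terms`.
# NOTE: three sentences use a variant of their term ("double-blind", "brown bag",
# "first-class citizens"), so the whole-word regex used below does not match the
# keys "Double blind", "Brown bags" and "First-class citizen" in them.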
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

def generate_replacements(sentence, term, replacements):
    """Build one rewritten sentence per candidate replacement.

    Every whole-word, case-insensitive occurrence of ``term`` in ``sentence``
    is substituted by each entry of ``replacements`` in turn. If ``term`` does
    not occur, the sentence is returned unchanged for every replacement.

    Args:
        sentence: Text to rewrite.
        term: Term to look for; it is regex-escaped, so it is matched literally.
        replacements: Iterable of replacement strings.

    Returns:
        A list of ``(replacement, modified_sentence)`` tuples, in the order of
        ``replacements``.
    """
    # The pattern does not depend on the replacement, so build it once.
    pattern = re.compile(rf'\b{re.escape(term)}\b', flags=re.IGNORECASE)
    # A callable replacement is inserted verbatim. A plain string would be
    # parsed as a regex template, so backslashes or group references inside a
    # replacement would be mangled or raise re.error.
    return [
        (replacement, pattern.sub(lambda _match: replacement, sentence))
        for replacement in replacements
    ]

# Compute similarity scores for each replacement.
# Each result row compares an original sentence with one rewritten variant via
# the cosine similarity of their SBERT embeddings.
# NOTE: terms are matched case-insensitively as whole words, so short keys such
# as "Man", "Master" or "Blind" also match inside longer phrases ("man hours",
# "scrum master", "double-blind") and produce additional rows for them.
results = []
for sentence in original_sentences:
    matching_terms = [
        (term, replacements)
        for term, replacements in harmful_terms.items()
        if re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE)
    ]
    if not matching_terms:
        continue

    # The original sentence is the same for every replacement: encode it once
    # per sentence instead of once per (term, replacement) pair.
    sentence_embedding = model.encode(sentence, convert_to_tensor=True)

    for term, replacements in matching_terms:
        for replacement, modified_sentence in generate_replacements(sentence, term, replacements):
            modified_embedding = model.encode(modified_sentence, convert_to_tensor=True)
            score = util.pytorch_cos_sim(sentence_embedding, modified_embedding).item()
            results.append({
                "Original Sentence": sentence,
                "Harmful Term": term,
                "Replacement": replacement,
                "Modified Sentence": modified_sentence,
                "Similarity Score": score
            })

# Lift the row cap so the notebook renders the complete table.
pd.set_option("display.max_rows", None)

# One row per scored (sentence, term, replacement) combination.
df = pd.DataFrame(results)

# Show the lowest-similarity replacements first (ascending is the default order).
df_sorted = df.sort_values("Similarity Score")
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
19,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
161,The company prioritizes culture fit when hirin...,Culture fit,Team contribution,The company prioritizes Team contribution when...,0.606165
18,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
163,The company prioritizes culture fit when hirin...,Culture fit,Collaborative spirit,The company prioritizes Collaborative spirit w...,0.626185
35,The female connector allows for easy integrati...,Female connector,Receptacle connector,The Receptacle connector allows for easy integ...,0.627351
20,The company detected a black hat attempting to...,Black hat,Attacker,The company detected a Attacker attempting to ...,0.638441
87,The scrum master facilitated the daily stand-u...,Scrum master,Project facilitator,The Project facilitator facilitated the daily ...,0.642606
94,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
30,The paper underwent a blind review process to ...,Blind,Anonymized review,The paper underwent a Anonymized review review...,0.661169
36,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458


In [6]:
# Export only the term, replacement and score columns (unsorted, no index column).
df.to_csv(
    "C:/Users/hana_/Downloads/GeminiFlash2_Reasoning-2_replacements.csv",
    columns=["Harmful Term", "Replacement", "Similarity Score"],
    index=False,
)

In [7]:
# PROMPT 1:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Load the SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Dictionary of terms and their replacements (all 64 terms)
harmful_terms = {
    "Abort": ["Cancel", "Halt", "Stop", "Interrupt"],
    "Terminate": ["End", "Close", "Stop", "Finish"],
    "Average user": ["Typical user", "Standard user", "General user", "Intended user"],
    "Black box": ["Opaque box", "Non-transparent box", "Undocumented process"],
    "White box": ["Transparent box", "Glass box", "Open box"],
    "Black hat": ["Malicious actor", "Unethical hacker", "Attacker"],
    "White hat": ["Ethical hacker", "Security researcher"],
    "Blacklist": ["Denylist", "Blocklist", "Exclusion list"],
    "Whitelist": ["Allowlist", "Passlist", "Inclusion list"],
    "Blind": ["Blinded review", "Anonymized review"],
    "Double blind": ["Double-blinded review", "Fully anonymized review", "Independent review"],
    "Male connector": ["Plug connector", "Pin connector"],
    "Female connector": ["Receptacle connector", "Socket connector"],
    "She": ["They"],
    "Her": ["They"],
    "Hers": ["Theirs"],
    "He": ["They"],
    "Him": ["Them"],
    "His": ["Theirs"],
    "Master": ["Primary", "Main", "Leader", "Controller"],
    "Slave": ["Secondary", "Replica", "Follower", "Agent"],
    "Quantum supremacy": ["Quantum advantage", "Quantum preeminence"],
    "Grandfathered": ["Legacy status", "Retained status", "Exempt under previous policy", "Previously approved"],
    "Guys": ["Everyone", "Folks", "Team", "People"],
    "Man hours": ["Person-hours", "Work hours", "Labor hours", "Staff hours"],
    "Sanity check": ["Quick check", "Basic check", "Initial check", "Confidence test", "Smoke test"],
    "Sanity test": ["Quick test", "Basic test", "Initial test", "Confidence test", "Smoke test"],
    "Dummy value": ["Placeholder value", "Sample value", "Example value", "Test value"],
    "Scrum master": ["Scrum facilitator", "Project facilitator"],
    "Mob programming": ["Ensemble programming", "Collaborative programming"],
    "Segregation": ["Separation", "Isolation", "Compartmentalization", "Distinction"],
    "Blackout period": ["Maintenance window", "Downtime", "Outage period"],
    "Gray hat": ["Ambiguous actor", "Actor with unclear motives", "Semi-authorized actor"],
    "Native": ["Built-in", "Inherent", "Default", "Platform-specific"],
    "Red team": ["Security testing team", "Offensive security team", "Attack simulation team"],
    "Web master": ["Web administrator", "Web lead", "Website maintainer"],
    "White space": ["Spacing", "Negative space", "Padding"],
    "White team": ["Oversight team", "Moderation team", "Control team"],
    "Yellow team": ["Security optimization team", "Performance team", "Team [Specific Function]"],
    "Aboriginal": ["Indigenous", "First Peoples", "Original"],
    "Brown bags": ["Informal learning session", "Lunch and learn session", "Knowledge sharing session"],
    "First-class citizen": ["Primary citizen", "Essential citizen", "Core component", "Fundamental element"],
    "Man-in-the-middle": ["Intermediary attack", "Eavesdropping attack", "Relay attack"],
    "Master branch": ["Main branch", "Default branch", "Primary branch"],
    "Minority": ["Underrepresented group", "Marginalized group", "Specific demographic group"],
    "Normal": ["Typical", "Expected", "Standard", "Regular"],
    "Handicapped": ["Accessible", "People with disabilities", "Persons with disabilities", "Users with disabilities"],
    "Crazy": ["Unconventional", "Unexpected", "Surprising", "Innovative", "Remarkable"],
    "OCD": ["Meticulous", "Organized", "Detail-oriented", "Orderly", "Neat"],
    "Culture fit": ["Team contribution", "Values alignment", "Collaborative spirit", "Shared values"],
    "Chairman": ["Chairperson", "Chair", "Presiding officer", "Coordinator"],
    "Foreman": ["Supervisor", "Team lead", "Site supervisor"],
    "Man": ["Humankind", "People", "Humanity"],
    "Mankind": ["Humankind", "Humanity", "People"],
    "Mans": ["Crews", "Staffs", "Operates"],
    "Salesman": ["Salesperson", "Sales representative"],
    "Manmade": ["Synthetic", "Engineered", "Artificial", "Constructed"],
    "Manpower": ["Staffing", "Workforce", "Personnel", "Team members"],
    "Demilitarized zone": ["Perimeter network", "Screened subnet", "Isolated network"],
    "DMZ": ["Perimeter network", "Screened subnet", "Isolated network"],
    "Hang": ["Freeze", "Become unresponsive", "Stall", "Become blocked"],
    "Daughter board": ["Expansion board", "Add-on board", "Secondary board"],
    "Gender bender": ["Gender changer", "Gender adapter", "Connector adapter"],
    "Orphaned object": ["Unreferenced object", "Unlinked object", "Detached object"]
}

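# Example sentences, one per harmful term, in the same order as `harmful_terms`.
# NOTE: three sentences use a variant of their term ("double-blind", "brown bag",
# "first-class citizens"), so the whole-word regex used below does not match the
# keys "Double blind", "Brown bags" and "First-class citizen" in them.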
original_sentences = [
    "The program will automatically abort if a critical error occurs during execution.", 
    "The user chose to terminate the installation process after encountering an error.", 
    "The app is designed to be intuitive for the average user.", 
    "The algorithm functions as a black box, with no transparency about how decisions are made.", 
    "The white box testing method allows developers to see the internal workings of the code.", 
    "The company detected a black hat attempting to infiltrate their systems.", 
    "A white hat hacker helped identify security vulnerabilities in the new software.", 
    "The IP address was added to the company’s blacklist after repeated failed login attempts.", 
    "Only pre-approved devices are included in the network’s whitelist.", 
    "The paper underwent a blind review process to ensure impartiality.", 
    "The experiment was conducted under double-blind conditions to eliminate bias.", 
    "The cable is equipped with a male connector for compatibility with standard ports.", 
    "The female connector allows for easy integration with other components.", 
    "She was responsible for coordinating the team’s efforts on the project.", 
    "Her contribution to the discussion was insightful and appreciated.", 
    "The credit for the innovative design is entirely hers.", 
    "He led the presentation with confidence and clarity.", 
    "The team assigned the most critical task to him.", 
    "His programming skills greatly improved the project’s outcome.", 
    "The master database contains all the key records for the organization.", 
    "The secondary system operates as a slave to the primary server.", 
    "Achieving quantum supremacy marks a significant milestone in computing.", 
    "The older software was grandfathered in despite the new policy.", 
    "Hey guys, let’s gather for the meeting in five minutes.", 
    "Completing the project required 100 man hours of effort.", 
    "Before deploying the code, we need to perform a sanity check.", 
    "A quick sanity test revealed several issues in the new feature.", 
    "Developers use a dummy value as a placeholder during testing.", 
    "The scrum master facilitated the daily stand-up meeting.", 
    "The team opted for mob programming to tackle the complex issue collaboratively.", 
    "The system’s segregation of duties ensures secure operations.", 
    "A blackout period was enforced during the system upgrade.", 
    "The gray hat hacker reported the vulnerabilities after exploiting them for demonstration.", 
    "The app includes a native feature for photo editing.", 
    "The red team simulated an attack to test the organization’s defenses.", 
    "The web master updated the website’s layout for better usability.", 
    "The designer added white space to improve the page's readability.", 
    "The white team oversaw the cyber exercise and ensured fair play.", 
    "The yellow team focused on optimizing the software’s security during development.", 
    "The land’s history is deeply rooted in Aboriginal culture and traditions.",
    "The company hosted brown bag sessions to share knowledge informally.", 
    "Functions are treated as first-class citizens in many programming languages.", 
    "The man-in-the-middle attack intercepted sensitive information during transmission.", 
    "Changes were merged into the master branch for deployment.", 
    "Efforts to promote diversity aim to amplify the voices of the minority.", 
    "The system is back to normal after resolving the outage.", 
    "The venue was upgraded to be accessible for handicapped individuals.", 
    "The plan was considered crazy but turned out to be a brilliant success.", 
    "His desk organization reflects a hint of OCD tendencies.", 
    "The company prioritizes culture fit when hiring new employees.", 
    "The chairman called for a vote on the proposed changes.", 
    "The foreman supervised the construction site with expertise.", 
    "Man has always sought to understand the universe.", 
    "Mankind has made significant strides in technology over the centuries.", 
    "The crew mans the ship during long voyages.", 
    "The salesman demonstrated the product’s key features effectively.", 
    "The reservoir is a manmade structure designed for water storage.", 
    "The project required significant manpower to complete on time.", 
    "The network’s demilitarized zone protects internal systems from external threats.", 
    "The server operates within the DMZ for added security.", 
    "The application tends to hang when handling large datasets.", 
    "The new functionality was implemented through a daughter board.", 
    "The adapter functions as a gender bender for connecting devices.", 
    "The cleanup script removed the orphaned object from the database."
]

# Function to generate modified sentences with replacements
def generate_replacements(sentence, term, replacements):
    """Return one rewritten copy of ``sentence`` per candidate replacement.

    Every whole-word, case-insensitive occurrence of ``term`` in ``sentence``
    is substituted with the candidate text.

    Args:
        sentence: Text to rewrite.
        term: Literal term to search for; regex metacharacters are escaped.
        replacements: Iterable of replacement strings to try.

    Returns:
        A list of ``(replacement, modified_sentence)`` tuples, in the same
        order as ``replacements``. If ``term`` does not occur, each
        ``modified_sentence`` equals ``sentence``.
    """
    # The pattern depends only on the term, so build it once for all candidates.
    pattern = re.compile(rf'\b{re.escape(term)}\b', flags=re.IGNORECASE)
    # A callable repl inserts the text verbatim. A plain string would be parsed
    # as a substitution template, so a replacement containing a backslash
    # (e.g. "\1" or "\n") would raise or be silently altered.
    return [
        (replacement, pattern.sub(lambda _match, text=replacement: text, sentence))
        for replacement in replacements
    ]

# Compute similarity scores for each replacement.
# For every sentence that contains a listed term (whole word, case-insensitive),
# each candidate replacement is substituted in and the cosine similarity between
# the original and modified sentence embeddings is recorded.
results = []
for sentence in original_sentences:
    # The original sentence's embedding is the same for every term and
    # replacement, so encode it at most once, on the first matching term.
    original_embedding = None
    for term, replacements in harmful_terms.items():
        if not re.search(rf'\b{re.escape(term)}\b', sentence, flags=re.IGNORECASE):
            continue
        if original_embedding is None:
            original_embedding = model.encode(sentence, convert_to_tensor=True)
        for replacement, modified_sentence in generate_replacements(sentence, term, replacements):
            modified_embedding = model.encode(modified_sentence, convert_to_tensor=True)
            # .item() turns the similarity tensor into a plain Python number.
            score = util.pytorch_cos_sim(original_embedding, modified_embedding).item()
            results.append({
                "Original Sentence": sentence,
                "Harmful Term": term,
                "Replacement": replacement,
                "Modified Sentence": modified_sentence,
                "Similarity Score": score
            })

# Convert results to a DataFrame and display.
# Columns are listed explicitly so the frame keeps its schema even when
# `results` is empty; otherwise sorting by "Similarity Score" raises KeyError.
df = pd.DataFrame(
    results,
    columns=[
        "Original Sentence",
        "Harmful Term",
        "Replacement",
        "Modified Sentence",
        "Similarity Score",
    ],
)

# Adjust display options to show all rows (applies to the whole session).
pd.set_option('display.max_rows', None)

# Display the dataframe sorted by similarity score, least similar first.
df_sorted = df.sort_values(by="Similarity Score", ascending=True)
df_sorted


Unnamed: 0,Original Sentence,Harmful Term,Replacement,Modified Sentence,Similarity Score
19,The company detected a black hat attempting to...,Black hat,Unethical hacker,The company detected a Unethical hacker attemp...,0.584426
161,The company prioritizes culture fit when hirin...,Culture fit,Team contribution,The company prioritizes Team contribution when...,0.606165
18,The company detected a black hat attempting to...,Black hat,Malicious actor,The company detected a Malicious actor attempt...,0.610877
163,The company prioritizes culture fit when hirin...,Culture fit,Collaborative spirit,The company prioritizes Collaborative spirit w...,0.626185
35,The female connector allows for easy integrati...,Female connector,Receptacle connector,The Receptacle connector allows for easy integ...,0.627351
20,The company detected a black hat attempting to...,Black hat,Attacker,The company detected a Attacker attempting to ...,0.638441
87,The scrum master facilitated the daily stand-u...,Scrum master,Project facilitator,The Project facilitator facilitated the daily ...,0.642606
94,A blackout period was enforced during the syst...,Blackout period,Maintenance window,A Maintenance window was enforced during the s...,0.658593
30,The paper underwent a blind review process to ...,Blind,Anonymized review,The paper underwent a Anonymized review review...,0.661169
36,The female connector allows for easy integrati...,Female connector,Socket connector,The Socket connector allows for easy integrati...,0.679458


In [None]:
# Export only the term / replacement / score columns, without the row index.
df.to_csv(
    "C:/Users/hana_/Downloads/GeminiFlash2_Reasoning-3_replacements.csv",
    columns=["Harmful Term", "Replacement", "Similarity Score"],
    index=False,
)