In [29]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments

team_descriptions = [
    ["We seek a collaborative team member who values diversity.", "inclusive"],
    ["We need a go-getter who thrives under pressure.", "biased"],
    ["We are looking for a supportive team player who uplifts others.", "inclusive"],
    ["We want someone who can take charge and make tough decisions.", "biased"],
    ["We seek a consensus builder who listens to everyone's input.", "inclusive"],
    ["We need a competitive individual who drives results.", "biased"],
    ["We want a compassionate team member who prioritizes collaboration.", "inclusive"],
    ["We are looking for a strong-willed leader who won't back down.", "biased"],
    ["We seek a flexible team member who adapts to change.", "inclusive"],
    ["We want an assertive person who can handle challenges head-on.", "biased"],
    ["We need a thoughtful communicator who values each voice in the room.", "inclusive"]
]

df = pd.DataFrame(team_descriptions, columns=['description', 'label'])

print(df)

X_train, X_test, y_train, y_test = train_test_split(df['description'], df['label'], test_size=0.2, random_state=42)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(X_test), truncation=True, padding=True, max_length=128)

class JobDescriptionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = JobDescriptionDataset(train_encodings, y_train.map({'biased': 0, 'inclusive': 1}).tolist())
test_dataset = JobDescriptionDataset(test_encodings, y_test.map({'biased': 0, 'inclusive': 1}).tolist())

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

predictions = trainer.predict(test_dataset)
preds = predictions.predictions.argmax(-1)
print(classification_report(y_test.map({'biased': 0, 'inclusive': 1}).tolist(), preds))

def suggest_revisions(description):
    biased_terms = {
        'go-getter': 'driven individual',
        'strong-willed leader': 'effective team player',
        'take charge': 'lead collaboratively',
        'assertive': 'confident team member',
        'competitive': 'motivated individual',
    }
    for term, suggestion in biased_terms.items():
        if term in description.lower():
            description = description.replace(term, suggestion)
    return description

revised_descriptions = [suggest_revisions(desc[0]) for desc in team_descriptions]

print("Original Description:")
for desc in team_descriptions:
    print(desc[0])

print("\nSuggested Revision:")
for rev in revised_descriptions:
    print(rev)



                                          description      label
0   We seek a collaborative team member who values...  inclusive
1     We need a go-getter who thrives under pressure.     biased
2   We are looking for a supportive team player wh...  inclusive
3   We want someone who can take charge and make t...     biased
4   We seek a consensus builder who listens to eve...  inclusive
5   We need a competitive individual who drives re...     biased
6   We want a compassionate team member who priori...  inclusive
7   We are looking for a strong-willed leader who ...     biased
8   We seek a flexible team member who adapts to c...  inclusive
9   We want an assertive person who can handle cha...     biased
10  We need a thoughtful communicator who values e...  inclusive


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,No log,0.78718
2,No log,0.787198
3,No log,0.787307


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.33      1.00      0.50         1

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3

Original Description:
We seek a collaborative team member who values diversity.
We need a go-getter who thrives under pressure.
We are looking for a supportive team player who uplifts others.
We want someone who can take charge and make tough decisions.
We seek a consensus builder who listens to everyone's input.
We need a competitive individual who drives results.
We want a compassionate team member who prioritizes collaboration.
We are looking for a strong-willed leader who won't back down.
We seek a flexible team member who adapts to change.
We want an assertive person who can handle challenges head-on.
We need a thoughtful communicator who values each voice in 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
