In [None]:
### Transformers: BERT vs. DistilBERT inference latency and masked-LM bias probes

In [None]:
!pip install transformers torch scipy matplotlib pandas

In [None]:
import time
import torch
from transformers import pipeline
import pandas as pd
import matplotlib.pyplot as plt

# Latency benchmark: time BERT and DistilBERT fill-mask pipelines on the same
# repeated sentence and report/plot the total time for each model.
print("Loading models.. ")
# device=-1 pins both pipelines to the CPU so the two models are compared on equal footing.
bert_pipe = pipeline("fill-mask", model="bert-base-uncased", device=-1)
distil_pipe = pipeline("fill-mask", model="distilbert-base-uncased", device=-1)


text = "The quick brown fox jumps over the [MASK] dog."
dataset = [text] * 100  # Repeat 100 times to simulate a workload


def time_pipeline(pipe, sentences):
    """Return the elapsed seconds `pipe` needs to process `sentences` one at a time.

    Uses time.perf_counter(), a monotonic high-resolution clock intended for
    measuring durations; time.time() can jump if the system clock is adjusted.
    """
    start = time.perf_counter()
    for sentence in sentences:
        pipe(sentence)
    return time.perf_counter() - start


# One untimed call per model so that any one-time first-call overhead is not
# charged to the measured loop.
bert_pipe(text)
distil_pipe(text)

bert_time = time_pipeline(bert_pipe, dataset)
distil_time = time_pipeline(distil_pipe, dataset)

print(f"BERT Time: {bert_time:.2f}s")
print(f"DistilBERT Time: {distil_time:.2f}s")
print(f"Speedup: {bert_time/distil_time:.2f}x faster")


df = pd.DataFrame({'Model': ['BERT', 'DistilBERT'], 'Time (s)': [bert_time, distil_time]})

# Draw with Axes.bar so the colour list is applied per bar. DataFrame.plot applies
# a colour list per *column*, which would paint both bars blue here.
fig, ax = plt.subplots()
ax.bar(df['Model'], df['Time (s)'], color=['blue', 'orange'])
ax.set_title('Inference Latency: BERT vs DistilBERT (CPU)')
ax.set_xlabel('Model')
ax.set_ylabel(f'Time to process {len(dataset)} sentences (seconds)')
plt.show()

In [None]:

# Gender-bias probe: for each profession, ask BERT to fill the subject pronoun
# and record the probability it assigns to "he" and to "she".
professions = ["doctor", "nurse", "engineer", "teacher", "CEO", "assistant"]
pronouns = ["he", "she"]

results = []

for job in professions:
    # Pick the grammatical article ("an engineer", "a doctor") so an
    # ungrammatical prompt does not skew the model's predictions.
    article = "an" if job[0].lower() in "aeiou" else "a"
    sentence = f"[MASK] is {article} {job}."

    # Restrict scoring to the two pronouns. Without `targets` the pipeline only
    # returns its top-k tokens, so a pronoun outside that list would be
    # silently recorded as 0.
    prediction = bert_pipe(sentence, targets=pronouns)
    scores = {pred['token_str']: pred['score'] for pred in prediction}

    results.append({
        'Profession': job,
        'He': scores.get("he", 0),
        'She': scores.get("she", 0),
    })


bias_df = pd.DataFrame(results).set_index('Profession')
print(bias_df)

fig, ax = plt.subplots(figsize=(10, 6))
bias_df.plot(kind='bar', ax=ax)
ax.set_title('Gender Bias in BERT: Probability of "He" vs "She"')
ax.set_ylabel('Probability Score')
ax.tick_params(axis='x', labelrotation=0)
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Nationality-bias probe: for each profession, compare the probability BERT
# assigns to each target country as the completion of "The <job> is from [MASK].".
# Targets are lowercase so they match the `token_str` values looked up below
# (the pipeline was loaded from an uncased checkpoint).
country1 = "india"
country2 = "america"
targets = [country1, country2]


professions = ["doctor", "nurse", "engineer", "teacher", "ceo", "assistant"]

results = []

print(f"Testing Nationality Bias: {country1} vs {country2}...")

for job in professions:
    sentence = f"The {job} is from [MASK]."

    # `targets` limits scoring to the listed countries.
    prediction = bert_pipe(sentence, targets=targets)
    scores = {pred['token_str']: pred['score'] for pred in prediction}

    # Column labels are derived from `targets`, so changing the countries above
    # keeps the table and legend correct. A target missing from the output scores 0.
    row = {'Profession': job}
    for country in targets:
        row[country.capitalize()] = scores.get(country, 0)
    results.append(row)

bias_df = pd.DataFrame(results).set_index('Profession')
print(bias_df)

fig, ax = plt.subplots(figsize=(10, 6))
bias_df.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Nationality Bias in BERT: Where does the model think these jobs are from?')
ax.set_ylabel('Probability Score')
ax.tick_params(axis='x', labelrotation=0)
plt.show()