# Surgical Site Infections (SSI) Chatbot Project

This notebook demonstrates the process of creating a chatbot that provides information about Surgical Site Infections (SSI) using a fine-tuned BERT model.


 Install Necessary Libraries

In [2]:
!pip install torch transformers datasets




 Import Libraries


In [3]:
import torch
from transformers import BertTokenizer, BertForQuestionAnswering, Trainer, TrainingArguments
from datasets import Dataset


 Load and Prepare Dataset

In [4]:
# Load dataset
dataset = [
    {"question": "What are Surgical Site Infections (SSIs)?", "answer": "Surgical Site Infections (SSIs) are infections that occur after surgery in the part of the body where the surgery took place."},
    {"question": "What are the common causes of SSIs?", "answer": "SSIs are typically caused by bacteria that enter the incision site during surgery or in the days following surgery."},
    {"question": "How can SSIs be prevented?", "answer": "SSIs can be prevented by maintaining proper hygiene, using sterile equipment, and administering antibiotics before surgery."},
    {"question": "What are the symptoms of SSIs?", "answer": "Symptoms of SSIs include redness and swelling at the incision site, pain or tenderness, and pus or drainage from the wound."},
    {"question": "How are SSIs treated?", "answer": "Treatment for SSIs typically involves antibiotics and, in some cases, additional surgery to remove infected tissue."}
]

data = {"question": [], "context": [], "answer": []}
for item in dataset:
    data["question"].append(item["question"])
    data["context"].append(" ".join([q["answer"] for q in dataset]))
    data["answer"].append(item["answer"])

dataset = Dataset.from_dict(data)


Tokenize Dataset

In [5]:
# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

# Tokenize the dataset
def preprocess_function(examples):
    return tokenizer(examples['question'], examples['context'], truncation=True, padding=True)

tokenized_dataset = dataset.map(preprocess_function, batched=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

In [14]:
import torch.nn.functional as F


In [1]:
!pip install torch transformers[torch] accelerate -U




Fine-tune the Model

In [26]:
from transformers import Trainer, TrainingArguments
import torch

# Subclass Trainer to override compute_loss method
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        loss = outputs.loss

        # Check if loss is None and handle it
        if loss is None:
            # This can happen if the loss is not calculated in the model (e.g., for evaluation)
            return None if return_outputs else torch.tensor(0.0, device=self.args.device)

        return (loss, outputs) if return_outputs else loss

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
)

# Custom Trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)




In [31]:
import torch

# Define a tensor and set requires_grad to True
tensor = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)


In [41]:
import torch
import torch.nn as nn
from transformers import BertPreTrainedModel, BertModel

class CustomBertForQuestionAnswering(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)  # 2 because we're predicting start and end positions

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, start_positions=None, end_positions=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask)
        sequence_output = outputs.last_hidden_state
        logits = self.qa_outputs(sequence_output)

        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        # Compute the loss
        loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
        if start_positions is not None and end_positions is not None:
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2  # You can adjust this formula as needed
        else:
            total_loss = None

        return total_loss, start_logits, end_logits


In [59]:
import torch
from transformers import Trainer, TrainingArguments

# Subclass Trainer to override compute_loss method
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        loss = outputs.loss
        if loss is None:
            loss = torch.tensor(0.0, device=outputs.start_logits.device)
        return (loss, outputs) if return_outputs else loss

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
)

# Custom Trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Ensure all model parameters require gradients
for param in model.parameters():
    param.requires_grad = True

# Print the names of parameters that require gradients
print("Parameters requiring gradients:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

# Train the model
try:
    trainer.train()
except RuntimeError as e:
    print("RuntimeError:", e)

# Save the model and tokenizer
model.save_pretrained("./ssi_bert_model")
tokenizer.save_pretrained("./ssi_bert_model")


Parameters requiring gradients:
bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.atte

('./ssi_bert_model/tokenizer_config.json',
 './ssi_bert_model/special_tokens_map.json',
 './ssi_bert_model/vocab.txt',
 './ssi_bert_model/added_tokens.json')

Load and Test the Model

In [60]:
# Load the fine-tuned model and tokenizer
tokenizer = BertTokenizer.from_pretrained('./ssi_bert_model')
model = BertForQuestionAnswering.from_pretrained('./ssi_bert_model')

# Function to answer questions
def answer_question(question, context):
    inputs = tokenizer(question, context, return_tensors="pt")
    outputs = model(**inputs)
    answer_start = torch.argmax(outputs.start_logits)
    answer_end = torch.argmax(outputs.end_logits) + 1
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
    return answer

# Example usage
context = " ".join([
    "Surgical Site Infections (SSIs) are infections that occur after surgery in the part of the body where the surgery took place.",
    "SSIs are typically caused by bacteria that enter the incision site during surgery or in the days following surgery.",
    "SSIs can be prevented by maintaining proper hygiene, using sterile equipment, and administering antibiotics before surgery.",
    "Symptoms of SSIs include redness and swelling at the incision site, pain or tenderness, and pus or drainage from the wound.",
    "Treatment for SSIs typically involves antibiotics and, in some cases, additional surgery to remove infected tissue."
])

questions = [
    "What are Surgical Site Infections (SSIs)?",
    "What are the common causes of SSIs?",
    "How can SSIs be prevented?",
    "What are the symptoms of SSIs?",
    "How are SSIs treated?"
]

for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {answer_question(question, context)}\n")


Question: What are Surgical Site Infections (SSIs)?
Answer: 

Question: What are the common causes of SSIs?
Answer: 

Question: How can SSIs be prevented?
Answer: be prevented by maintaining proper hygiene , using sterile

Question: What are the symptoms of SSIs?
Answer: 

Question: How are SSIs treated?
Answer: 

