In [None]:
# Install necessary libraries
!pip install transformers datasets

In [None]:
# Import necessary libraries
import inspect

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForQuestionAnswering, Trainer, TrainingArguments

In [None]:
# Load the dataset from Hugging Face
# Fetches the "medalpaca/medical_meadow_medical_flashcards" dataset by its hub id;
# the cells below read its 'train' split.
dataset = load_dataset("medalpaca/medical_meadow_medical_flashcards")

In [None]:
# Peek at the 'train' split: its column names on one line, its first record on the next.
print(dataset['train'].column_names, dataset['train'][0], sep="\n")

In [None]:
# Preprocess the data
def preprocess_data(dataset):
    """Build a DataFrame with combined 'question' and 'answer' text columns.

    Args:
        dataset: Mapping with a 'train' entry whose rows carry the 'input',
            'instruction' and 'output' fields.

    Returns:
        A DataFrame holding the original columns plus:
        - 'question': 'input' and 'instruction' joined by a single space, with
          surrounding whitespace stripped.
        - 'answer': a string copy of 'output'.
        Missing values in any of the three source fields become empty strings,
        so both new columns always hold real strings.

    Raises:
        KeyError: If 'input', 'instruction' or 'output' is absent.
    """
    df = pd.DataFrame(dataset['train'])

    required = ('input', 'instruction', 'output')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"dataset['train'] is missing expected column(s): {missing}")

    # A missing field would otherwise turn the whole concatenation into NaN,
    # which is not valid text for the tokenizer or the TF-IDF vectorizer.
    text_parts = df[['input', 'instruction']].fillna("").astype(str)
    combined = text_parts['input'] + " " + text_parts['instruction']
    # Strip so an empty side does not leave a dangling separator space.
    df['question'] = combined.str.strip()
    df['answer'] = df['output'].fillna("").astype(str)
    return df

# Build the working DataFrame (with 'question' and 'answer' columns) used by the cells below.
df = preprocess_data(dataset)

In [None]:
# Split the data into training and test sets
# 80/20 split; random_state=42 fixes the shuffle so the split is reproducible across runs.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Prepare data for BERT model
# Load the pretrained tokenizer for the 'bert-base-uncased' checkpoint; the model cell
# further down loads the same checkpoint name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    """Tokenize the 'question' field of ``examples`` with the module-level tokenizer.

    Sequences are truncated and padded with the "max_length" strategy.
    """
    questions = examples['question']
    return tokenizer(questions, truncation=True, padding="max_length")

# Tokenize the question text of each split (train first, then test), truncating
# long inputs and padding within each split.
train_encodings, test_encodings = (
    tokenizer(split_frame['question'].tolist(), truncation=True, padding=True)
    for split_frame in (train_df, test_df)
)

In [None]:
# Convert data to torch dataset
class MedicalDataset(torch.utils.data.Dataset):
    """Map-style torch dataset pairing tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping from feature name to a sequence holding one entry per
            example; anything exposing ``.items()`` works.
        answers: Sequence with one label per example. Labels must be numeric
            (scalars or nested numeric sequences) so they can become tensors.
    """

    def __init__(self, encodings, answers):
        self.encodings = encodings
        self.answers = answers

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors.

        The label is stored under the 'labels' key.

        Raises:
            ValueError: If the label is a text string, which cannot be turned
                into a tensor.
        """
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        label = self.answers[idx]
        if isinstance(label, str):
            # torch.tensor() rejects strings with an opaque "too many dimensions"
            # message; raise the same exception type with an actionable one.
            raise ValueError(
                f"MedicalDataset labels must be numeric, but example {idx} has a text "
                f"label ({label!r:.60}). Encode answers as integer class ids or "
                "answer-span positions before building the dataset."
            )
        item['labels'] = torch.tensor(label)
        return item

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.answers)

# Wrap each split's encodings together with its answer column (train first, then test).
train_dataset, test_dataset = (
    MedicalDataset(split_encodings, split_frame['answer'].tolist())
    for split_encodings, split_frame in (
        (train_encodings, train_df),
        (test_encodings, test_df),
    )
)

In [None]:
# Fine-tune BERT model for question answering
# Loads the pretrained 'bert-base-uncased' weights into a question-answering model.
# NOTE(review): the datasets built earlier carry the free-text answer under 'labels';
# presumably this model class trains on answer-span start/end positions instead —
# confirm the model head matches the data before training.
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

In [None]:
# Training configuration: evaluate once per epoch, batches of 8 for both training
# and evaluation, 3 epochs, weight decay 0.01; checkpoints go to ./results.
#
# The evaluation-schedule keyword is `eval_strategy` in newer transformers
# releases and `evaluation_strategy` in older ones. Use whichever name the
# installed TrainingArguments accepts so the cell runs on either.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    **{_eval_strategy_key: "epoch"},
)

# The test split doubles as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [None]:
# Run fine-tuning with the Trainer configured in the previous cell.
trainer.train()

In [None]:
# Evaluate BERT model
predictions = trainer.predict(test_dataset)

# A question-answering head returns start logits and end logits (one score per
# token for each example), not records with an 'answer' field. Take the
# highest-scoring start and end token of each example and decode that span.
start_logits, end_logits = predictions.predictions[:2]
start_positions = np.argmax(start_logits, axis=-1)
end_positions = np.argmax(end_logits, axis=-1)

# The spans index into the tokenized test questions, since those are the
# sequences the model scored. An end before the start is not a valid span, so
# it yields an empty prediction.
# NOTE(review): text decoded by an uncased tokenizer is presumably lower-cased;
# confirm before comparing it to the raw reference answers.
pred_answers = [
    tokenizer.decode(token_ids[start:end + 1], skip_special_tokens=True) if start <= end else ""
    for token_ids, start, end in zip(test_encodings['input_ids'], start_positions, end_positions)
]

In [None]:
# Score the BERT predictions against the reference answers.
# EM: fraction of predictions that equal their reference answer exactly.
em_score = np.mean(
    [guess == gold for guess, gold in zip(pred_answers, test_df['answer'])]
)
# F1: macro-averaged, treating each distinct answer string as its own class.
f1_score_val = f1_score(
    y_true=test_df['answer'],
    y_pred=pred_answers,
    average='macro',
)

In [None]:
# Report the BERT metrics: heading, EM and F1 to four decimals, then accuracy.
print(
    "BERT Model Performance:",
    f"Exact Match (EM): {em_score:.4f}",
    f"F1-score: {f1_score_val:.4f}",
    sep="\n",
)
print("Accuracy:", accuracy_score(test_df['answer'], pred_answers))

In [None]:
# Compare performance with a linear classifier
# Baseline: TF-IDF features of the question text fed to logistic regression.
# The vectorizer keeps at most 5000 features; it is fitted on the training
# questions only and then applied unchanged to the test questions.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(train_df['question'])
X_test_tfidf = vectorizer.transform(test_df['question'])

# NOTE(review): the target is the raw answer text, so every distinct answer string
# becomes its own class — confirm that framing is intended, since it presumably
# yields a very large number of classes with few examples each.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_tfidf, train_df['answer'])

# Predicted answer string for each test question.
lr_predictions = lr_model.predict(X_test_tfidf)

# Per-class precision/recall/F1 report followed by overall accuracy.
print("Linear Classifier Performance:")
print(classification_report(test_df['answer'], lr_predictions))
print("Accuracy:", accuracy_score(test_df['answer'], lr_predictions))

In [None]:
# Side-by-side summary of both models: one row per model, one column per metric.
results = {"Model": ["Linear Classifier", "BERT"]}
# Macro F1 for the linear baseline is computed here; the BERT value is reused.
results["F1-score"] = [
    f1_score(test_df['answer'], lr_predictions, average='macro'),
    f1_score_val,
]
results["Accuracy"] = [
    accuracy_score(test_df['answer'], lr_predictions),
    accuracy_score(test_df['answer'], pred_answers),
]

results_df = pd.DataFrame(results)
print(results_df)

In [None]:
# Bar chart of accuracy per model; the title is set on the axes seaborn returns.
sns.barplot(data=results_df, x="Model", y="Accuracy").set_title("Model Comparison")
plt.show()