In [1]:


!pip install -q transformers datasets tqdm



from transformers import GPTNeoForCausalLM, GPT2Tokenizer
from datasets import load_dataset
from datasets import DatasetDict
import torch
import re
from tqdm import tqdm
import random



# Check if CUDA (GPU) is available, and set the device accordingly

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")



# Load the GPT-Neo model and tokenizer, and move the model to the device

model_name = "EleutherAI/gpt-neo-2.7B"
model = GPTNeoForCausalLM.from_pretrained(model_name).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)



# Load the MedQA dataset with training and testing splits
dataset = load_dataset("keivalya/MedQuad-MedicalQnADataset")
train_test_split = dataset['train'].train_test_split(test_size=0.115)

dataset = DatasetDict({

    'train': train_test_split['train'],

    'test': train_test_split['test']

})

dataset

Using device: cuda


config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]



README.md:   0%|          | 0.00/233 [00:00<?, ?B/s]

medDataset_processed.csv:   0%|          | 0.00/22.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16407 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['qtype', 'Question', 'Answer'],
        num_rows: 14520
    })
    test: Dataset({
        features: ['qtype', 'Question', 'Answer'],
        num_rows: 1887
    })
})

In [2]:
def create_few_shot_examples(train_data, num_examples):
    shuffled_data = random.sample(list(train_data), len(train_data))

    examples = ""
    selected_categories = set()

    for sample in tqdm(shuffled_data, desc="Creating few-shot examples", total=len(train_data)):
        question_type = sample['qtype']
        question = sample['Question']
        correct_answer = sample['Answer']  
        
        if len(selected_categories) >= num_examples:
            break

        example = f"Question Type: {question_type}\nQuestion: {question}\nAnswer: {correct_answer}\n\n"
        examples += example

        selected_categories.add(question_type)

    return examples

In [3]:
def get_model_prediction(question, few_shot_prompt):
    input_text = f"{few_shot_prompt}\nQuestion: {question}\nAnswer:"

    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    output = model.generate(inputs["input_ids"], max_new_tokens=200)

    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    question_end_index = generated_text.find(question) + len(question) + len("Answer: ")
    extracted_text = generated_text[question_end_index:]
    next_question_index = extracted_text.find("Question")
    predicted = extracted_text[:next_question_index].strip() if next_question_index != -1 else extracted_text.strip()

    return predicted

In [4]:
def generate_predictions_with_few_shot(model, tokenizer, train_dataset, test_dataset, num_examples=3):
    model.eval()  
    predictions = []

    few_shot_prompt = create_few_shot_examples(train_dataset, num_examples)

    for sample in tqdm(test_dataset, desc="Generating predictions"):
        question = sample['Question']
        predicted_answer = get_model_prediction(question, few_shot_prompt)
        predictions.append(predicted_answer)

    return predictions

In [5]:

predictions = generate_predictions_with_few_shot(model, tokenizer, dataset['train'], dataset['test'], num_examples=3)

Creating few-shot examples:   0%|          | 3/14520 [00:00<00:00, 38014.84it/s]
Generating predictions:   0%|          | 0/1887 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Generating predictions:   0%|          | 1/1887 [00:13<7:15:20, 13.85s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Generating predictions:   0%|          | 2/1887 [00:

In [6]:
!pip -q install evaluate
!pip -q install rouge-score
# Load ROUGE for evaluation
import evaluate

# Load ROUGE for evaluation
rouge = evaluate.load("rouge")

# Evaluate predictions


# Prepare references (ground-truth answers)
references = dataset['test']['Answer']


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [7]:
scores = rouge.compute(predictions=predictions, references=references)
print(scores)

{'rouge1': 0.24637029419036793, 'rouge2': 0.08954977939504233, 'rougeL': 0.1800870177569843, 'rougeLsum': 0.18306894969708457}


In [8]:
bleu = evaluate.load("bleu")
scores = bleu.compute(predictions=predictions, references=references)
print(scores)

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

{'bleu': 0.024933675003962548, 'precisions': [0.35459902638995644, 0.10380731051067892, 0.04764291239500147, 0.031763067659572125], 'brevity_penalty': 0.28861223598765007, 'length_ratio': 0.4458968323293292, 'translation_length': 187344, 'reference_length': 420151}


In [9]:
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({
    'predictions': predictions,
    'references': references
})

# Save as a CSV file
df.to_csv('/kaggle/working/predictions_references.csv', index=False)