In [None]:
!pip install datasets




In [None]:
from datasets import load_dataset

# LitQA: the train and test splits come from two separate Hub repositories.
litqa_train, litqa_test = (
    load_dataset("Anshu132002/litqaTrain", split="train"),
    load_dataset("Anshu132002/litqaTest", split="test"),
)

# PubMedQA: loaded the same way, train first and then test.
pubmedqa_train, pubmedqa_test = (
    load_dataset("Anshu132002/pubmedqaTrain", split="train"),
    load_dataset("Anshu132002/pubmedqaTest", split="test"),
)


In [None]:
# Training template for LitQA. Slots, in order: question, key passage,
# distractors, ideal answer. preprocess_litqa passes all four values, so the
# template needs the fourth slot: str.format silently discards surplus
# positional arguments, and without it every training text stopped at
# "Answer:" with no target to learn from.
prompt_litqa = """Below is a question along with a key passage and distractors. Provide only the ideal answer as single word or two to three word only.

### Question:
{}

### Key Passage:
{}

### Distractors:
{}

Answer:
{}"""


In [None]:
# Training template for PubMedQA. Slots, in order: question, context, answer;
# preprocess_pubmedqa fills them from QUESTION, CONTEXTS and LONG_ANSWER.
prompt_pubmedqa = """Below is a question along with context. Provide a answer maximum 3-4 lines.

### Question:

{}

### Context:

{}

### Answer:

{}"""


In [None]:
def preprocess_litqa(examples):
    """Render a batch of LitQA rows through ``prompt_litqa``.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "question", "key-passage", "distractors" and "ideal".

    Returns:
        dict with a single key, "text", holding one formatted string per row.
    """
    columns = (
        examples["question"],
        examples["key-passage"],
        examples["distractors"],
        examples["ideal"],
    )
    # Each zipped row is (question, key_passage, distractors, ideal).
    return {"text": [prompt_litqa.format(*row) for row in zip(*columns)]}


In [None]:
def preprocess_pubmedqa(examples):
    """Render a batch of PubMedQA rows through ``prompt_pubmedqa``.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "QUESTION", "CONTEXTS" and "LONG_ANSWER".

    Returns:
        dict with a single key, "text", holding one formatted string per row.
    """
    columns = (
        examples["QUESTION"],
        examples["CONTEXTS"],
        examples["LONG_ANSWER"],
    )
    # Each zipped row is (question, context, long_answer).
    return {"text": [prompt_pubmedqa.format(*row) for row in zip(*columns)]}


In [None]:
# Add the formatted "text" column to both LitQA splits (train first, then test).
litqa_train, litqa_test = [
    split.map(preprocess_litqa, batched=True)
    for split in (litqa_train, litqa_test)
]

# Same treatment for both PubMedQA splits.
pubmedqa_train, pubmedqa_test = [
    split.map(preprocess_pubmedqa, batched=True)
    for split in (pubmedqa_train, pubmedqa_test)
]


Map:   0%|          | 0/145 [00:00<?, ? examples/s]

Map:   0%|          | 0/49 [00:00<?, ? examples/s]

In [None]:
from datasets import concatenate_datasets

# Stack the two sources per split, LitQA rows first and PubMedQA rows after.
train_parts = [litqa_train, pubmedqa_train]
test_parts = [litqa_test, pubmedqa_test]

train_dataset = concatenate_datasets(train_parts)
test_dataset = concatenate_datasets(test_parts)


In [None]:
%%capture
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options; max_seq_length is reused by the trainer cell further down.
max_seq_length = 150
dtype = None
load_in_4bit = True

# Base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3.5-mini-instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with the adapters; `model` now refers to the PEFT model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)


==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 7.5. CUDA Toolkit = 12.4.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# NOTE: logging_steps (10) is larger than max_steps (1), so the loss table
# printed by this run stays empty.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=1,  # Increase for full training
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# The trainer consumes the pre-rendered "text" column of the combined dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Start training
trainer.train()


Map (num_proc=2):   0%|          | 0/1095 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,095 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 1
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss


TrainOutput(global_step=1, training_loss=2.549076557159424, metrics={'train_runtime': 6.1927, 'train_samples_per_second': 1.292, 'train_steps_per_second': 0.161, 'total_flos': 27017736192000.0, 'train_loss': 2.549076557159424, 'epoch': 0.0072992700729927005})

In [None]:

from datasets import load_dataset
# LitQA: the train and test splits come from two separate Hub repositories.
litqa_train, litqa_test = (
    load_dataset("Anshu132002/litqaTrain", split="train"),
    load_dataset("Anshu132002/litqaTest", split="test"),
)

# PubMedQA: loaded the same way, train first and then test.
pubmedqa_train, pubmedqa_test = (
    load_dataset("Anshu132002/pubmedqaTrain", split="train"),
    load_dataset("Anshu132002/pubmedqaTest", split="test"),
)

# Two-slot LitQA template (question, key passage). It stops at the "Answer:"
# cue and has no slot for the answer itself; the same template is used for the
# training map and for building the test prompts in this cell.
prompt_litqa = """Answer the following question based on the key passage. Provide only the ideal answer as a single word or a few words.

Question:
{}

Key Passage:
{}

Answer:"""

# Two-slot PubMedQA template (question, context), also ending at "Answer:".
prompt_pubmedqa = """Answer the following question based on the context. Provide an answer in 3-4 lines.

Question:
{}

Context:
{}

Answer:"""

def preprocess_litqa(examples):
    """Build LitQA training texts: the filled prompt followed by the ideal answer.

    ``prompt_litqa`` ends at the "Answer:" cue and has no slot for the target,
    so the answer is appended here. Without it, every training string stops at
    "Answer:" and supervised fine-tuning has no label to learn.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "question", "key-passage" and "ideal".

    Returns:
        dict with a single key, "text", holding one training string per row.
    """
    texts = []
    for question, key_passage, ideal in zip(
        examples["question"],
        examples["key-passage"],
        examples["ideal"],
    ):
        prompt = prompt_litqa.format(question, key_passage)
        # The prompt is the same string used at inference; the target follows it.
        texts.append(f"{prompt} {ideal}")
    return {"text": texts}

def preprocess_pubmedqa(examples):
    """Build PubMedQA training texts: the filled prompt followed by the long answer.

    ``prompt_pubmedqa`` ends at the "Answer:" cue and has no slot for the
    target, so the answer is appended here. Without it, every training string
    stops at "Answer:" and supervised fine-tuning has no label to learn.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "QUESTION", "CONTEXTS" and "LONG_ANSWER".

    Returns:
        dict with a single key, "text", holding one training string per row.
    """
    texts = []
    for question, context, long_answer in zip(
        examples["QUESTION"],
        examples["CONTEXTS"],
        examples["LONG_ANSWER"],
    ):
        prompt = prompt_pubmedqa.format(question, context)
        # The prompt is the same string used at inference; the target follows it.
        texts.append(f"{prompt} {long_answer}")
    return {"text": texts}

from datasets import concatenate_datasets

# Only the training splits are mapped here; the test splits get their "text"
# column later in this cell.
litqa_train = litqa_train.map(function=preprocess_litqa, batched=True)
pubmedqa_train = pubmedqa_train.map(function=preprocess_pubmedqa, batched=True)

# LitQA rows first, PubMedQA rows after.
train_parts = [litqa_train, pubmedqa_train]
train_dataset = concatenate_datasets(train_parts)


from unsloth import FastLanguageModel
import torch

# Loading options; max_seq_length is reused by the trainer and by generation.
max_seq_length = 150
dtype = None
load_in_4bit = True

# Base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3.5-mini-instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with the adapters; `model` now refers to the PEFT model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=1,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# The trainer consumes the pre-rendered "text" column of the combined dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Start training
trainer.train()

from tqdm import tqdm

# Training is finished; switch the model over to generation.
FastLanguageModel.for_inference(model)

def preprocess_test_dataset(dataset, format_string, fields):
    """Fill ``format_string`` once per row using the listed columns.

    Args:
        dataset: Mapping-like object; ``dataset[field]`` must yield that
            column's values in row order.
        format_string: ``str.format`` template with positional slots.
        fields: Column names, in the order their values fill the slots.

    Returns:
        list of formatted strings, one per row.
    """
    columns = [dataset[field] for field in fields]
    # zip(*columns) walks the selected columns row by row.
    return [format_string.format(*row) for row in zip(*columns)]

# Render the LitQA inference prompts and attach them as a "text" column.
litqa_test_texts = preprocess_test_dataset(
    dataset=litqa_test,
    format_string=prompt_litqa,
    fields=["question", "key-passage"],
)
litqa_test = litqa_test.add_column(name="text", column=litqa_test_texts)

# Same for PubMedQA.
pubmedqa_test_texts = preprocess_test_dataset(
    dataset=pubmedqa_test,
    format_string=prompt_pubmedqa,
    fields=["QUESTION", "CONTEXTS"],
)
pubmedqa_test = pubmedqa_test.add_column(name="text", column=pubmedqa_test_texts)

def generate_predictions_for_all(model, tokenizer, test_dataset, max_seq_length):
    """Greedy-decode one answer line per prompt in ``test_dataset['text']``.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        test_dataset: Object whose ``['text']`` entry is the list of prompts.
        max_seq_length: Prompts are truncated to this many tokens.

    Returns:
        list of str, one per prompt: the first line of the generated
        continuation, stripped of surrounding whitespace.
    """
    predictions = []
    inputs_list = test_dataset['text']

    for idx, input_text in enumerate(tqdm(inputs_list, desc="Generating Predictions")):
        # NOTE(review): prompts longer than max_seq_length are truncated, which
        # may remove the trailing "Answer:" cue -- confirm the tokenizer's
        # truncation side or raise max_seq_length.
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            truncation=True,
            max_length=max_seq_length,
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                use_cache=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1,
                do_sample=False,
            )

        # The returned sequence is the prompt tokens followed by the new ones.
        # Decode only the new tokens: slicing the decoded string by
        # len(input_text) breaks whenever the prompt was truncated or the
        # decoded prompt is not character-identical to the input, which clipped
        # answers ("itatory neurons", "nswer:") or emptied them.
        prompt_token_count = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(
            outputs[0][prompt_token_count:],
            skip_special_tokens=True,
        ).strip()

        # Keep only the first line of the continuation.
        answer_line = answer.split('\n')[0].strip()
        predictions.append(answer_line)

        print(f"Example {idx + 1}: {predictions[-1]}")

    return predictions

print("\nGenerating predictions for LitQA Test Dataset...")
litqa_predictions = generate_predictions_for_all(
    model=model,
    tokenizer=tokenizer,
    test_dataset=litqa_test,
    max_seq_length=max_seq_length,
)

print("\nGenerating predictions for PubMedQA Test Dataset...")
pubmedqa_predictions = generate_predictions_for_all(
    model=model,
    tokenizer=tokenizer,
    test_dataset=pubmedqa_test,
    max_seq_length=max_seq_length,
)

import json

# Fetch each column once. Indexing litqa_test["question"] inside the loop
# looked up the whole column again on every iteration.
litqa_questions = litqa_test["question"]
litqa_key_passages = litqa_test["key-passage"]

# One record per test row, pairing the inputs with the generated answer.
results_litqa = [
    {
        "question": litqa_questions[idx],
        "key-passage": litqa_key_passages[idx],
        "generated_answer": litqa_predictions[idx],
    }
    for idx in range(len(litqa_test))
]

with open("litqa_predictions.json", "w") as f:
    json.dump(results_litqa, f, indent=4)

# Empty list for the PubMedQA records. The export below is disabled in this
# cell; the same loop and JSON dump run in the following notebook cell.
results_pubmedqa = []

# for idx in range(len(pubmedqa_test)):
#     result = {
#         "QUESTION": pubmedqa_test["QUESTION"][idx],
#         "CONTEXTS": pubmedqa_test["CONTEXTS"][idx],
#         "generated_answer": pubmedqa_predictions[idx]
#     }
#     results_pubmedqa.append(result)

# with open("pubmedqa_predictions.json", "w") as f:
#     json.dump(results_pubmedqa, f, indent=4)




Map:   0%|          | 0/145 [00:00<?, ? examples/s]

Map:   0%|          | 0/950 [00:00<?, ? examples/s]

==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 7.5. CUDA Toolkit = 12.4.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Map (num_proc=2):   0%|          | 0/1095 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,095 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 1
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss



Generating predictions for LitQA Test Dataset...


Generating Predictions:   2%|▏         | 1/49 [00:03<02:40,  3.35s/it]

Example 1: Glycosyltransferases


Generating Predictions:   4%|▍         | 2/49 [00:06<02:25,  3.10s/it]

Example 2: itatory neurons


Generating Predictions:   6%|▌         | 3/49 [00:10<02:39,  3.48s/it]

Example 3: ld crystallographic symmetry.


Generating Predictions:   8%|▊         | 4/49 [00:11<01:53,  2.52s/it]

Example 4: Microcin V


Generating Predictions:  10%|█         | 5/49 [00:14<02:03,  2.82s/it]

Example 5: S1pr2-RFP mice


Generating Predictions:  12%|█▏        | 6/49 [00:17<02:09,  3.01s/it]

Example 6: S371F, D405N, R408S


Generating Predictions:  14%|█▍        | 7/49 [00:22<02:25,  3.47s/it]

Example 7: 


Generating Predictions:  16%|█▋        | 8/49 [00:22<01:44,  2.55s/it]

Example 8: DILT


Generating Predictions:  18%|█▊        | 9/49 [00:26<01:52,  2.81s/it]

Example 9: 


Generating Predictions:  20%|██        | 10/49 [00:28<01:40,  2.58s/it]

Example 10: domain


Generating Predictions:  22%|██▏       | 11/49 [00:31<01:49,  2.88s/it]

Example 11: 


Generating Predictions:  24%|██▍       | 12/49 [00:36<01:59,  3.24s/it]

Example 12: 


Generating Predictions:  27%|██▋       | 13/49 [00:37<01:32,  2.56s/it]

Example 13: GSDMD


Generating Predictions:  29%|██▊       | 14/49 [00:37<01:11,  2.04s/it]

Example 14: Ogfrl1


Generating Predictions:  31%|███       | 15/49 [00:38<00:54,  1.62s/it]

Example 15: Rasgrf1


Generating Predictions:  33%|███▎      | 16/49 [00:40<00:57,  1.75s/it]

Example 16: 


Generating Predictions:  35%|███▍      | 17/49 [00:44<01:12,  2.26s/it]

Example 17: HTR3A and HTR3B


Generating Predictions:  37%|███▋      | 18/49 [00:48<01:25,  2.77s/it]

Example 18: Fig. S2 H).


Generating Predictions:  39%|███▉      | 19/49 [00:51<01:28,  2.96s/it]

Example 19: icitly mentions that IL-4 activates STAT6, cMYC, EGR2, and the PPARγ/RX


Generating Predictions:  41%|████      | 20/49 [00:54<01:29,  3.08s/it]

Example 20: MYB45


Generating Predictions:  43%|████▎     | 21/49 [00:58<01:31,  3.28s/it]

Example 21: POLR2A up-regulation


Generating Predictions:  45%|████▍     | 22/49 [01:02<01:34,  3.48s/it]

Example 22: 


Generating Predictions:  47%|████▋     | 23/49 [01:05<01:29,  3.46s/it]

Example 23: 


Generating Predictions:  49%|████▉     | 24/49 [01:07<01:14,  2.97s/it]

Example 24: 


Generating Predictions:  51%|█████     | 25/49 [01:11<01:15,  3.16s/it]

Example 25: The passage specifies that the


Generating Predictions:  53%|█████▎    | 26/49 [01:15<01:17,  3.39s/it]

Example 26: uced conformational change


Generating Predictions:  55%|█████▌    | 27/49 [01:16<00:57,  2.64s/it]

Example 27: Surface trafficking and assembly


Generating Predictions:  57%|█████▋    | 28/49 [01:19<01:00,  2.86s/it]

Example 28: 


Generating Predictions:  59%|█████▉    | 29/49 [01:21<00:49,  2.47s/it]

Example 29: Podoplanin (Pdpn) and integrin β4 (CD104)


Generating Predictions:  61%|██████    | 30/49 [01:24<00:54,  2.88s/it]

Example 30: 


Generating Predictions:  63%|██████▎   | 31/49 [01:28<00:54,  3.03s/it]

Example 31: 


Generating Predictions:  65%|██████▌   | 32/49 [01:31<00:53,  3.12s/it]

Example 32: +


Generating Predictions:  67%|██████▋   | 33/49 [01:34<00:51,  3.20s/it]

Example 33: ve to MMS than rev3 single mutants (Figure 5C). Rev1-


Generating Predictions:  69%|██████▉   | 34/49 [01:37<00:43,  2.92s/it]

Example 34: er:BUD40


Generating Predictions:  71%|███████▏  | 35/49 [01:40<00:43,  3.07s/it]

Example 35: mplitude, mean left amygdala activation, and signal changes in the left amygdala.


Generating Predictions:  73%|███████▎  | 36/49 [01:42<00:34,  2.65s/it]

Example 36: nswer:


Generating Predictions:  76%|███████▌  | 37/49 [01:45<00:34,  2.85s/it]

Example 37: 


Generating Predictions:  78%|███████▊  | 38/49 [01:49<00:33,  3.01s/it]

Example 38: Thymidylate synthase (TYMS)


Generating Predictions:  80%|███████▉  | 39/49 [01:53<00:34,  3.40s/it]

Example 39: U1


Generating Predictions:  82%|████████▏ | 40/49 [01:56<00:30,  3.37s/it]

Example 40: 


Generating Predictions:  84%|████████▎ | 41/49 [02:00<00:26,  3.36s/it]

Example 41: 


Generating Predictions:  86%|████████▌ | 42/49 [02:03<00:23,  3.35s/it]

Example 42: 


Generating Predictions:  88%|████████▊ | 43/49 [02:07<00:21,  3.53s/it]

Example 43: eIF4E-eIF4G pathway


Generating Predictions:  90%|████████▉ | 44/49 [02:07<00:13,  2.62s/it]

Example 44: Chemical modifications


Generating Predictions:  92%|█████████▏| 45/49 [02:11<00:11,  2.84s/it]

Example 45: GFAP


Generating Predictions:  94%|█████████▍| 46/49 [02:12<00:07,  2.46s/it]

Example 46: Vasculitis or embolism


Generating Predictions:  96%|█████████▌| 47/49 [02:16<00:05,  2.82s/it]

Example 47: 


Generating Predictions:  98%|█████████▊| 48/49 [02:20<00:03,  3.12s/it]

Example 48: 


Generating Predictions: 100%|██████████| 49/49 [02:20<00:00,  2.88s/it]


Example 49: acj6 and SoxN

Generating predictions for PubMedQA Test Dataset...


Generating Predictions:   2%|▏         | 1/50 [00:03<02:39,  3.26s/it]

Example 1: 


Generating Predictions:   4%|▍         | 2/50 [00:06<02:39,  3.32s/it]

Example 2: 


Generating Predictions:   6%|▌         | 3/50 [00:11<02:59,  3.83s/it]

Example 3: 


Generating Predictions:   8%|▊         | 4/50 [00:14<02:46,  3.61s/it]

Example 4: 


Generating Predictions:  10%|█         | 5/50 [00:17<02:37,  3.51s/it]

Example 5: 


Generating Predictions:  12%|█▏        | 6/50 [00:21<02:36,  3.55s/it]

Example 6: 


Generating Predictions:  14%|█▍        | 7/50 [00:25<02:36,  3.65s/it]

Example 7: 


Generating Predictions:  16%|█▌        | 8/50 [00:28<02:29,  3.55s/it]

Example 8: 


Generating Predictions:  18%|█▊        | 9/50 [00:31<02:23,  3.49s/it]

Example 9: 


Generating Predictions:  20%|██        | 10/50 [00:36<02:29,  3.75s/it]

Example 10: 


Generating Predictions:  22%|██▏       | 11/50 [00:39<02:20,  3.60s/it]

Example 11: 


Generating Predictions:  24%|██▍       | 12/50 [00:42<02:13,  3.51s/it]

Example 12: 


Generating Predictions:  26%|██▌       | 13/50 [00:46<02:07,  3.46s/it]

Example 13: 


Generating Predictions:  28%|██▊       | 14/50 [00:50<02:13,  3.72s/it]

Example 14: 


Generating Predictions:  30%|███       | 15/50 [00:53<02:06,  3.60s/it]

Example 15: 


Generating Predictions:  32%|███▏      | 16/50 [00:57<02:00,  3.53s/it]

Example 16: 


Generating Predictions:  34%|███▍      | 17/50 [01:00<01:59,  3.63s/it]

Example 17: 


Generating Predictions:  36%|███▌      | 18/50 [01:04<01:55,  3.60s/it]

Example 18: 


Generating Predictions:  38%|███▊      | 19/50 [01:07<01:48,  3.51s/it]

Example 19: 


Generating Predictions:  40%|████      | 20/50 [01:11<01:43,  3.45s/it]

Example 20: 


Generating Predictions:  42%|████▏     | 21/50 [01:15<01:47,  3.72s/it]

Example 21: 


Generating Predictions:  44%|████▍     | 22/50 [01:18<01:40,  3.59s/it]

Example 22: 


Generating Predictions:  46%|████▌     | 23/50 [01:22<01:34,  3.50s/it]

Example 23: 


Generating Predictions:  48%|████▊     | 24/50 [01:25<01:31,  3.53s/it]

Example 24: 


Generating Predictions:  50%|█████     | 25/50 [01:29<01:32,  3.69s/it]

Example 25: 


Generating Predictions:  52%|█████▏    | 26/50 [01:32<01:25,  3.58s/it]

Example 26: 


Generating Predictions:  54%|█████▍    | 27/50 [01:36<01:20,  3.50s/it]

Example 27: 


Generating Predictions:  56%|█████▌    | 28/50 [01:40<01:20,  3.66s/it]

Example 28: 


Generating Predictions:  58%|█████▊    | 29/50 [01:43<01:15,  3.59s/it]

Example 29: 


Generating Predictions:  60%|██████    | 30/50 [01:47<01:10,  3.52s/it]

Example 30: 


Generating Predictions:  62%|██████▏   | 31/50 [01:50<01:05,  3.46s/it]

Example 31: 


Generating Predictions:  64%|██████▍   | 32/50 [01:54<01:07,  3.73s/it]

Example 32: 


Generating Predictions:  66%|██████▌   | 33/50 [01:58<01:01,  3.61s/it]

Example 33: 


Generating Predictions:  68%|██████▊   | 34/50 [02:01<00:56,  3.52s/it]

Example 34: 


Generating Predictions:  70%|███████   | 35/50 [02:05<00:53,  3.54s/it]

Example 35: 


Generating Predictions:  72%|███████▏  | 36/50 [02:09<00:51,  3.67s/it]

Example 36: 


Generating Predictions:  74%|███████▍  | 37/50 [02:12<00:46,  3.57s/it]

Example 37: 


Generating Predictions:  76%|███████▌  | 38/50 [02:15<00:41,  3.50s/it]

Example 38: 


Generating Predictions:  78%|███████▊  | 39/50 [02:19<00:40,  3.70s/it]

Example 39: 


Generating Predictions:  80%|████████  | 40/50 [02:23<00:36,  3.63s/it]

Example 40: 


Generating Predictions:  82%|████████▏ | 41/50 [02:26<00:31,  3.54s/it]

Example 41: 


Generating Predictions:  84%|████████▍ | 42/50 [02:29<00:27,  3.47s/it]

Example 42: 


Generating Predictions:  86%|████████▌ | 43/50 [02:34<00:25,  3.70s/it]

Example 43: 


Generating Predictions:  88%|████████▊ | 44/50 [02:37<00:21,  3.59s/it]

Example 44: 


Generating Predictions:  90%|█████████ | 45/50 [02:40<00:17,  3.50s/it]

Example 45: 


Generating Predictions:  92%|█████████▏| 46/50 [02:44<00:14,  3.58s/it]

Example 46: 


Generating Predictions:  94%|█████████▍| 47/50 [02:48<00:11,  3.67s/it]

Example 47: 


Generating Predictions:  96%|█████████▌| 48/50 [02:51<00:07,  3.55s/it]

Example 48: 


Generating Predictions:  98%|█████████▊| 49/50 [02:55<00:03,  3.47s/it]

Example 49: 


Generating Predictions: 100%|██████████| 50/50 [02:59<00:00,  3.59s/it]

Example 50: 





In [None]:

import json

# Fetch each column once instead of looking it up again on every iteration.
pubmedqa_questions = pubmedqa_test["QUESTION"]
pubmedqa_contexts = pubmedqa_test["CONTEXTS"]

# Build the list from scratch. Appending to a list created in an earlier cell
# made this cell add a second copy of every row each time it was re-run.
results_pubmedqa = [
    {
        "QUESTION": pubmedqa_questions[idx],
        "CONTEXTS": pubmedqa_contexts[idx],
        "generated_answer": pubmedqa_predictions[idx],
    }
    for idx in range(len(pubmedqa_test))
]

with open("pubmedqa_predictions.json", "w") as f:
    json.dump(results_pubmedqa, f, indent=4)

In [None]:

!pip install datasets

from datasets import load_dataset

# LitQA: the train and test splits come from two separate Hub repositories.
litqa_train, litqa_test = (
    load_dataset("Anshu132002/litqaTrain", split="train"),
    load_dataset("Anshu132002/litqaTest", split="test"),
)

# PubMedQA: loaded the same way, train first and then test.
pubmedqa_train, pubmedqa_test = (
    load_dataset("Anshu132002/pubmedqaTrain", split="train"),
    load_dataset("Anshu132002/pubmedqaTest", split="test"),
)

def messages_to_prompt(messages):
    """Flatten chat messages into a plain "Role: content" transcript.

    Each system/user/assistant message becomes one "<Role>: <content>" line;
    messages with any other role are skipped. A trailing "Assistant: " cue is
    appended only when the conversation does not already end with an
    assistant message. Training examples, which end with the answer, therefore
    no longer finish with a dangling empty assistant turn, while inference
    prompts, which end with the user turn, are unchanged.

    Args:
        messages: Iterable of dicts, each with "role" and "content" keys.

    Returns:
        str: the rendered transcript.
    """
    role_labels = {'system': 'System', 'user': 'User', 'assistant': 'Assistant'}
    lines = []
    last_role = None
    for message in messages:
        role = message['role']
        content = message['content']
        last_role = role
        if role in role_labels:
            lines.append(f"{role_labels[role]}: {content}\n")
    # Open a new assistant turn only if the answer is still to be generated.
    if last_role != 'assistant':
        lines.append('Assistant: ')
    return ''.join(lines)

def preprocess_litqa(examples):
    """Render a batch of LitQA rows as chat transcripts that include the answer.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "question", "key-passage" and "ideal".

    Returns:
        dict with a single key, "text", holding one transcript per row.
    """
    rows = zip(
        examples["question"],
        examples["key-passage"],
        examples["ideal"],
    )
    rendered = []
    for question, key_passage, ideal in rows:
        system_turn = {"role": "system", "content": "You are a helpful AI assistant."}
        user_turn = {
            "role": "user",
            "content": f"Question: {question}\nKey Passage: {key_passage}\nProvide a concise answer.",
        }
        # The ideal answer is the assistant turn the model is trained on.
        assistant_turn = {"role": "assistant", "content": f"{ideal}"}
        rendered.append(messages_to_prompt([system_turn, user_turn, assistant_turn]))
    return {"text": rendered}

def preprocess_pubmedqa(examples):
    """Render a batch of PubMedQA rows as chat transcripts that include the answer.

    Args:
        examples: Batched mapping of column name to a list of values; must
            provide "QUESTION", "CONTEXTS" and "LONG_ANSWER".

    Returns:
        dict with a single key, "text", holding one transcript per row.
    """
    rows = zip(
        examples["QUESTION"],
        examples["CONTEXTS"],
        examples["LONG_ANSWER"],
    )
    rendered = []
    for question, context, long_answer in rows:
        system_turn = {"role": "system", "content": "You are a helpful AI assistant."}
        user_turn = {
            "role": "user",
            "content": f"Question: {question}\nContext: {context}\nProvide a detailed answer.",
        }
        # The long answer is the assistant turn the model is trained on.
        assistant_turn = {"role": "assistant", "content": f"{long_answer}"}
        rendered.append(messages_to_prompt([system_turn, user_turn, assistant_turn]))
    return {"text": rendered}

from datasets import concatenate_datasets

# Only the training splits are mapped here; the test splits get their "text"
# column later in this cell.
litqa_train = litqa_train.map(function=preprocess_litqa, batched=True)
pubmedqa_train = pubmedqa_train.map(function=preprocess_pubmedqa, batched=True)

# LitQA rows first, PubMedQA rows after.
train_parts = [litqa_train, pubmedqa_train]
train_dataset = concatenate_datasets(train_parts)

from unsloth import FastLanguageModel
import torch

# Loading options; max_seq_length is reused by the trainer and read as a
# global by generate_predictions_for_all.
max_seq_length = 512
dtype = None
load_in_4bit = True

# Base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3.5-mini-instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with the adapters; `model` now refers to the PEFT model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=1000,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# The trainer consumes the pre-rendered "text" column of the combined dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

trainer.train()

from tqdm import tqdm

# Training is finished; switch the model over to generation.
FastLanguageModel.for_inference(model)

def preprocess_test_dataset(examples, fields):
    """Render inference prompts (no assistant answer) for a test split.

    Args:
        examples: Mapping-like object; ``examples[field]`` must yield that
            column's values in row order.
        fields: Two column names, question column first. When 'key-passage'
            is among them the LitQA wording is used, otherwise the PubMedQA
            wording.

    Returns:
        list of prompt strings, one per row.
    """
    columns = [examples[field] for field in fields]
    use_litqa_wording = 'key-passage' in fields
    system_content = "You are a helpful AI assistant."

    prompts = []
    for entry in zip(*columns):
        if use_litqa_wording:
            question, key_passage = entry
            user_content = f"Question: {question}\nKey Passage: {key_passage}\nProvide a concise answer."
        else:
            question, context = entry
            user_content = f"Question: {question}\nContext: {context}\nProvide a detailed answer."
        conversation = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]
        prompts.append(messages_to_prompt(conversation))
    return prompts

# Render the LitQA inference prompts and attach them as a "text" column.
litqa_test_texts = preprocess_test_dataset(
    examples=litqa_test,
    fields=["question", "key-passage"],
)
litqa_test = litqa_test.add_column(name="text", column=litqa_test_texts)

# Same for PubMedQA.
pubmedqa_test_texts = preprocess_test_dataset(
    examples=pubmedqa_test,
    fields=["QUESTION", "CONTEXTS"],
)
pubmedqa_test = pubmedqa_test.add_column(name="text", column=pubmedqa_test_texts)

def generate_predictions_for_all(model, tokenizer, test_dataset):
    """Greedy-decode one reply line per prompt in ``test_dataset['text']``.

    Prompts are truncated to the notebook-level ``max_seq_length`` (read as a
    global, not passed in).

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        test_dataset: Object whose ``['text']`` entry is the list of prompts.

    Returns:
        list of str, one per prompt: the first line of the generated reply,
        stripped of surrounding whitespace.
    """
    predictions = []
    inputs_list = test_dataset['text']

    for idx, input_text in enumerate(tqdm(inputs_list, desc="Generating Predictions")):
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            truncation=True,
            max_length=max_seq_length,
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1,
                do_sample=False,
            )

        # The returned sequence is the prompt tokens followed by the new ones.
        # Decode only the new tokens rather than searching the decoded text
        # for "Assistant:". If truncation removes the cue from the prompt, the
        # search fails and the whole prompt is stored as the prediction; if the
        # passage itself contains "Assistant:", the wrong text is extracted.
        prompt_token_count = inputs["input_ids"].shape[1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_token_count:],
            skip_special_tokens=True,
        )

        # Keep only the first line of the reply.
        assistant_reply = generated_text.strip().split('\n')[0].strip()
        predictions.append(assistant_reply)

        print(f"Example {idx + 1}: {predictions[-1]}")

    return predictions

print("\nGenerating predictions for LitQA Test Dataset...")
litqa_predictions = generate_predictions_for_all(
    model=model,
    tokenizer=tokenizer,
    test_dataset=litqa_test,
)

print("\nGenerating predictions for PubMedQA Test Dataset...")
pubmedqa_predictions = generate_predictions_for_all(
    model=model,
    tokenizer=tokenizer,
    test_dataset=pubmedqa_test,
)

import json

def _collect_results(dataset, field_names, predictions):
    """Pair the selected fields of every dataset row with its generated answer.

    Each column is looked up on ``dataset`` once, before the row loop, instead
    of repeating ``dataset[name]`` for every row.

    Args:
        dataset: Object supporting ``len()`` and ``dataset[column_name]``.
        field_names: Column names to copy into each record, in output order.
        predictions: Generated answers, indexable by row position.

    Returns:
        list[dict]: One record per row with the requested fields followed by
        a ``"generated_answer"`` entry.
    """
    columns = {name: dataset[name] for name in field_names}
    records = []
    for idx in range(len(dataset)):
        record = {name: columns[name][idx] for name in field_names}
        record["generated_answer"] = predictions[idx]
        records.append(record)
    return records


def _write_json(path, records):
    """Write ``records`` to ``path`` as JSON with a 4-space indent."""
    # json.dump escapes non-ASCII by default, so the encoding is only pinned
    # here for explicitness; the bytes written do not change.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)


results_litqa = _collect_results(
    litqa_test, ["question", "key-passage"], litqa_predictions
)
_write_json("litqa_predictions.json", results_litqa)

results_pubmedqa = _collect_results(
    pubmedqa_test, ["QUESTION", "CONTEXTS"], pubmedqa_predictions
)
_write_json("pubmedqa_predictions.json", results_pubmedqa)


==((====))==  Unsloth 2024.11.9: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 7.5. CUDA Toolkit = 12.4.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,095 | Num Epochs = 8
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 1,000
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss
10,1.7984
20,1.5388
30,1.4328
40,1.4418
50,1.4369
60,1.4211
70,1.4637
80,1.3915
90,1.4575
100,1.4529



Generating predictions for LitQA Test Dataset...


Generating Predictions:   2%|▏         | 1/49 [00:10<08:12, 10.27s/it]

Example 1: 1. Glycosyltransferases


Generating Predictions:   4%|▍         | 2/49 [00:20<07:59, 10.20s/it]

Example 2: 5/8 age


Generating Predictions:   6%|▌         | 3/49 [00:31<07:58, 10.40s/it]

Example 3: 1) Does the crystal structure of Csx23-cA4 complex provide evidence for the presence of a Mg(2+)/Mn(2+)/Zn(2+) switch in regulating transcription in Vibrio cholerae?


Generating Predictions:   8%|▊         | 4/49 [00:41<07:55, 10.56s/it]

Example 4: 5


Generating Predictions:  10%|█         | 5/49 [00:52<07:48, 10.64s/it]

Example 5: 5x5 MYE


Generating Predictions:  12%|█▏        | 6/49 [01:02<07:28, 10.43s/it]

Example 6: 371F, D405N, R408S


Generating Predictions:  14%|█▍        | 7/49 [01:13<07:18, 10.45s/it]

Example 7: 5-prime single-stranded DNA


Generating Predictions:  16%|█▋        | 8/49 [01:24<07:13, 10.58s/it]

Example 8: 7) DILT


Generating Predictions:  18%|█▊        | 9/49 [01:34<07:04, 10.62s/it]

Example 9: E1B-hExo1-XTEN-Cas9


Generating Predictions:  20%|██        | 10/49 [01:45<06:56, 10.69s/it]

Example 10: 7, LRR, FISNA, and NEK


Generating Predictions:  22%|██▏       | 11/49 [01:55<06:41, 10.57s/it]

Example 11: β-Amino acids


Generating Predictions:  24%|██▍       | 12/49 [02:05<06:25, 10.43s/it]

Example 12: 100 nm particles


Generating Predictions:  27%|██▋       | 13/49 [02:16<06:17, 10.50s/it]

Example 13: 1) ZDHHC5; 2) GSDMD


Generating Predictions:  29%|██▊       | 14/49 [02:27<06:10, 10.59s/it]

Example 14: ogfrl1


Generating Predictions:  31%|███       | 15/49 [02:38<06:02, 10.67s/it]

Example 15: rasgrf1


Generating Predictions:  33%|███▎      | 16/49 [02:48<05:51, 10.65s/it]

Example 16: ​JUN upstream region


Generating Predictions:  35%|███▍      | 17/49 [02:58<05:32, 10.38s/it]

Example 17: 5-hydroxytryptamine 3 receptor subunits


Generating Predictions:  37%|███▋      | 18/49 [03:09<05:25, 10.50s/it]

Example 18: 50


Generating Predictions:  39%|███▉      | 19/49 [03:20<05:17, 10.57s/it]

Example 19: β-Catenin


Generating Predictions:  41%|████      | 20/49 [03:30<05:08, 10.65s/it]

Example 20: 5LOX1


Generating Predictions:  43%|████▎     | 21/49 [03:41<04:56, 10.60s/it]

Example 21: 48S Intron RNA


Generating Predictions:  45%|████▍     | 22/49 [03:51<04:41, 10.41s/it]

Example 22: 548E


Generating Predictions:  47%|████▋     | 23/49 [04:02<04:34, 10.57s/it]

Example 23: 446S


Generating Predictions:  49%|████▉     | 24/49 [04:13<04:26, 10.65s/it]

Example 24: 346 Gly56 to His56


Generating Predictions:  51%|█████     | 25/49 [04:24<04:16, 10.69s/it]

Example 25: 92H


Generating Predictions:  53%|█████▎    | 26/49 [04:34<04:04, 10.65s/it]

Example 26: 215W


Generating Predictions:  55%|█████▌    | 27/49 [04:44<03:48, 10.37s/it]

Example 27: Assistant:


Generating Predictions:  57%|█████▋    | 28/49 [04:54<03:39, 10.47s/it]

Example 28: 1.


Generating Predictions:  59%|█████▉    | 29/49 [05:05<03:29, 10.49s/it]

Example 29: ​ Pdpn and CD104


Generating Predictions:  61%|██████    | 30/49 [05:16<03:21, 10.59s/it]

Example 30: System: You are a helpful AI assistant.
User: Question: Which of the following proteins has the greatest reduction in association with mutant drosophila SMN D20V protein vs. WT SMN?
Key Passage: Altogether, we performed three different mass spectrometry runs that included a total of 23 samples and identified a total of 894 different Drosophila proteins (Fig. 1D). Among the four WT biological replicates (WT1-4), there were 796 proteins that copurified with Flag-SMN (Fig. 1D), 79% of which (311/393) overlapped with those we identified previously (WT0) using a single replicate [34]. A visual summary of the top hits, as ranked by Log2 fold-change differences between WT and OreR controls, is shown in Fig. 1E. Clearly identified among the highest ranked partner proteins are the core members of the SMN complex (SMN, Gem2, Gem3, Gem4/Glos and Gem5/rig) and its best known RNP assembly clients, the Sm proteins (SmB, SmD1, SmD2, SmD3, SmE, SmF, SNRPG/SmG and Lsm11). Other RNP biogenes

Generating Predictions:  63%|██████▎   | 31/49 [05:27<03:11, 10.65s/it]

Example 31: 312


Generating Predictions:  65%|██████▌   | 32/49 [05:37<02:58, 10.48s/it]

Example 32: 5,6-Trimethylguanosine synthase


Generating Predictions:  67%|██████▋   | 33/49 [05:47<02:46, 10.40s/it]

Example 33: β-BB2


Generating Predictions:  69%|██████▉   | 34/49 [05:58<02:36, 10.46s/it]

Example 34: 40BUD


Generating Predictions:  71%|███████▏  | 35/49 [06:08<02:27, 10.55s/it]

Example 35: None of the training characteristics within the target region were associated with clinical response.


Generating Predictions:  73%|███████▎  | 36/49 [06:19<02:18, 10.64s/it]

Example 36: 50


Generating Predictions:  76%|███████▌  | 37/49 [06:30<02:07, 10.59s/it]

Example 37: 5) All of the above


Generating Predictions:  78%|███████▊  | 38/49 [06:39<01:53, 10.34s/it]

Example 38: 5'nucleotidase


Generating Predictions:  80%|███████▉  | 39/49 [06:50<01:44, 10.44s/it]

Example 39: 2′-ectoinosidophil (2Ei)


Generating Predictions:  82%|████████▏ | 40/49 [07:01<01:34, 10.49s/it]

Example 40: ions


Generating Predictions:  84%|████████▎ | 41/49 [07:11<01:23, 10.49s/it]

Example 41: 622


Generating Predictions:  86%|████████▌ | 42/49 [07:21<01:12, 10.30s/it]

Example 42: 90 mg/kg Fexofenadine


Generating Predictions:  88%|████████▊ | 43/49 [07:31<01:01, 10.22s/it]

Example 43: 4EGI-1


Generating Predictions:  90%|████████▉ | 44/49 [07:42<00:51, 10.30s/it]

Example 44: 5-mC + RNA


Generating Predictions:  92%|█████████▏| 45/49 [07:52<00:41, 10.36s/it]

Example 45: γ-aminobutyric acid B receptor


Generating Predictions:  94%|█████████▍| 46/49 [08:02<00:31, 10.34s/it]

Example 46: 1. Vasculitis or embolism


Generating Predictions:  96%|█████████▌| 47/49 [08:12<00:20, 10.13s/it]

Example 47: 95, 121, 122


Generating Predictions:  98%|█████████▊| 48/49 [08:23<00:10, 10.29s/it]

Example 48: 5 MEF2 family transcription factors


Generating Predictions: 100%|██████████| 49/49 [08:33<00:00, 10.48s/it]


Example 49: 1) acj6 and 2) SoxN

Generating predictions for PubMedQA Test Dataset...


Generating Predictions:   2%|▏         | 1/50 [00:10<08:37, 10.57s/it]

Example 1: ​​The results of this study show that ART patients face higher C-section rates during their stage of delivery.


Generating Predictions:   4%|▍         | 2/50 [00:20<08:12, 10.27s/it]

Example 2: 1) The requirement of a preoperative weight loss program does not appear to be necessary to achieve good outcomes after LAGB; 2) MSWM programs may be effective in promoting physical activity postoperatively.


Generating Predictions:   6%|▌         | 3/50 [00:30<07:57, 10.16s/it]

Example 3: 1) The kidney seems to tolerate 90 minutes warm ischemia better when it is the only kidney. 2) The kidney seems to be more vulnerable to warm ischemia in paired kidneys compared to a solitary kidney. 3) The kidney seems to be more prone to develop a proinflammatory reaction in paired kidneys compared to a solitary kidney.


Generating Predictions:   8%|▊         | 4/50 [00:41<07:53, 10.30s/it]

Example 4: 11 needles may be as effective as 5 needles in the treatment of neck pain.


Generating Predictions:  10%|█         | 5/50 [00:51<07:47, 10.39s/it]

Example 5: 2 years of exposure to fluoridated water reduces the need for restorative dental care. Fluoridation's effect on demand for dental services may be mediated by the quality of fluoridated dental care.


Generating Predictions:  12%|█▏        | 6/50 [01:02<07:37, 10.40s/it]

Example 6: Early pertro-collisional fractures can be treated successfully with intramedullary nailing with similar results as those achieved with a sliding compression hip screw.


Generating Predictions:  14%|█▍        | 7/50 [01:12<07:24, 10.33s/it]

Example 7: System: You are a helpful AI assistant.
User: Question: SPECT study with I-123-Ioflupane (DaTSCAN) in patients with essential tremor. Is there any correlation with Parkinson's disease?
Context: The differential diagnosis between essential tremor (ET) and Parkinson's disease (PD) may be, in some cases, very difficult on clinical grounds alone. In addition, it is accepted that a small percentage of ET patients presenting symptoms and signs of possible PD may progress finally to a typical pattern of parkinsonism. Ioflupane, N-u-fluoropropyl-2a-carbomethoxy-3a-(4-iodophenyl) nortropane, also called FP-CIT, labelled with (123)I (commercially known as DaTSCAN) has been proven to be useful in the differential diagnosis between PD and ET and to confirm dopaminergic degeneration in patients with parkinsonism. The aim of this study is to identify dopaminergic degeneration in patients with PD and distinguish them from others with ET using semi-quantitative SPECT (123)I-Ioflupane (DaTSC

Generating Predictions:  16%|█▌        | 8/50 [01:22<07:09, 10.22s/it]

Example 8: The GFRUP's recommendations for withholding or withdrawing treatments in the PICU are feasible.


Generating Predictions:  18%|█▊        | 9/50 [01:33<07:05, 10.37s/it]

Example 9: 5000 units of sodium heparin nebulized four times daily is not effective for the prevention of VAP in the ICU.


Generating Predictions:  20%|██        | 10/50 [01:43<06:58, 10.45s/it]

Example 10: 1) h-indices are significantly higher among academic radiologists with NIH funding compared to those without, 2) h-indices<10 are significantly less likely to have NIH funding compared to those with h-indices>10, 3) h-indices>10 are not significantly predictive of greater NIH funding.


Generating Predictions:  22%|██▏       | 11/50 [01:54<06:49, 10.51s/it]

Example 11: 1. Minimally invasive techniques using neochords allow safe and effective mitral valve repair without re-repairs or reoperations in a small percentage of patients. 2. Minimally invasive techniques using neochords are reproducible and stable in the medium term.


Generating Predictions:  24%|██▍       | 12/50 [02:04<06:32, 10.34s/it]

Example 12: In this cohort study of middle aged women, BMI change over 14 years was associated with the presence of knee pain at year 15, independently of radiographic knee OA.


Generating Predictions:  26%|██▌       | 13/50 [02:14<06:22, 10.34s/it]

Example 13: Urine cytology performed with SurePath is comparable to classic smears and ThinPrep. For HGUCA, ThinPrep and SurePath are comparable to classic smears, with ThinPrep showing slightly better performance.


Generating Predictions:  28%|██▊       | 14/50 [02:25<06:15, 10.43s/it]

Example 14: 1. Severely injured trauma patients admitted to a level I trauma center had significantly lower mortality than those admitted to a level III trauma center.


Generating Predictions:  30%|███       | 15/50 [02:36<06:09, 10.56s/it]

Example 15: 1. Collateral circulation does not predispose to restenosis after PCI. 2. The predictors of in-stent restenosis identified in this study were stent diameter, stent length,>10% residual stenosis, and smoking history.


Generating Predictions:  32%|███▏      | 16/50 [02:46<05:58, 10.53s/it]

Example 16: ​HSKs for STIs were viewed as potentially acceptable by MSM, with the provision of additional support services identified as an important consideration. Further work is required to confirm the accuracy of the test and to explore the feasibility of implementing HSKs in routine clinical practice.


Generating Predictions:  34%|███▍      | 17/50 [02:56<05:42, 10.38s/it]

Example 17: 1. Early cesarean section did not reduce maternal mortality and did not show any evidence of reducing perinatal mortality or morbidity.


Generating Predictions:  36%|███▌      | 18/50 [03:07<05:32, 10.40s/it]

Example 18: Assistant:


Generating Predictions:  38%|███▊      | 19/50 [03:17<05:26, 10.55s/it]

Example 19: 1) Microdose leuprolide in IVF can be effective in poor responders, 2) Oral contraceptive pill for follicular synchronization prior to leuprolide in IVF may be safe and equally effective as compared with luteal E2 supplementation, 3) However, OCP/ML may be superior to E2+antag/ML in improving IVF outcomes.


Generating Predictions:  40%|████      | 20/50 [03:28<05:19, 10.64s/it]

Example 20: 1) Documentation of opioid dependence and MMT are not mandatory in the medical record. 2) Missing medical records is a major limitation for the study. 3) Potentially interacting medications were frequently prescribed. 4) Medical providers should be aware of the patient's MMT status and potential medication-methadone interactions.


Generating Predictions:  42%|████▏     | 21/50 [03:39<05:08, 10.63s/it]

Example 21: ​​This patient-safety curriculum was effective in changing medical students' attitudes towards patient safety.


Generating Predictions:  44%|████▍     | 22/50 [03:49<04:54, 10.50s/it]

Example 22: The prevalence of general practitioner visits and hospitalization significantly increased in the last decade in the elderly. Men had a higher probability of being hospitalized than women in the years 2001, 2006 and 2009.


Generating Predictions:  46%|████▌     | 23/50 [03:59<04:41, 10.41s/it]

Example 23: 50 Gy radiotherapy did not lead to a statistically significant improvement of median survival time. A high-dose radiotherapy schedule may be equally effective as a conventional schedule in lung cancer palliative treatment.


Generating Predictions:  48%|████▊     | 24/50 [04:10<04:32, 10.48s/it]

Example 24: 5-hour restraint water-immersion stress induces acute edematous pancreatitis in rats by cerulein. Endothelin-1 is involved in the development of edematous pancreatitis. Blockade of endothelin receptors with BQ123 prevents the transformation of edematous pancreatitis into hemorrhagic pancreatitis.


Generating Predictions:  50%|█████     | 25/50 [04:20<04:22, 10.50s/it]

Example 25: 1) Malnutrition is a risk factor for arterial calcification in hemodialysis patients. 2) The expression of BMP2 and MGP is involved in the development of arterial calcification.


Generating Predictions:  52%|█████▏    | 26/50 [04:31<04:12, 10.54s/it]

Example 26: 500 normotensive women with different parities were investigated in this study. Patients with more parities had significantly higher GFR values. Glomerular hyperfiltration due to pregnancy may damage the kidney in women with more parities. However, the cause-effect relationship between pregnancies and renal damage needs to be determined in more studies.


Generating Predictions:  54%|█████▍    | 27/50 [04:41<03:59, 10.40s/it]

Example 27: 47% of patients with SSS showed complete reversal of flow in the unaffected arm at rest. The presence of this finding was associated with a higher blood pressure difference between arms. The finding of complete reversal of flow in the subclavian artery is a good indicator of a significant pressure gradient.


Generating Predictions:  56%|█████▌    | 28/50 [04:51<03:47, 10.36s/it]

Example 28: 78% of students have regular access to the Internet. Most of their use is for entertainment purposes but they would be willing to use it for health information.


Generating Predictions:  58%|█████▊    | 29/50 [05:02<03:39, 10.47s/it]

Example 29: 1) The dismal prognosis of the remaining group indicates that the therapeutic effect of the planned chemotherapy was probably abolished by the time of cystectomy. 2) The unfavourable prognosis of the matched group indicates that eligibility for a chemotherapy protocol is not a good prognostic factor after radical cystectomy for bladder cancer. 3) The results of the matched group suggest that patients with muscle-invasive disease who are not entered into clinical trials may benefit from neoadjuvant chemotherapy.


Generating Predictions:  60%|██████    | 30/50 [05:13<03:30, 10.50s/it]

Example 30: 57% of the elderly patients referred for cancer treatment whose treatment plan was initially unchanged had their treatment plan modified after geriatric assessment.


Generating Predictions:  62%|██████▏   | 31/50 [05:23<03:20, 10.54s/it]

Example 31: 1) The implantation of LAA occlusion devices would cause a slight and measureable change of QRS electric axis in most patients. 2) The changes of QRS electric axis, HR and mean-mean QT interval were statistically significant in the whole group. 3) There was no significant alteration in echocardiographic parameters after LAA occlusion. 4) The LAA diameter and ARD would increase gradually during follow-up.


Generating Predictions:  64%|██████▍   | 32/50 [05:33<03:04, 10.24s/it]

Example 32: 1. Ki-67 is not a predictive marker for response to neoadjuvant chemotherapy in breast cancer patients. 2. Ki-67 has a limited prognostic value in breast cancer subtypes.


Generating Predictions:  66%|██████▌   | 33/50 [05:43<02:55, 10.29s/it]

Example 33: Metoclopramide given by a 2 min intravenous bolus is as effective as a 15 min intravenous infusion in relieving headache symptoms. However, akathisia is more common when metoclopramide is given by intravenous bolus.


Generating Predictions:  68%|██████▊   | 34/50 [05:54<02:46, 10.42s/it]

Example 34: 1. Breast-feeding is associated with lower rates of illness in children with a family history of atopy.


Generating Predictions:  70%|███████   | 35/50 [06:05<02:36, 10.45s/it]

Example 35: 106Ru plaque beta-radiation is a feasible and effective treatment of posterior uveal melanomas. Enucleation is reserved for rapidly growing tumors, and an initial 100 Gy apical dose is sufficient for most tumors.


Generating Predictions:  72%|███████▏  | 36/50 [06:15<02:27, 10.51s/it]

Example 36: 1. The dimensions of compensatory hypertrophy of the middle turbinate are not significantly changed by septoplasty. 2. There is a significant correlation between the angle of the nasal septum and the dimensions of the middle turbinate after septoplasty.


Generating Predictions:  74%|███████▍  | 37/50 [06:25<02:12, 10.22s/it]

Example 37: 20-minute patient coaching can reduce racial/ethnic disparities in cancer pain control.


Generating Predictions:  76%|███████▌  | 38/50 [06:35<02:03, 10.28s/it]

Example 38: Radical prostatectomy offers more favorable outcomes for patients with prostate cancer who have had prior benign prostate biopsy.


Generating Predictions:  78%|███████▊  | 39/50 [06:46<01:53, 10.33s/it]

Example 39: 1) There is no pressure gradient between the hepatic and central venous circulations, regardless of patient positioning. 2) There is no evidence to support the use of Trendelenberg position as a prophylactic measure for venous air embolism during hepatectomy.


Generating Predictions:  80%|████████  | 40/50 [06:56<01:43, 10.40s/it]

Example 40: 1. There is no evidence that Indigenous Australians age prematurely. 2. Life expectancy at premature ages is similar for Indigenous and non-Indigenous people. 3. The Government's aged care policy is not supported by the data.


Generating Predictions:  82%|████████▏ | 41/50 [07:07<01:34, 10.48s/it]

Example 41: 36.0 % of the enrolled TB patients were non-adherent to treatment regimens. The high loss to follow-up was identified as a major cause of non-adherence. Direct observation was not a significant factor in improving treatment adherence.


Generating Predictions:  84%|████████▍ | 42/50 [07:16<01:21, 10.23s/it]

Example 42: 1. Ultrasound examination of the penis is useful in demonstrating the presence or absence of invasion into the corpora.


Generating Predictions:  86%|████████▌ | 43/50 [07:27<01:12, 10.29s/it]

Example 43: Oblique views do not improve the diagnostic accuracy of radiography for the detection of spondylolysis in children. The additional effective radiation dose and costs associated with obtaining oblique views are not justified.


Generating Predictions:  88%|████████▊ | 44/50 [07:38<01:02, 10.41s/it]

Example 44: 1.3% of cancers might be detected earlier if the patient had undergone regular screening. Therefore, all patients with colorectal cancer should be invited for repeat colonoscopy.


Generating Predictions:  90%|█████████ | 45/50 [07:48<00:52, 10.44s/it]

Example 45: The present study demonstrates that desflurane does not alter LV function during controlled hypertension for aortic surgery.


Generating Predictions:  92%|█████████▏| 46/50 [07:59<00:42, 10.51s/it]

Example 46: 1. The difference in median birthweight at 34 weeks reported for Aborigines and white women in the QPDC is robust to misclassification of gestational age. 2. Differences in distribution of birthweight at preterm are unlikely to explain the difference in median birthweight at 34 weeks. 3. The observed difference in median birthweight at 34 weeks between Aborigines and white women is unlikely to be due to differential misclassification of gestational age.


Generating Predictions:  94%|█████████▍| 47/50 [08:09<00:30, 10.29s/it]

Example 47: 1. Ultrasonography is not useful to determine the presence or absence of an undescended testis. 2. Ultrasonography is not reliable to determine the location of the testis in boys with an undescended testis. 3. In cases of testicular micophenomenon, ultrasonography is not reliable to determine the pathologic nature of the testis. 4. In cases of an unresolved testicular malposition, ultrasonography is not reliable to determine the pathologic nature of the abdominal structures.


Generating Predictions:  96%|█████████▌| 48/50 [08:19<00:20, 10.39s/it]

Example 48: ​​AUC0-24 and Cmin show a direct correlation with vancomycin efficacy and safety, respectively. AUC0-24 may be a more representative index than peak concentration for TDM of many drugs with a linear pharmacokinetic profile.


Generating Predictions:  98%|█████████▊| 49/50 [08:30<00:10, 10.44s/it]

Example 49: 2-year follow-up revealed that PRP gel can reduce postoperative pain but has no effect on swelling, interincisal opening, and socket healing.


Generating Predictions: 100%|██████████| 50/50 [08:41<00:00, 10.42s/it]

Example 50: 57% of paediatric medicines have an endogenous pH<5.5, and the majority are SF formulations. This is probably because SF formulations are more prone to acidic constituents, such as preservatives and flavouring agents, being incorporated into the formulation.



