In [8]:
import pandas as pd
df_sample = pd.read_csv("sample_summary.csv")
# Display the first few rows
print(df_sample.head())

# Check DataFrame info
print(df_sample.info())

                                               input  \
0  Good afternoon, champ, how you holding up? Goo...   
1  What brings you in here today? Hi, I'm um, I'm...   
2  Do you have any known allergies to medications...   
3  How may I help you today? Yeah I've had, a fev...   
4  It sounds like that you're experiencing some c...   

                                              output  
0  Subjective:\n- Symptoms: Lower back pain, radi...  
1  Subjective:\n- Presenting with dry cough for 1...  
2  Subjective:\n- No known allergies to medicatio...  
3  Subjective:\n- Fever and dry cough started 4 d...  
4  Subjective:\n- Presenting with chest pain for ...  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   input   100 non-null    object
 1   output  100 non-null    object
dtypes: object(2)
memory usage: 1.7+ KB
None


In [2]:
!pip install -q transformers huggingface_hub
!pip install -q --upgrade accelerate
!pip install -q -U bitsandbytes

In [9]:
from huggingface_hub import login

# Use your Hugging Face token
login("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")

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from tqdm import tqdm
import pandas as pd

# Step 1: Load the BioBART model and tokenizer
# Replace 'GanjinZero/biobart-large' with the desired BioBART model version
tokenizer = AutoTokenizer.from_pretrained("GanjinZero/biobart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("GanjinZero/biobart-large")

In [25]:
import time
import torch
import pandas as pd
from tqdm import tqdm
import numpy as np

# -----------------------------------------------------
# 1. Load only first 2 rows
# -----------------------------------------------------
df_sample = df_sample.iloc[:2].copy()
inputs_list = df_sample["input"].tolist()
batch_size = 1  # Keep it low for debugging and simplicity

# -----------------------------------------------------
# 2. Simplified Prompt Construction (no instruction)
# -----------------------------------------------------
def construct_prompt(input_text):
    return input_text.strip()

# -----------------------------------------------------
# 3. Generation Parameters
# -----------------------------------------------------
generation_params = {
    "do_sample": True,
    "top_p": 0.8,
    "temperature": 0.1,
    "top_k": 100,
    "max_new_tokens": 200,
    "repetition_penalty": 1.1,
    "eos_token_id": tokenizer.eos_token_id
}

# -----------------------------------------------------
# 4. Inference & Efficiency Tracking
# -----------------------------------------------------
generated_summaries = []
latencies = []
throughputs = []

def process_batch(batch_inputs):
    batch_generated = []
    total_input_tokens = 0

    for text in batch_inputs:
        prompt = construct_prompt(text)
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1000)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[1]
        total_input_tokens += prompt_length

        start_time = time.time()
        summary_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            **generation_params
        )
        end_time = time.time()

        # Optional Debug: Print full decoded output
        full_output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        print("Full Output:", full_output)

        # Extract new tokens (skip prompt)
        generated_tokens = summary_ids[0, prompt_length:]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        print("Generated Summary:", generated_text)  # Debug print

        batch_generated.append(generated_text)

        # Metrics
        latency = end_time - start_time
        latencies.append(latency)
        total_tokens = prompt_length + generated_tokens.shape[0]
        throughputs.append(total_tokens / latency)

    return batch_generated

# -----------------------------------------------------
# 5. Process Batches
# -----------------------------------------------------
with tqdm(total=len(inputs_list), desc="Generating Summaries", unit="row") as pbar:
    for i in range(0, len(inputs_list), batch_size):
        batch = inputs_list[i:i + batch_size]
        try:
            batch_generated = process_batch(batch)
        except RuntimeError as e:
            if "out of memory" in str(e):
                torch.cuda.empty_cache()
                print("Out of memory error; try reducing batch size.")
            raise e
        generated_summaries.extend(batch_generated)
        torch.cuda.empty_cache()
        pbar.update(len(batch))

# -----------------------------------------------------
# 6. Save Results
# -----------------------------------------------------
df_sample["generated_summary"] = generated_summaries
df_sample.to_csv("Biobart_soap_generated_summaries.csv", index=False)
print("Summaries saved to 'Biobart_soap_generated_summaries.csv'")

# -----------------------------------------------------
# 7. Print Efficiency Metrics
# -----------------------------------------------------
avg_latency = np.mean(latencies)
std_latency = np.std(latencies)
avg_throughput = np.mean(throughputs)
std_throughput = np.std(throughputs)

print("\n🔹 Efficiency Metrics")
print(f"📌 Average Latency: {avg_latency:.4f} sec (±{std_latency:.4f})")
print(f"📌 Average Throughput: {avg_throughput:.2f} tokens/sec (±{std_throughput:.2f})")


Generating Summaries:  50%|█████     | 1/2 [00:14<00:14, 14.81s/row]

Full Output: Good afternoon, champ, how you holding up? Good afternoon, Doctor, I have a lot of lower back pain. Oh no, before we begin, how old are you, sir and which hand do you write with? I'm seventy five now. Right. Great, so tell me, how long have you had this lower back Pain? It's been about ten days now. Have your symptoms improved at all since they began? No, they keep getting worse. Does the pain radiate into your legs? Yes, it started radiating down my right leg three days after the lower backpain began, and then the left leg three day after the right. The next day I could barely walk, the pain was so severe. Do you remember the initial date of the beginning of your low back pain? Um, it was on December third nineteen ninety five. Have you seen another doctor for this pain? Yes; I saw my local physician, um, it were on December eleventh,
Generated Summary: 


Generating Summaries: 100%|██████████| 2/2 [00:32<00:00, 16.15s/row]

Full Output: What brings you in here today? Hi, I'm um, I're just, so I am here because I have this cough that I've had for the past um, week and a half, and I'm just uh, I usually don't come in with symptoms this minor, but because it's, because we're in the middle of a pandemic, I thought I should come in and get checked out. Okay, yeah, so it's been going on for like more than a week. Um, can you describe to me what kind of cough you're having? Is it like a wet cough or a dry cough? It's a dry coughing. Okay), and has it been getting worse over the, over the time period you described? Um, it's kind of the same. It's the same, and has you noticed, although it's like dry, have you brought up any kind of sputum or phlegm at all? Um no, not, not
Generated Summary: 
Summaries saved to 'Biobart_soap_generated_summaries.csv'

🔹 Efficiency Metrics
📌 Average Latency: 16.1475 sec (±1.3411)
📌 Average Throughput: 42.50 tokens/sec (±14.68)





In [26]:
df_sample.head(2)

Unnamed: 0,input,output,generated_summary
0,"Good afternoon, champ, how you holding up? Goo...","Subjective:\n- Symptoms: Lower back pain, radi...",
1,"What brings you in here today? Hi, I'm um, I'm...",Subjective:\n- Presenting with dry cough for 1...,
