In [1]:
!pip install -q transformers huggingface_hub
!pip install -q --upgrade accelerate
!pip install -q -U bitsandbytes

In [2]:
import pandas as pd

# Define the bucket and file names
bucket_name = 'mimicivliza'  # Replace with your bucket name
mimic_iv_bhc = f's3://{bucket_name}/sample_data_100.csv'

# Load the files
mimic_iv_bhc_100 = pd.read_csv(mimic_iv_bhc)

# Display the data
#mimic_iv_bhc_100.head(5)

In [3]:
print("DataFrame shape:", mimic_iv_bhc_100.shape)
print("\nColumns:", mimic_iv_bhc_100.columns.tolist())

DataFrame shape: (100, 5)

Columns: ['note_id', 'input', 'target', 'input_tokens', 'target_tokens']


In [5]:
pd.set_option('display.max_colwidth', None)  # Prevent truncation of long text
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.expand_frame_repr', False)  # Prevent wrapping of content

#print(mimic_iv_bhc_100.iloc[9])  # Replace 9 with the desired row index
  # Remember: Index starts from 0

In [6]:
import torch

if torch.cuda.is_available():
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
else:
    print("No GPU available")


GPU is available: Tesla V100-SXM2-16GB


In [7]:
from huggingface_hub import login

# Use your Hugging Face token
login("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")

In [8]:
import torch
from tqdm import tqdm
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
import os

### Configuring 8-Bit Quantization

In [9]:
# Set environment variable for better memory management
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Configure quantization (8-bit)
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
print("Environment setup and quantization configuration done.")


Environment setup and quantization configuration done.


In [10]:
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

print("Loading model and tokenizer...")
with tqdm(total=2, desc="Initializing Model and Tokenizer", unit="step") as pbar:
    model_name = "meta-llama/Llama-3.2-1B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="auto",
        output_attentions=True,  # Enable attention outputs for AGTD
        return_dict_in_generate=True  # Ensures attention outputs are generated
    )
    pbar.update(1)

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = 'left'
    tokenizer.pad_token_id = tokenizer.eos_token_id
    pbar.update(1)
print("Model and tokenizer loaded successfully.")


Loading model and tokenizer...


Initializing Model and Tokenizer:   0%|          | 0/2 [00:00<?, ?step/s]

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Initializing Model and Tokenizer:  50%|█████     | 1/2 [01:12<01:12, 72.02s/step]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Initializing Model and Tokenizer: 100%|██████████| 2/2 [01:13<00:00, 36.81s/step]

Model and tokenizer loaded successfully.





In [1]:
from transformers import pipeline

# Initialize the summarization pipeline
summarizer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)
print("Summarization pipeline initialized.")


NameError: name 'model' is not defined

In [16]:
# Combine few-shot examples into a prompt template


def construct_few_shot_prompt(row_input):
    """
    Constructs a few-shot prompt dynamically using the row input.

    Args:
    - row_input (str): The input text from the dataframe row.

    Returns:
    - str: The constructed prompt for the model.
    """
    prompt = "You are a medical expert. Please summarize the following input concisely:\n\n"
    for example in few_shot_examples:
        prompt += f"Input: {example['input']}\nTarget: {example['target']}\n\n"
    prompt += f"Input: {row_input}\nSummary:"
    return prompt


In [17]:
import torch
from typing import List, Tuple, Optional
from transformers import PreTrainedModel, PreTrainedTokenizer

class AGTDSummarizer:
    def __init__(self, model: PreTrainedModel, tokenizer: PreTrainedTokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device

    def calculate_importance(
        self, 
        tokens: torch.Tensor, 
        alpha: float = 0.5
    ) -> torch.Tensor:
        """
        Calculate token importance scores using attention weights and positional bias.
        
        Args:
            tokens: Input token ids.
            alpha: Weight factor for layer importance.
            
        Returns:
            Tensor of importance scores for each token.
        """
        try:
            with torch.no_grad():
                outputs = self.model(tokens, output_attentions=True)
                attentions = outputs.attentions
                
                importance_scores = torch.zeros(tokens.size(-1), device=tokens.device)
                num_layers = len(attentions)
                
                # Generate positional weights (linear decay from 1.0 to 0.5)
                position_weights = torch.linspace(1.0, 0.5, steps=tokens.size(-1), device=tokens.device)

                for l, layer_attention in enumerate(attentions):
                    # Calculate layer weight with position-based scaling
                    layer_weight = alpha + (1 - alpha) * (l + 1) / num_layers
                    
                    # Average attention across heads and batches
                    avg_attention = layer_attention.mean(dim=1).squeeze()
                    token_importance = avg_attention.mean(dim=-1)
                    
                    # Add positional weighting to importance scores
                    importance_scores += layer_weight * token_importance * position_weights
                
                return importance_scores
                
        except Exception as e:
            print(f"Error calculating importance scores: {str(e)}")
            raise

    def summarize(
        self,
        text: str,
        retention_ratio: float = 0.7,
        alpha: float = 0.5,
        max_length: int = 2048
    ) -> Tuple[str, List[float]]:
        """
        Perform Attention-Guided Token Dropping to summarize text.
        
        Args:
            text: Input text to summarize.
            retention_ratio: Fraction of tokens to retain.
            alpha: Weight factor for layer importance.
            max_length: Maximum input length.
            
        Returns:
            Tuple of (summarized text, importance scores).
        """
        try:
            # Tokenize input text
            tokens = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=max_length
            ).to(self.device)

            # Calculate number of tokens to keep
            n = tokens.input_ids.size(-1)
            k = int(n * retention_ratio)

            # Get importance scores and top k indices
            importance_scores = self.calculate_importance(tokens.input_ids, alpha)
            _, indices = torch.sort(importance_scores, descending=True)
            keep_indices = sorted(indices[:k].tolist())

            # Create reduced token sequence
            reduced_tokens = tokens.input_ids[0][keep_indices]
            
            # Decode back to text
            reduced_text = self.tokenizer.decode(reduced_tokens)

            return reduced_text, importance_scores.tolist()

        except Exception as e:
            print(f"Error in summarization process: {str(e)}")
            raise

def test_summarizer(
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
    test_text: Optional[str] = None
):
    """Test the AGTD summarizer with sample text."""
    
    if test_text is None:
        test_text = "<SEX> F <SERVICE> SURGERY <CHIEF COMPLAINT> abdominal pain"
    
    summarizer = AGTDSummarizer(model, tokenizer)
    
    try:
        reduced_text, importance_scores = summarizer.summarize(
            test_text,
            retention_ratio=0.7
        )
        
        print("Original Text:\n", test_text)
        print("\nReduced Text:\n", reduced_text)
        print("\nToken Importance Scores:", importance_scores[:10])
        
    except Exception as e:
        print(f"Error testing summarizer: {str(e)}")


In [18]:
import pandas as pd
from tqdm import tqdm

def process_dataset_with_agtd(
    df: pd.DataFrame,
    summarizer: AGTDSummarizer,
    input_column: str = "input",
    retention_ratio: float = 0.7,
    alpha: float = 0.5,
    max_length: int = 2048
) -> pd.DataFrame:
    """
    Apply AGTD to a dataset and add reduced text and importance scores as new columns.
    
    Args:
        df: Input dataframe with an `input` column containing text data.
        summarizer: An instance of the AGTDSummarizer class.
        input_column: Column name in the dataframe containing the input text.
        retention_ratio: Fraction of tokens to retain during summarization.
        alpha: Weight factor for layer importance.
        max_length: Maximum input length for tokenization.
    
    Returns:
        Updated dataframe with new columns: `reduced_text` and `importance_scores`.
    """
    reduced_texts = []
    importance_scores_list = []

    print("Processing dataset with AGTD...")
    with tqdm(total=len(df), desc="Processing Dataset", unit="row") as pbar:
        for _, row in df.iterrows():
            text = row[input_column]
            try:
                # Summarize using AGTDSummarizer
                reduced_text, importance_scores = summarizer.summarize(
                    text,
                    retention_ratio=retention_ratio,
                    alpha=alpha,
                    max_length=max_length
                )
                reduced_texts.append(reduced_text)
                importance_scores_list.append(importance_scores)
            except Exception as e:
                print(f"Error processing row: {text[:50]}... Error: {e}")
                reduced_texts.append("")
                importance_scores_list.append([])
            finally:
                pbar.update(1)

    # Add results as new columns to the dataframe
    df["reduced_text"] = reduced_texts
    df["importance_scores"] = importance_scores_list

    return df


In [19]:
# Process the dataset to reduce text dynamically
processed_df = process_dataset_with_agtd(
    df=mimic_iv_bhc_100,
    summarizer=AGTDSummarizer(model, tokenizer),
    input_column="input",  # The column containing input text
    retention_ratio=0.7,  # Retain 80% of the most important tokens
    alpha=0.5,            # Importance weighting factor
    max_length=2048       # Max tokenization length
)

# View one reduced text
print(processed_df["reduced_text"].iloc[0])  # Replace 0 with the desired row index
# Save the processed DataFrame to a CSV file
processed_df.to_csv("processed_reduced_texts.csv", index=False)

print("Processed DataFrame saved to 'processed_reduced_texts.csv'.")



Processing dataset with AGTD...


Processing Dataset: 100%|██████████| 100/100 [00:25<00:00,  3.98row/s]


<|begin_of_text|><SEX> F <SERVICE> SURGERY <ALLERGIES> Iodine / Thallium-201 / Blue Dye / Iodine-Iodine Containing <ATTENDING> ___. <CHIEF COMPLAINT> Morbid obesity, BMI of 51 <MAJOR SURGICAL OR INVASIVE PROCEDURE> lap gastric bypass <HISTORY OF PRESENT ILLNESS> The patient is a ___ woman with history of obesity, multiple medical problems with a history of 7 pound weight loss and regain. Comorbid conditions include sleep apnea, hypothyroidism, back pain, iron deficiency anemia and headaches. The patient has significant allergies particularly to the blue dye and iodine. The patient was evaluated at ___ ___ ___ Program deemed a good candidate for surgical weight loss. She understands the risks, benefits and alternatives of weight loss surgery. She agrees to diet, exercise, support group and lifelong medical follow-up particularly for B12, calcium and folate levels. <PAST MEDICAL HISTORY> Past medical history includes sleep apnea, hypothyroidism, back pain, urticaria for which she is on c

In [20]:
# Save the DataFrame to a CSV file
Reduced_Text = "processed_reduced_texts.csv"
processed_df.to_csv(Reduced_Text, index=False)

print(f"Reduced text have been saved to '{Reduced_Text}'")


Reduced text have been saved to 'processed_reduced_texts.csv'


In [22]:
#processed_df.head(2)

In [24]:
from tqdm import tqdm
import time

# Generation parameters
generation_params = {
    "do_sample": True,
    "top_p": 0.8,
    "temperature": 0.1,
    "top_k": 40,
    "max_new_tokens": 300,
    "repetition_penalty": 1.1
}

batch_size = 8  # Adjust based on available memory

# Metrics tracking
total_input_tokens = 0
total_output_tokens = 0
total_time_spent = 0
ttft_list = []  # Time to first token
latency_list = []  # Time per summary
throughput_list = []  # Tokens processed per second

# Batch processing
print("Generating summaries in batches...")
generated_summaries = []

for i in tqdm(range(0, len(processed_df), batch_size), desc="Processing Batches"):
    batch = processed_df["reduced_text"][i:i + batch_size].tolist()

    try:
        # Construct prompts for the batch
        prompts = [f"You are a medical expert. {text}" for text in batch]
        batch_input_tokens = sum(len(tokenizer.encode(prompt)) for prompt in prompts)

        # Generate summaries and measure TTFT and latency
        batch_start_time = time.time()
        summaries = []
        for prompt in prompts:
            single_start_time = time.time()
            output = summarizer(prompt, **generation_params)
            single_end_time = time.time()
            ttft_list.append(single_end_time - single_start_time)  # Time to first token
            summaries.append(output[0]["generated_text"])  # Extract generated text
        batch_end_time = time.time()

        # Calculate batch metrics
        batch_output_tokens = sum(len(tokenizer.encode(summary)) for summary in summaries)
        batch_latency = batch_end_time - batch_start_time  # Total time for the batch
        latency_list.append(batch_latency / len(batch))  # Average latency per summary
        throughput_list.append((batch_input_tokens + batch_output_tokens) / batch_latency)

        # Update global metrics
        total_input_tokens += batch_input_tokens
        total_output_tokens += batch_output_tokens
        total_time_spent += batch_latency

        # Store summaries
        generated_summaries.extend(summaries)

    except Exception as e:
        print(f"Error generating summaries for batch starting at index {i}: {e}")
        generated_summaries.extend([""] * len(batch))  # Fill with empty summaries in case of failure

# Add the summaries to the DataFrame
processed_df["generated_summary"] = generated_summaries

# Metrics Calculation
average_latency = sum(latency_list) / len(latency_list) if latency_list else 0
average_ttft = sum(ttft_list) / len(ttft_list) if ttft_list else 0
average_throughput = sum(throughput_list) / len(throughput_list) if throughput_list else 0
token_efficiency = total_output_tokens / total_input_tokens if total_input_tokens else 0

# Print metrics
print("\nComputational Efficiency Metrics:")
print(f"Total Input Tokens: {total_input_tokens}")
print(f"Total Output Tokens: {total_output_tokens}")
print(f"Total Time Spent: {total_time_spent:.2f} seconds")
print(f"Average Latency (Time per Summary): {average_latency:.4f} seconds")
print(f"Average TTFT (Time to First Token): {average_ttft:.4f} seconds")
print(f"Average Throughput: {average_throughput:.2f} tokens/second")
print(f"Token Efficiency (TE): {token_efficiency:.4f}")

# Save results
processed_df.to_csv("generated_summaries_with_metrics.csv", index=False)
print("\nSummaries saved to 'generated_summaries_with_metrics.csv'")


Generating summaries in batches...


Processing Batches: 100%|██████████| 13/13 [33:13<00:00, 153.37s/it]



Computational Efficiency Metrics:
Total Input Tokens: 129585
Total Output Tokens: 158137
Total Time Spent: 1993.11 seconds
Average Latency (Time per Summary): 19.9942 seconds
Average TTFT (Time to First Token): 19.9311 seconds
Average Throughput: 145.01 tokens/second
Token Efficiency (TE): 1.2203

Summaries saved to 'generated_summaries_with_metrics.csv'


In [23]:
from tqdm import tqdm
import time

# Few-shot examples
few_shot_examples = [
    {
        "input": "<SEX> F <SERVICE> ONCOLOGY <CHIEF COMPLAINT> worsening back pain <HISTORY OF PRESENT ILLNESS> The patient is a 45-year-old female with a history of metastatic breast cancer presenting with worsening back pain over the last two weeks. Imaging revealed compression fractures in the thoracic spine.",
        "target": "The patient was admitted to oncology for worsening back pain. Imaging revealed metastatic cancer with thoracic spine involvement. She was started on pain management and referred for palliative radiation therapy."
    },
    {
        "input": "<SEX> M <SERVICE> CARDIOLOGY <CHIEF COMPLAINT> chest pain <HISTORY OF PRESENT ILLNESS> A 55-year-old male presented with chest pain radiating to the left arm and jaw. Initial ECG showed ST-segment elevation in the inferior leads. Troponin levels were elevated.",
        "target": "The patient presented to cardiology with chest pain consistent with acute myocardial infarction. He was taken emergently to the cath lab for primary PCI, and a stent was placed in the right coronary artery."
    }
]

# Generation parameters
generation_params = {
    "do_sample": True,
    "top_p": 0.8,
    "temperature": 0.1,
    "top_k": 40,
    "max_new_tokens": 300,
    "repetition_penalty": 1.1
}

batch_size = 8  # Adjust based on available memory

# Define parse_summary_output
def parse_summary_output(output):
    if isinstance(output, dict) and "generated_text" in output:
        return output["generated_text"].split("Summary:")[-1].strip()
    elif isinstance(output, str):
        return output.split("Summary:")[-1].strip()
    else:
        return ""

# Few-shot prompt constructor
def construct_few_shot_prompt(input_text):
    prompt = "You are a medical expert. Please summarize the following input concisely:\n\n"
    for example in few_shot_examples:
        prompt += f"Input: {example['input']}\nTarget: {example['target']}\n\n"
    prompt += f"Input: {input_text}\nSummary:"
    return prompt

# Metrics tracking
total_input_tokens = 0
total_output_tokens = 0
total_time_spent = 0
ttft_list = []  # Time to first token
latency_list = []  # Time per summary
throughput_list = []  # Tokens processed per second

# Batch processing
print("Generating summaries in batches...")
generated_summaries = []

for i in tqdm(range(0, len(processed_df), batch_size), desc="Processing Batches"):
    batch = processed_df["reduced_text"][i:i + batch_size].tolist()

    try:
        # Construct prompts for the batch
        prompts = [construct_few_shot_prompt(text) for text in batch]
        batch_input_tokens = sum(len(tokenizer.encode(prompt)) for prompt in prompts)

        # Generate summaries and measure TTFT and latency
        batch_start_time = time.time()
        summaries = []
        for prompt in prompts:
            single_start_time = time.time()
            output = summarizer(prompt, **generation_params)
            single_end_time = time.time()
            ttft_list.append(single_end_time - single_start_time)  # Time to first token
            summaries.append(parse_summary_output(output[0]))
        batch_end_time = time.time()

        # Calculate batch metrics
        batch_output_tokens = sum(len(tokenizer.encode(summary)) for summary in summaries)
        batch_latency = batch_end_time - batch_start_time  # Total time for the batch
        latency_list.append(batch_latency / len(batch))  # Average latency per summary
        throughput_list.append((batch_input_tokens + batch_output_tokens) / batch_latency)

        # Update global metrics
        total_input_tokens += batch_input_tokens
        total_output_tokens += batch_output_tokens
        total_time_spent += batch_latency

        # Store summaries
        generated_summaries.extend(summaries)

    except Exception as e:
        print(f"Error generating summaries for batch starting at index {i}: {e}")
        generated_summaries.extend([""] * len(batch))  # Fill with empty summaries in case of failure

# Add the summaries to the DataFrame
processed_df["generated_summary"] = generated_summaries

# Metrics Calculation
average_latency = sum(latency_list) / len(latency_list) if latency_list else 0
average_ttft = sum(ttft_list) / len(ttft_list) if ttft_list else 0
average_throughput = sum(throughput_list) / len(throughput_list) if throughput_list else 0
token_efficiency = total_output_tokens / total_input_tokens if total_input_tokens else 0

# Print metrics
print("\nComputational Efficiency Metrics:")
print(f"Total Input Tokens: {total_input_tokens}")
print(f"Total Output Tokens: {total_output_tokens}")
print(f"Total Time Spent: {total_time_spent:.2f} seconds")
print(f"Average Latency (Time per Summary): {average_latency:.4f} seconds")
print(f"Average TTFT (Time to First Token): {average_ttft:.4f} seconds")
print(f"Average Throughput: {average_throughput:.2f} tokens/second")
print(f"Token Efficiency (TE): {token_efficiency:.4f}")

# Save results
processed_df.to_csv("generated_summaries_with_metrics.csv", index=False)
print("\nSummaries saved to 'generated_summaries_with_metrics.csv'")


Generating summaries in batches...


Processing Batches:   8%|▊         | 1/13 [02:19<27:51, 139.27s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing Batches: 100%|██████████| 13/13 [28:00<00:00, 129.26s/it]



Computational Efficiency Metrics:
Total Input Tokens: 153764
Total Output Tokens: 24674
Total Time Spent: 1679.91 seconds
Average Latency (Time per Summary): 16.9060 seconds
Average TTFT (Time to First Token): 16.7990 seconds
Average Throughput: 106.56 tokens/second
Token Efficiency (TE): 0.1605

Summaries saved to 'generated_summaries_with_metrics.csv'


In [17]:
from tqdm import tqdm

few_shot_examples = [
    {
        "input": "<SEX> F <SERVICE> ONCOLOGY <CHIEF COMPLAINT> worsening back pain <HISTORY OF PRESENT ILLNESS> The patient is a 45-year-old female with a history of metastatic breast cancer presenting with worsening back pain over the last two weeks. Imaging revealed compression fractures in the thoracic spine.",
        "target": "The patient was admitted to oncology for worsening back pain. Imaging revealed metastatic cancer with thoracic spine involvement. She was started on pain management and referred for palliative radiation therapy."
    },
    {
        "input": "<SEX> M <SERVICE> CARDIOLOGY <CHIEF COMPLAINT> chest pain <HISTORY OF PRESENT ILLNESS> A 55-year-old male presented with chest pain radiating to the left arm and jaw. Initial ECG showed ST-segment elevation in the inferior leads. Troponin levels were elevated.",
        "target": "The patient presented to cardiology with chest pain consistent with acute myocardial infarction. He was taken emergently to the cath lab for primary PCI, and a stent was placed in the right coronary artery."
    }
]

generation_params = {
    "do_sample": True,
    "top_p": 0.8,
    "temperature": 0.1,
    "top_k": 40,
    "max_new_tokens": 300,
    "repetition_penalty": 1.1
}

batch_size = 8  # Adjust based on available memory

# Define parse_summary_output
def parse_summary_output(output):
    if isinstance(output, dict) and "generated_text" in output:
        return output["generated_text"].split("Summary:")[-1].strip()
    elif isinstance(output, str):
        return output.split("Summary:")[-1].strip()
    else:
        return ""

# Batch processing
print("Generating summaries in batches...")
generated_summaries = []
for i in tqdm(range(0, len(processed_df), batch_size), desc="Processing Batches"):
    batch = processed_df["reduced_text"][i:i + batch_size].tolist()

    try:
        # Construct prompts for the batch
        prompts = [construct_few_shot_prompt(text) for text in batch]

        # Generate summaries for the batch
        summaries = summarizer(prompts, **generation_params)

        # Parse and store summaries
        for summary in summaries:
            generated_summaries.append(parse_summary_output(summary[0]))
    except Exception as e:
        print(f"Error generating summaries for batch starting at index {i}: {e}")
        generated_summaries.extend([""] * len(batch))  # Fill with empty summaries in case of failure

# Add the summaries to the DataFrame
processed_df["generated_summary"] = generated_summaries

# Inspect the summaries
#print("\nSample of generated sammaries:")
#print(processed_df[["reduced_text", "generated_summary"]].head())


Generating summaries in batches...


Processing Batches:  77%|███████▋  | 10/13 [22:17<07:02, 140.84s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing Batches: 100%|██████████| 13/13 [28:38<00:00, 132.19s/it]


In [18]:
processed_df['generated_summary'].iloc[6]

"The patient presents with worsening back pain, shortness of breath, and abdominal pain due to ovarian cancer. The imaging studies reveal metastatic disease to the thoracic spine and abdomen, with ascites and pleural effusions. The patient underwent paracentesis and thoracentesis, which yielded malignant cells consistent with adenocarcinoma. The patient is being considered for chemotherapy before undergoing surgical intervention. The patient's symptoms suggest a possible diagnosis of ovarian cancer, particularly given the presence of malignant cells in the ascitic fluid. The patient's physical examination reveals signs of ascites and pleural effusions, which are consistent with advanced disease. The patient's laboratory results show elevated CA-125 and CEA levels, indicating possible recurrence or progression of the disease. The patient's family history suggests a possible genetic predisposition to ovarian cancer, although further testing is needed to confirm this. The patient's curren

In [19]:
# Save the updated DataFrame to a CSV file
output_file_path = "generated_summaries.csv"  # Define the desired output file path
processed_df.to_csv(output_file_path, index=False)
print(f"Summaries saved to '{output_file_path}'")


Summaries saved to 'generated_summaries.csv'


In [20]:
import time
from tqdm import tqdm

# Ensure your processed DataFrame has the reduced text column
if "reduced_text" not in processed_df.columns or "generated_summary" not in processed_df.columns:
    raise ValueError("The 'reduced_text' or 'generated_summary' column is missing from the DataFrame.")

# Initialize variables for latency calculation
latencies = []

print("Measuring latency for CPTF+LLM...")
for reduced_text in tqdm(processed_df["reduced_text"], desc="Evaluating Latency"):
    try:
        # Measure latency per input
        latency_start = time.time()
        prompt = construct_few_shot_prompt(reduced_text)  # Use the reduced text for the prompt
        generated_summary = summarizer(prompt, **generation_params)[0]["generated_text"]
        latency_end = time.time()

        # Append latency for the input
        latencies.append(latency_end - latency_start)
    except Exception as e:
        print(f"Error processing input: {reduced_text[:50]}... Error: {e}")
        latencies.append(float('inf'))  # Add infinity for failed cases

# Calculate average latency
valid_latencies = [lat for lat in latencies if lat != float('inf')]
average_latency = sum(valid_latencies) / len(valid_latencies) if valid_latencies else float('inf')

# Print the average latency
print(f"Average Latency for CPTF+LLM: {average_latency:.4f} seconds")


Measuring latency for CPTF+LLM...


Evaluating Latency: 100%|██████████| 100/100 [28:26<00:00, 17.06s/it]

Average Latency for CPTF+LLM: 17.0613 seconds





In [25]:
import time
import pandas as pd
from transformers import AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Load the processed DataFrame
processed_df = pd.read_csv("generated_summaries.csv")  # Replace with actual file path

# Ensure required columns exist
if "reduced_text" not in processed_df.columns or "generated_summary" not in processed_df.columns:
    raise ValueError("The 'reduced_text' or 'generated_summary' column is missing from the DataFrame.")

# Initialize tokenizer and sentence embedding model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize metrics
total_tokens = 0
total_meaningful_tokens = 0
total_irr = 0
summaries_count = len(processed_df)

# Start time for throughput measurement
start_time = time.time()

# Evaluate each row
for _, row in tqdm(processed_df.iterrows(), desc="Evaluating Summaries", total=summaries_count):
    input_text = row["reduced_text"]
    generated_summary = row["generated_summary"]

    # Tokenization metrics
    total_tokens += len(tokenizer.tokenize(generated_summary))
    total_meaningful_tokens += len(tokenizer.tokenize(generated_summary.strip()))

    # Information Retention Ratio (IRR)
    input_embedding = sbert_model.encode([input_text])
    summary_embedding = sbert_model.encode([generated_summary])
    similarity = cosine_similarity(input_embedding, summary_embedding)[0][0]
    total_irr += similarity

# Calculate total time taken
total_time = time.time() - start_time

# Final metrics
average_te = total_meaningful_tokens / total_tokens if total_tokens > 0 else 0
average_irr = total_irr / summaries_count if summaries_count > 0 else 0
average_ttft = total_time / summaries_count if summaries_count > 0 else 0
throughput = summaries_count / total_time if total_time > 0 else 0

# Print results
print(f"Average Token Efficiency (TE): {average_te:.4f}")
print(f"Average Information Retention Ratio (IRR): {average_irr:.4f}")
print(f"Total Time to First Token (TTFT): {average_ttft:.4f} seconds")
print(f"Throughput: {throughput:.2f} summaries/second")


Evaluating Summaries: 100%|██████████| 100/100 [00:02<00:00, 42.45it/s]

Average Token Efficiency (TE): 1.0000
Average Information Retention Ratio (IRR): 0.5405
Total Time to First Token (TTFT): 0.0236 seconds
Throughput: 42.39 summaries/second





In [26]:
import time

batch_size = 8  # Or any size suitable for your memory
batched_latencies = []
for i in tqdm(range(0, len(processed_df), batch_size), desc="Evaluating Batches"):
    batch = processed_df["reduced_text"].iloc[i:i+batch_size].tolist()
    try:
        start_time = time.time()
        prompts = [construct_few_shot_prompt(text) for text in batch]
        summaries = summarizer(prompts, **generation_params)
        end_time = time.time()
        batched_latencies.append(end_time - start_time)
    except Exception as e:
        print(f"Error processing batch {i}: {e}")
        batched_latencies.append(float('inf'))
average_batch_latency = sum(batched_latencies) / len(batched_latencies)
print(f"Average Batch Latency: {average_batch_latency:.4f} seconds")


Evaluating Latency: 100%|██████████| 100/100 [28:23<00:00, 17.03s/it]

Average Latency: 17.0331 seconds



