In [None]:
import pandas as pd
df_sample = pd.read_csv("sample_summary.csv")
# Display the first few rows
print(df_sample.head())

# Check DataFrame info
print(df_sample.info())

In [2]:
!pip install -q transformers huggingface_hub
!pip install -q --upgrade accelerate
!pip install -q -U bitsandbytes

In [3]:
from huggingface_hub import login
import os

# Use token from environment variable (safer)
login(os.getenv("HF_TOKEN"))


In [7]:
import os
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd

# -----------------------------------------------------
# 1. Environment setup (optional but often helpful)
# -----------------------------------------------------
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# -----------------------------------------------------
# 2. Load your model and tokenizer
# -----------------------------------------------------
model_name = "meta-llama/Llama-3.2-1B-Instruct"  # Example model name
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",           # Automatic GPU/CPU placement
    torch_dtype=torch.float16     # Use FP16 for reduced memory usage
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Some LLaMA-based models need a special EOS token setup
tokenizer.padding_side = 'left'
tokenizer.pad_token_id = tokenizer.eos_token_id

# -----------------------------------------------------
# 3. Define a prompt construction function
# -----------------------------------------------------
def construct_prompt(input_text):
    """
    Constructs an instruction-based prompt for summarization.
    """
    prompt = (
        "Summarize the following case. "
        "Do not include any extra or verbatim text from the input. "
        f"Case:\n{input_text}\n\nSummary:"
    )
    return prompt

# -----------------------------------------------------
# 4. Set your generation parameters
# -----------------------------------------------------
generation_params = {
    "do_sample": True,
    "top_p": 0.8,
    "temperature": 1,
    "top_k": 10,
    "max_new_tokens": 20,
    "repetition_penalty": 1.1,
    "eos_token_id": tokenizer.eos_token_id
}

# -----------------------------------------------------
# 5. Load your sample DataFrame (df_sample) with columns "input" and "output"
#    For example, if you've already saved and loaded your CSV:
# -----------------------------------------------------
# df_sample = pd.read_csv("sample_summary.csv")
# For demonstration, if you need to create a dummy DataFrame:
# df_sample = pd.DataFrame({"input": ["Your input text here..."], "output": ["Ground truth summary here..."]})

# -----------------------------------------------------
# 6. Summarize your df_sample DataFrame using partial decoding
# -----------------------------------------------------
batch_size = 8  # Adjust as needed
inputs_list = df_sample["input"].tolist()
generated_summaries = []

def process_batch(batch_inputs):
    batch_generated = []
    for text in batch_inputs:
        prompt = construct_prompt(text)
        # Tokenize prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1000)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[1]

        # Generate output tokens
        summary_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            **generation_params
        )
        # Slice out only the tokens that were generated after the prompt
        generated_tokens = summary_ids[0, prompt_length:]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        batch_generated.append(generated_text)
    return batch_generated

# -----------------------------------------------------
# 7. Process the DataFrame in batches with a progress bar
# -----------------------------------------------------
with tqdm(total=len(inputs_list), desc="Generating Summaries", unit="row") as pbar:
    for i in range(0, len(inputs_list), batch_size):
        batch = inputs_list[i:i + batch_size]
        try:
            batch_generated = process_batch(batch)
        except RuntimeError as e:
            if "out of memory" in str(e):
                torch.cuda.empty_cache()
                print("Out of memory error; try reducing batch size.")
            raise e
        generated_summaries.extend(batch_generated)
        torch.cuda.empty_cache()
        pbar.update(len(batch))

# -----------------------------------------------------
# 8. Store and Save
# -----------------------------------------------------
# Add generated summaries as a new column in df_sample
df_sample["generated_summary"] = generated_summaries

# Save the DataFrame with input, output, and generated_summary
df_sample.to_csv("Longformer_soap_generated_summaries.csv", index=False)
print("Summaries saved to 'Longformer_soap_generated_summaries.csv'")


Generating Summaries:   0%|          | 0/100 [00:00<?, ?row/s]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Generating Summaries:   8%|▊         | 8/100 [00:04<00:50,  1.82row/s]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end gene

Summaries saved to 'Longformer_soap_generated_summaries.csv'



