<a href="https://colab.research.google.com/github/RicoStaedeli/NLP2025_CQG/blob/main/4_Finetuned_Generation_unsloth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Finetuned Predictions
In this notebook we generate critical-question predictions with the fine-tuned model.

## Setup

In [None]:
!pip install unsloth_zoo
!pip install --no-deps unsloth
!pip install -U transformers
!pip install accelerate bitsandbytes
!pip install -U peft

In [None]:
import torch
from google.colab import userdata
import logging
import transformers
import os
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer
from bitsandbytes import nn as bnb_nn
from unsloth import FastLanguageModel
from tqdm.auto import tqdm
import pandas as pd
import json
from collections import defaultdict
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset
# Presumably an allocator option that PyTorch reads from this environment variable.
# NOTE(review): it is assigned after `import torch` and the unsloth imports above — confirm it is
# still picked up, or move the assignment ahead of those imports.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
token = userdata.get('GITHUB')
repo_url = f"https://{token}@github.com/RicoStaedeli/NLP2025_CQG.git"

!git clone {repo_url}

In [None]:
################################################################################
#######################   PATH VARIABLES        ################################
################################################################################

# Model identifiers. `model_id` is the checkpoint loaded by the generation pipeline and
# `model_name` is used to name the result and log files. BASE_MODEL is not referenced
# elsewhere in the visible notebook.
BASE_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct"
model_name = "Meta-Llama-3.1-8B-Instruct_DPO"
model_id = "ricostaedeli/Meta-Llama-3.1-8B-Instruct_DPO" #"meta-llama/Llama-3.1-8B-Instruct" # ricostaedeli/Meta-Llama-3.1-8B-Instruct_DPO

test_dataset_path = "/content/NLP2025_CQG/Data/Processed/test.csv"

# The path is absolute, so joining it onto os.getcwd() had no effect and was dropped.
results_path = f"/content/NLP2025_CQG/Evaluation/Results/results_{model_name}.json"
# Create the target folder up front so the final write cannot fail after generation.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

log_base_path = "/content/NLP2025_CQG/Logs/"
os.makedirs(log_base_path, exist_ok=True)

log_path = log_base_path + f"4_cqs_generation_{model_name}.log"


################################################################################
#######################   STATIC VARIABLES      ################################
################################################################################


In [None]:
# Configure a dedicated logger that writes this run's messages to `log_path`.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Attach the file handler only once; re-running the cell would otherwise add a second
# handler and duplicate every log line.
if not logger.handlers:
    fh = logging.FileHandler(log_path)
    fh.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

# Detect device: MPS is checked first, then CUDA, with CPU as the fallback.
# NOTE(review): `device` is only logged in the visible notebook; the pipeline itself is
# loaded with device_map="auto".
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Log the run configuration. The banner used to read "Baseline Generation", which
# mislabelled this fine-tuned run in the log file.
logger.info("--------  Start with Finetuned Generation  -------------")
logger.info(f'Model: {model_id}')
logger.info(f'Device selected: {device}')
logger.info(f'Results Path: {results_path}')
logger.info(f'Log Path: {log_path}')
logger.info("--------------------------------------------------------")

## Generate Answers

Preprocess the dataset: build one chat prompt per test example and question schema.

In [None]:
# Read the test split; every row supplies an `id` and the argumentative `input` text.
df = pd.read_csv(test_dataset_path)

# Schema labels inserted into the user prompt; each row is expanded once per label.
schemas = ["CauseToEffect", "ExpertOpinion", "Analogy", "FearAppeal"]


def _build_entry(original_id, context, schema):
    """Return the prompt record for one (test row, schema) pair."""
    system_message = {
        "role": "system",
        "content": "You generate concise, critical, single-sentence questions for argumentative contexts, matching specified question schemas."
    }
    user_message = {
        "role": "user",
        "content": f"Generate one critical question addressing the provided context. Ensure it matches the schema:{schema}\n\nContext: {context} Respond only with the question and nothing else."
    }
    assistant_message = {
        "role": "assistant",
        "content": "Question: "
    }
    return {
        "prompt": [system_message, user_message, assistant_message],
        "id": f"{original_id}",
        "schema": schema,
        "input": context,
    }


# One record per row and schema, rows in file order and schemas in list order.
json_data = [
    _build_entry(row['id'], row['input'].strip(), schema)
    for _, row in df.iterrows()
    for schema in schemas
]

# Persist the records to a temporary JSON file that the generation cell reads back.
temp_output_file = 'processed_dataset.json'
with open(temp_output_file, 'w') as handle:
    json.dump(json_data, handle, indent=4)

## Generation with transformers

In [None]:
# Optional manual cleanup, left disabled: the lines below would delete a `model` object,
# run the garbage collector and release cached CUDA memory.
# NOTE(review): no `model` variable is created in the visible notebook (the loaded object
# is `pipeline`) — adjust the name before enabling.
# del model
# gc.collect()
# torch.cuda.empty_cache()

In [None]:
# Disabled variant: builds the text-generation pipeline in float16 without a quantization
# config. It is kept as a bare string literal, so none of it is executed.
'''
pipeline = transformers.pipeline(
    "text-generation",
    model = model_id,
    model_kwargs={"torch_dtype": torch.float16},
)
'''

Load the pipeline with a 4-bit quantized model to reduce GPU memory usage.


In [None]:
# Keyword arguments forwarded to the model loader: float16 dtype, a 4-bit quantization
# config and the low-CPU-memory loading flag.
quantized_model_kwargs = {
    "torch_dtype": torch.float16,
    "quantization_config": {"load_in_4bit": True},
    "low_cpu_mem_usage": True,
}

# Text-generation pipeline for the fine-tuned checkpoint; device placement is left to
# device_map="auto".
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    model_kwargs=quantized_model_kwargs,
    device_map="auto",
)

### Single generation

In [None]:
# Disabled single-prompt helper, kept as a bare string literal so it is never defined.
# It renders one chat to a prompt string, samples a completion from `pipeline` and returns
# the generated text with the prompt prefix sliced off.
'''
def get_response(query, max_tokens=512, temperature=0.6, top_p=0.9):

    prompt = pipeline.tokenizer.apply_chat_template(
        query, tokenize=False, add_generation_prompt=True
    )

    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    response = outputs[0]["generated_text"][len(prompt):]
    return response
    '''

In [None]:
# Disabled smoke test for the single-prompt path, kept as a bare string literal.
# It would generate questions for the first two prompt entries only (the loop breaks once
# i == 2) and write them to `results_path`.
# NOTE(review): it calls `get_response`, which in the visible notebook exists only inside
# another disabled string literal — both would need to be re-enabled together.
'''
with open(temp_output_file, "r") as f:
    data = json.load(f)

combined_output = {}
i = 0
for item in data:
    print(f"Number: {i}")
    item_id = item["id"]
    schema = item["schema"]
    message = item["prompt"]
    input = item["input"]
    question = get_response(message).strip()

    if item_id not in combined_output:
        combined_output[item_id] = {"input": input, "cqs": []}

    combined_output[item_id]["cqs"].append({"schema": schema, "cq": question})
    i = i + 1
    if i == 2:
      break


# Save the combined questions
with open(results_path, "w") as f:
    json.dump(combined_output, f, indent=4)
print(f"Combined questions saved to {results_path}")
'''

### Batch processing

In [None]:
def get_responses(queries, max_tokens=512, temperature=0.6, top_p=0.9, batch_size=50):
    """Generate one sampled completion per chat prompt using the global `pipeline`.

    Args:
        queries: Sequence of chat conversations (lists of {"role", "content"} dicts) in
            the form accepted by the tokenizer's `apply_chat_template`.
        max_tokens: Forwarded to the pipeline as `max_new_tokens`.
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling threshold forwarded to the pipeline.
        batch_size: Number of prompts submitted to the pipeline per call. Defaults to 50,
            the value that was previously hard-coded.

    Returns:
        A list with one whitespace-stripped completion per query, in input order.

    Raises:
        ValueError: If `batch_size` is not a positive integer. A negative step would
            otherwise yield an empty range and silently return no responses.
    """
    if not isinstance(batch_size, int) or batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Render every conversation to a plain prompt string.
    # NOTE(review): the conversations built in this notebook already end with an assistant
    # message ("Question: ") and add_generation_prompt=True presumably appends a further
    # assistant header — confirm this matches the format used during fine-tuning.
    prompts = [
        pipeline.tokenizer.apply_chat_template(q, tokenize=False, add_generation_prompt=True)
        for q in queries
    ]

    all_responses = []
    for start in range(0, len(prompts), batch_size):
        batch_prompts = prompts[start:start + batch_size]

        # No `batch_size` argument is passed to the pipeline itself, so the chunking here
        # only limits how many prompts are submitted per call.
        outputs = pipeline(
            batch_prompts,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )

        for prompt, output_list in zip(batch_prompts, outputs):
            # Each output_list is a list holding a single dictionary.
            generated_text = output_list[0]["generated_text"]
            # The generated text is expected to start with the prompt, so slicing by the
            # prompt length keeps only the newly generated part.
            response = generated_text[len(prompt):].strip()
            all_responses.append(response)
            print(response)

    return all_responses

# Load the prompt entries written by the preprocessing cell.
with open(temp_output_file, "r") as f:
    data = json.load(f)

queries = [item["prompt"] for item in data]
responses = get_responses(queries)

# zip() would silently drop entries on a length mismatch, so fail loudly instead of
# saving incomplete results.
if len(responses) != len(data):
    raise RuntimeError(f"Expected {len(data)} responses but got {len(responses)}")

# Group the generated questions by example id:
#   {id: {"input": <context>, "cqs": [{"schema": ..., "cq": ...}, ...]}}
combined_output = {}
for item, response in zip(data, responses):
    # Named `context` rather than `input` so the builtin input() is not shadowed in the
    # notebook namespace.
    context = item["input"]
    record = combined_output.setdefault(item["id"], {"input": context, "cqs": []})
    record["cqs"].append({"schema": item["schema"], "cq": response})

# Make sure the target folder exists so the results of the long generation run are not
# lost to a missing directory.
results_dir = os.path.dirname(results_path)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# Save the combined questions
with open(results_path, "w") as f:
    json.dump(combined_output, f, indent=4)

print(f"Combined questions saved to {results_path}")

In [None]:
# Remove the intermediate prompt file. Any failure is reported but never aborts the notebook.
try:
    os.remove(temp_output_file)
except FileNotFoundError:
    cleanup_message = f"File '{temp_output_file}' not found."
except PermissionError:
    cleanup_message = f"Permission denied to delete '{temp_output_file}'."
except Exception as err:
    cleanup_message = f"Error deleting file: {err}"
else:
    cleanup_message = f"File '{temp_output_file}' deleted successfully."
print(cleanup_message)

## Commit & Push

In [None]:
os.chdir("NLP2025_CQG")
!ls

In [None]:
# Identity used for the commit created below (written to the global git config).
!git config --global user.name "Rico Städeli"
!git config --global user.email "rico@yabrriga.ch"


# Stage, commit and push the changes made in the current working directory.
# NOTE(review): `git add .` stages every new or modified file under the working directory,
# not only the results and log files — confirm that is intended.
# NOTE(review): the push presumably authenticates through the token embedded in the clone
# URL — verify.
commit_message = f"Finetuned generation"
!git add .
!git commit -m "{commit_message}"
!git push