<a href="https://colab.research.google.com/github/RicoStaedeli/NLP2025_CQG/blob/main/Development/Inference/ZeroShot/2_Baseline_CQS_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Baseline Predictions
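In this notebook we generate the baseline predictions: an instruction-tuned Llama 3.1 8B model is prompted, without any fine-tuning, to produce one critical question for each input in the test set.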

## Setup

In [1]:
!pip install -U transformers
!pip install accelerate bitsandbytes

In [2]:
import logging
import os

import pandas as pd
import torch
import transformers
from google.colab import userdata, drive

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.7.0+cu126 with CUDA 1206 (you have 2.6.0+cu124)
    Python  3.11.12 (you have 3.11.12)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the GitHub access token from the Colab secret named 'GITHUB'.
token = userdata.get('GITHUB')
# Embed the token in the HTTPS URL so the clone is authenticated.
# NOTE(review): git stores this URL (token included) as the clone's remote, and
# it may show up in command output — clear cell outputs before sharing.
repo_url = f"https://{token}@github.com/RicoStaedeli/NLP2025_CQG.git"

!git clone {repo_url}

In [None]:
os.chdir("NLP2025_CQG")
!ls

In [3]:
################################################################################
#######################   PATH VARIABLES        ################################
################################################################################

# Hugging Face identifier of the baseline model.
model_id = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"

# Input data; a relative path, resolved against the current working directory.
test_dataset_path = "Data/Processed/test.csv"

# CSV file that receives the generated questions.
# NOTE(review): model_id contains a "/", so this resolves to a file inside a
# "results_unsloth" sub-directory of Evaluation/Results — confirm that layout
# is intended.
results_path = f"Evaluation/Results/results_{model_id}.csv"
# Create only the parent directory: calling makedirs on results_path itself
# would create a directory named like the CSV and make writing the file fail.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

log_base_path = "Logs/"
os.makedirs(log_base_path, exist_ok=True)

log_path = log_base_path + "2_baseline_generation.log"


################################################################################
#######################   STATIC VARIABLES      ################################
################################################################################

# NOTE(review): none of the three settings below is referenced by the cells
# visible in this notebook — confirm whether they are still needed.
max_seq_length = 2048 # Maximum sequence length
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

In [4]:
# Notebook-wide logger; INFO and above are recorded
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Attach the file handler a single time, so re-running this cell does not
# register a second handler on the same logger
if len(logger.handlers) == 0:
    fh = logging.FileHandler(log_path)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

# Choose the compute device: MPS if available, otherwise CUDA, otherwise CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Record the run configuration in the log
for _message in (
    "--------  Start with Baseline Generation  -------------",
    f'Device selected: {device}',
    f'Results Path: {results_path}',
    f'Log Path: {log_path}',
    "--------------------------------------------------------",
):
    logger.info(_message)

INFO:__main__:--------  Start with Baseline Generation  -------------
INFO:__main__:Device selected: cuda
INFO:__main__:Results Path: Evaluation/Results/
INFO:__main__:Log Path: Logs/2_baseline_generation.log
INFO:__main__:--------------------------------------------------------


## Generate Answers

In [2]:
# Build the text-generation pipeline for the configured model. The model is
# requested in float16 with a 4-bit quantization config and low CPU memory
# usage while loading.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    model_kwargs=dict(
        torch_dtype=torch.float16,
        quantization_config=dict(load_in_4bit=True),
        low_cpu_mem_usage=True,
    ),
)

config.json:   0%|          | 0.00/1.26k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
def generate_response(prompt_text):
    """Generate the model's reply to a single user prompt.

    The prompt becomes the user turn of a two-message chat (after a fixed
    system message), is rendered to text with the tokenizer's chat template,
    and is then sampled from the notebook-level ``pipeline``.

    Args:
        prompt_text: Text placed in the user message.

    Returns:
        The generated text with the first ``len(rendered prompt)`` characters
        removed.
    """
    tokenizer = pipeline.tokenizer

    chat = [
        {'role': 'system', 'content': 'You are a helpful assistant!'},
        {'role': 'user', 'content': prompt_text},
    ]
    rendered_prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )

    # Generation may stop at the tokenizer's EOS token or at '<|eot_id|>'.
    stop_token_ids = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids('<|eot_id|>'),
    ]

    generations = pipeline(
        rendered_prompt,
        max_new_tokens=256,
        eos_token_id=stop_token_ids,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Assumes the pipeline returns prompt + completion; cut the prompt prefix off.
    full_text = generations[0]['generated_text']
    return full_text[len(rendered_prompt):]

In [None]:
def _build_prompt(input_text):
    """Wrap one dataset input in the instruction prompt sent to the model."""
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Generate one critical question for the given Input. Your answer is just the question without anything else.

### Input:
{input_text}

### Response:
"""


def process_dataset(input_csv, output_csv):
    """Generate one response per dataset row and save the results as CSV.

    Args:
        input_csv: Path of a CSV file with an ``input`` column.
        output_csv: Path of the CSV file to write. It gets the columns
            ``input`` and ``response``; missing parent directories are created.

    Raises:
        KeyError: If ``input_csv`` has no ``input`` column.
    """
    data = pd.read_csv(input_csv)

    # Iterate over the column directly; the other columns are not needed.
    responses = [
        {'input': input_text, 'response': generate_response(_build_prompt(input_text))}
        for input_text in data['input']
    ]

    # Make sure the destination folder exists before writing.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit columns keep the header even when the dataset has no rows.
    output_df = pd.DataFrame(responses, columns=['input', 'response'])
    output_df.to_csv(output_csv, index=False)
    print(f'Results saved to {output_csv}')

# Run the processing on the configured test set and write to the configured
# results file (instead of placeholder file names).
process_dataset(test_dataset_path, results_path)

## Commit & Push

In [None]:
# Set the author name and e-mail in the global git configuration, used for the commit below.
!git config --global user.name "Rico Städeli"
!git config --global user.email "rico@yabriga.ch"

In [None]:
# Stage everything in the working tree, commit it with the message below, and push.
# NOTE(review): `git add .` stages every new or changed file under the current
# directory — check `git status` first if only specific files should be committed.
commit_message = "Generate CQs for Baseline models"
!git add .
!git commit -m "{commit_message}"
!git push