In [1]:
import os
# Restrict this process to one GPU (host index 6). The variable is assigned
# before torch is imported in a later cell, so that single GPU is the one
# addressed as "cuda:0" in the rest of the notebook.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

In [2]:
from tqdm.auto import tqdm
from typing import List
import re

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Device string for the tensor allocated below.
DEVICE = 'cuda:0'
import torch
# Allocate a tiny 1x1 zero tensor on DEVICE. Presumably an early smoke test that
# the GPU is reachable before the large model is loaded -- TODO confirm intent;
# `a` is not used anywhere else in the visible cells.
a= torch.zeros(1,1,device=DEVICE)

In [4]:
# Display how many GPUs torch can see; with CUDA_VISIBLE_DEVICES limited to a
# single index this is expected to be 1.
torch.cuda.device_count()

1

In [5]:
# !GITHUB_ACTIONS=true pip install auto-gptq

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

In [6]:
# Hub identifier of the GPTQ-quantized checkpoint whose tokenizer is loaded here.
model_name_or_path = "TheBloke/upstage-llama-30b-instruct-2048-GPTQ"
# Weight-file basename; only referenced by a commented-out argument of the
# model-loading call in the next cell.
model_basename = "gptq_model-4bit--1g"

# Forwarded unchanged to the model-loading call.
use_triton = False

# Fast tokenizer, cached in the shared weights directory.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    cache_dir="/mount/arbeitsdaten/asr-2/vaethdk/resources/weights/",
    use_fast=True,
)


In [7]:
# Load the quantized checkpoint from the local weights directory onto DEVICE.
# DEVICE (defined in an earlier cell) is used instead of a second hard-coded
# "cuda:0" so the target GPU is configured in exactly one place.
model = AutoGPTQForCausalLM.from_quantized(
    "/mount/arbeitsdaten/asr-2/vaethdk/resources/weights/TheBloke--upstage-llama-30b-instruct-2048-GPTQ",
    # Alternatives that were tried and are currently disabled:
    # model_basename=model_basename,
    # revision="gptq-4bit-32g-actorder_True",
    use_safetensors=True,
    trust_remote_code=False,
    device=DEVICE,
    use_triton=use_triton,
    # No explicit config is passed; presumably the quantization settings are
    # read from the checkpoint directory -- TODO confirm.
    quantize_config=None,
)

CUDA extension not installed.
The safetensors archive passed at /mount/arbeitsdaten/asr-2/vaethdk/resources/weights/TheBloke--upstage-llama-30b-instruct-2048-GPTQ/gptq_model-4bit-32g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.


### System:
{System}

### User:
{User}

### Assistant:
{Assistant}

## Generate Answer Synonyms

In [8]:
from data.dataset import ReimburseGraphDataset
import torch

In [9]:
def generate_prompt(system: str, user: str) -> str:
    """Build a single-turn prompt in the ``### System / ### User / ### Assistant`` layout.

    The sections are assembled from an explicit one-line template instead of an
    indented triple-quoted literal. That way the indentation of this function's
    source code cannot leak into the prompt, and the result follows the
    template documented in this notebook: headers start at column 0, sections
    are separated by one blank line, and the assistant header is followed by a
    newline.

    Args:
        system: Text placed under the ``### System:`` header.
        user: Text placed under the ``### User:`` header.

    Returns:
        The prompt string, ending right after ``### Assistant:`` and a newline
        so that the model continues with the assistant turn.
    """
    return f"### System:\n{system}\n\n### User:\n{user}\n\n### Assistant:\n"

def generate_output(prompt: str, temperature: float = 0.7, max_new_tokens: int = 512, do_sample: bool = True) -> str:
    """Run the notebook-level ``model`` on ``prompt`` and return the decoded text.

    Args:
        prompt: Full prompt text to tokenize with the notebook-level ``tokenizer``.
        temperature: Sampling temperature. It is only forwarded when sampling is
            active, because the generation call ignores it otherwise.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: Whether to sample from the output distribution. Sampling is
            switched off automatically for a non-positive temperature.

    Returns:
        The decoded first sequence as a string. Special tokens are not stripped
        and the returned sequence is decoded as a whole, so callers are
        expected to remove the prompt part themselves.
    """
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()

    generation_kwargs = {"max_new_tokens": max_new_tokens}
    if do_sample and temperature > 0:
        # Without do_sample=True the temperature argument has no effect.
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    output = model.generate(inputs=input_ids, **generation_kwargs)
    return tokenizer.decode(output[0])


In [10]:
# Load the training split from the graph and answer JSON files. The third
# positional argument is False; the loader logs "synonyms: False", so it
# presumably disables answer synonyms -- TODO confirm against ReimburseGraphDataset.
train = ReimburseGraphDataset('en/reimburse/train_graph.json', 'en/reimburse/train_answers.json', False)

- not using synonyms
===== Dataset Statistics =====
- files:  resources/en/train_graph.json resources/en/train_answers.json
- synonyms: False
- depth: 20  - degree: 13
- answers: 73
- questions: 279


In [11]:
# Sanity check before generating paraphrases: every answer must currently map
# to exactly one entry, i.e. the loaded data contains no answer synonyms yet.
assert all(len(train.answer_synonyms[key]) == 1 for key in train.answer_synonyms)

In [12]:
def parse_output(original_answer: str, prompt: str, output: str, num_paraphrases: int) -> List[str]:
    """Extract the numbered paraphrases from a decoded generation.

    Args:
        original_answer: The answer that was paraphrased; only used in the
            diagnostic messages printed for missing items.
        prompt: The prompt that produced ``output``; everything up to and
            including it is discarded before parsing.
        output: Decoded model output (prompt followed by the completion).
        num_paraphrases: Highest list number to look for (items ``1.`` to
            ``num_paraphrases.``).

    Returns:
        The text of each list item that was found, in list order, without the
        ``N.`` marker and without ``</s>``. Missing items are skipped, so the
        list may be shorter than ``num_paraphrases`` (or empty).
    """
    answers: List[str] = []

    # The decoded text may start with extra characters (e.g. a special token)
    # before the prompt, so locate the prompt instead of assuming offset 0.
    # Fall back to the length-based cut when it is not found verbatim.
    prompt_start = output.find(prompt)
    if prompt_start == -1:
        cleaned = output[len(prompt):]
    else:
        cleaned = output[prompt_start + len(prompt):]

    def item_marker(index: int):
        # "N." that is neither the tail of a longer number/word ("11.", "v1.")
        # nor the head of a decimal ("1.5").
        return re.compile(rf"(?<![\w.]){index}\.(?!\d)")

    if item_marker(1).search(cleaned) is None:
        print("NO LIST FOR ANSWER", original_answer)
        return answers

    cursor = 0  # items are consumed front to back; never search behind this
    for i in range(1, num_paraphrases + 1):
        marker = item_marker(i).search(cleaned, cursor)
        if marker is None:
            print(f" - NO {i}. CANDIDATE FOR ANSWER", original_answer)
            continue

        # An item runs to the end of its line; the final line of a (possibly
        # truncated) generation may not be newline-terminated.
        line_end = cleaned.find("\n", marker.end())
        if line_end == -1:
            line_end = len(cleaned)

        # Slice after the marker instead of str.replace, which would also
        # delete "N." occurrences inside the paraphrase itself (e.g. "21.50").
        answers.append(cleaned[marker.end():line_end].replace("</s>", "").strip())
        cursor = line_end
    return answers

In [15]:
# System instruction sent with every request: ask for paraphrases as a numbered list.
system = (
    "You are a helpful assistant creating a list of paraphrases for a given answer to a question.\n"
    "Order the generated paraphrases in a numbered list."
)


def user(answer_text: str, num_paraphrases: int) -> str:
    """Return the user turn asking for ``num_paraphrases`` paraphrases of ``answer_text``."""
    return 'Generate {} paraphrases for the answer "{}"'.format(num_paraphrases, answer_text)

# Generation settings for the paraphrasing run.
NUM_PARAPHRASES = 10
TEMPERATURE = 0.7
MAX_NEW_TOKENS = 512
SEED = 42

# Seed the random number generators before the long run so that sampled
# generations can be repeated (set_seed is imported alongside the model classes).
set_seed(SEED)

# Maps each original answer to the list of paraphrases parsed from the model
# output; the list may be shorter than NUM_PARAPHRASES or empty when parsing
# finds fewer items.
generated_data = {}
for answer_key in tqdm(train.answer_synonyms):
    prompt = generate_prompt(system=system, user=user(answer_key, NUM_PARAPHRASES))
    gen = generate_output(prompt=prompt, temperature=TEMPERATURE, max_new_tokens=MAX_NEW_TOKENS)
    candidates = parse_output(original_answer=answer_key, prompt=prompt, output=gen, num_paraphrases=NUM_PARAPHRASES)
    generated_data[answer_key] = candidates
    

100%|██████████| 73/73 [3:32:00<00:00, 174.26s/it]  


In [16]:
import json
from pathlib import Path

# Persist the generated paraphrases. The target folder is created first so a
# missing directory cannot discard the results of the multi-hour generation
# run, and the encoding is fixed instead of depending on the platform default.
output_path = Path("en/reimburse/generated/train_answers.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w", encoding="utf-8") as f:
    json.dump(generated_data, f)