In [1]:
!nvidia-smi

Fri Dec 15 14:25:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.216.04   Driver Version: 450.216.04   CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-80GB      On   | 00000000:B7:00.0 Off |                    0 |
| N/A   31C    P0    61W / 400W |      0MiB / 81252MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
import re
import pickle
import numpy as np
import pandas as pd
import torch
from transformers import GPT2Tokenizer, T5ForConditionalGeneration, GenerationConfig
from peft import PeftModel, PeftConfig
from tqdm import tqdm
from copy import deepcopy

[2023-12-15 22:54:41,807] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
# Run on the first CUDA GPU when torch can see one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cuda:0'

In [3]:
# Checkpoint selection: exactly one MODEL_CHKP line should be active. The value
# is a path relative to ./models and is reused later for the output folder.
# MODEL_CHKP = 'calculator/ep2'
MODEL_CHKP = 'brackets/ep6'
# MODEL_CHKP = 'ilya'
MODEL_NAME = f'./models/{MODEL_CHKP}'
BASE_MODEL = 'ai-forever/FRED-T5-1.7B'

# Tokenizer and base weights come from BASE_MODEL; '</s>' is registered as EOS.
tokenizer = GPT2Tokenizer.from_pretrained(BASE_MODEL, eos_token='</s>')
model = T5ForConditionalGeneration.from_pretrained(
    BASE_MODEL,
    device_map=device,
    # torch_dtype=torch.float16,
    # load_in_8bit=False,
)
# NOTE(review): peft_config is not read anywhere else in the visible notebook.
peft_config = PeftConfig.from_pretrained(MODEL_NAME)
# Wrap the base model with the PEFT adapter stored under MODEL_NAME.
model = PeftModel.from_pretrained(
    model,
    MODEL_NAME,
    device_map=device,
    # torch_dtype=torch.float16,
    # use_safetensors=True,
)
# model.half()
# Switch to inference mode, then display where the model lives and which
# checkpoint was loaded.
model.eval()
model.device, MODEL_NAME

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


(device(type='cuda', index=0), './models/brackets/ep6')

In [4]:
# Decoding settings shared by every generation call in this notebook.
#
# Decoding is greedy because sampling is switched off (`do_sample=False`) and no
# beam-search option is set. The former `"greedy": True` entry was dropped:
# it is not a GenerationConfig option, so it was only stored as an extra
# attribute that nothing reads, which made the config misleading.
generation_config = GenerationConfig(
    pad_token_id=0,
    bos_token_id=1,
    eos_token_id=2,
    max_new_tokens=1000,
    no_repeat_ngram_size=15,
    repetition_penalty=1.01,
    do_sample=False,

    # Sampling alternative (replace `do_sample=False` above when enabling):
    # do_sample=True,
    # temperature=0.1,
    # top_k=10,
    # top_p=0.95,
)
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "greedy": true,
  "max_new_tokens": 1000,
  "no_repeat_ngram_size": 15,
  "pad_token_id": 0,
  "repetition_penalty": 1.01
}

In [5]:
"""
Helpers to support streaming generate output.
Borrowed from https://github.com/oobabooga/text-generation-webui/blob/ad37f396fc8bcbab90e11ecf17c56c97bfbd4a9c/modules/callbacks.py
"""

import gc
import traceback
from queue import Queue
from threading import Thread
from transformers import StoppingCriteria, StoppingCriteriaList

class Stream(StoppingCriteria):
    """Stopping criterion that never stops generation and only reports progress.

    It is installed in a ``StoppingCriteriaList`` purely to get a hook that is
    invoked after each generation step.

    Args:
        callback_func: optional callable invoked with ``input_ids[0]`` (the
            first sequence in the batch) on every call; ``None`` disables the
            hook.
    """

    def __init__(self, callback_func=None):
        self.callback_func = callback_func

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        """Forward the first sequence to the callback; always return ``False``.

        ``**kwargs`` is accepted (and ignored) so the signature matches the
        base ``StoppingCriteria.__call__``, which may be called with extra
        keyword arguments.
        """
        if self.callback_func is not None:
            self.callback_func(input_ids[0])
        # Never request a stop: this criterion is only a progress hook.
        return False


class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    ``func`` is started immediately in a background thread and is called as
    ``func(callback=<internal callback>, **kwargs)``. Every value ``func``
    passes to that callback is queued and handed out by ``__next__``; the
    iteration ends once ``func`` returns or raises.

    Used as a context manager, leaving the ``with`` block sets ``stop_now``;
    the next time ``func`` invokes the callback an internal exception is
    raised inside the worker thread so ``func`` is aborted.

    Args:
        func: callable accepting a ``callback`` keyword argument.
        kwargs: extra keyword arguments for ``func`` (defaults to none).
        callback: optional completion hook, called in the worker thread with
            the return value of ``func`` (``None`` if ``func`` did not
            complete normally).
    """

    class _StopRequested(Exception):
        """Raised by the internal callback to abort ``func`` after ``__exit__``."""

    def __init__(self, func, kwargs=None, callback=None):
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        self.sentinel = object()
        # A fresh dict per instance: a literal `{}` default would be shared
        # between all instances created without explicit kwargs.
        self.kwargs = {} if kwargs is None else kwargs
        self.stop_now = False

        def _callback(val):
            if self.stop_now:
                # A dedicated exception type is used as the stop signal so that
                # genuine errors raised by `func` are not mistaken for it.
                raise self._StopRequested
            self.q.put(val)

        def gentask():
            # Pre-bind so the completion hook below never sees an unbound name
            # when `func` is aborted or fails.
            ret = None
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
            except self._StopRequested:
                # The consumer left the `with` block; stopping is expected.
                pass
            except Exception:
                # Best effort: report the failure but still end the iteration.
                traceback.print_exc()
            finally:
                # Always unblock the consumer, whatever happened above.
                self.q.put(self.sentinel)

            if self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        # Block until the worker produces a value or signals completion.
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        else:
            return obj

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Ask the worker to stop at its next callback invocation.
        self.stop_now = True

In [6]:
def generate(model, tokenizer, prompt, generation_config):
    """Generate text for ``prompt``, splicing calculator results into the output.

    Generation runs in a background thread (through ``Iteratorize``) and
    reports the token ids produced so far after every step. Whenever the newly
    decoded text contains an expression wrapped in ``[[`` ... ``]]``, the
    expression is evaluated and the token ids of its result are queued;
    ``prefix_allowed_tokens_fn`` then restricts the following steps to exactly
    those tokens, one per step.

    Args:
        model: object exposing ``device`` and ``generate(...)``.
        tokenizer: callable tokenizer that also provides ``decode``,
            ``eos_token_id`` and ``len()``.
        prompt: text to encode and feed to the model.
        generation_config: passed to ``model.generate``; its
            ``max_new_tokens`` is also used for the stop check.

    Yields:
        str: the decoded output (special tokens skipped) each time a calculator
        result has been queued, and once more when generation ends. Callers
        that only need the final text keep the last yielded value.
    """
    data = tokenizer(prompt, return_tensors="pt").to(model.device)
    generate_params = {
        **data,
        "generation_config": generation_config,
    }

    # Stream the reply 1 token at a time.
    # This is based on the trick of using 'stopping_criteria' to create an iterator,
    # from https://github.com/oobabooga/text-generation-webui/blob/ad37f396fc8bcbab90e11ecf17c56c97bfbd4a9c/modules/text_generation.py#L216-L243.

    all_tokens = list(range(len(tokenizer)))
    # Token ids of a pending calculator result, consumed one per step.
    # NOTE(review): this list is appended to by the consumer loop below and
    # popped by the generation thread without any synchronisation.
    next_tokens = []

    def prefix_allowed_tokens_fn(batch_id, input_ids):
        # While a calculator result is pending, allow only its next token.
        if next_tokens:
            return [next_tokens.pop(0)]
        return all_tokens

    def generate_with_callback(callback=None, **kwargs):
        # Any caller-supplied stopping criteria are replaced by a single
        # Stream hook that forwards the partial output to `callback`.
        stopping_criteria = StoppingCriteriaList()
        stopping_criteria.append(Stream(callback_func=callback))
        kwargs["stopping_criteria"] = stopping_criteria
        with torch.no_grad():
            model.generate(
                prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
                **kwargs
            )

    def generate_with_streaming(**kwargs):
        return Iteratorize(
            generate_with_callback, kwargs, callback=None
        )

    def get_calc_result(expression):
        """Evaluate an arithmetic expression; return it as text, or None on failure."""
        try:
            # Decimal commas become dots; everything except digits, the four
            # operators, dots and parentheses is dropped.
            expression = re.sub(r'[^0-9+\-*/.()]', '', expression.replace(',', '.'))
            # SECURITY NOTE: eval() runs on model-generated text. The whitelist
            # above leaves no room for names or calls, but expressions such as
            # chained '**' can still be very expensive to evaluate.
            result = str(round(eval(expression), 2))
            if '.' in result:
                left, right = result.split('.', 1)
                # Drop an all-zero fractional part ("18.0" -> "18").
                if set(right) == {'0'}:
                    result = left
            return result
        except Exception as ex:
            print(expression, ex)
            return None

    is_end = False
    last_calculated_idx = 0
    # Defined up front so the end-of-generation branch can always yield, even
    # when the very first streamed step already meets the stop condition
    # (previously an UnboundLocalError).
    decoded_output = ''
    # calc_start, calc_end = '<calculator>', '</calculator>'
    calc_start, calc_end = '[[', ']]'

    # NOTE(review): the stream is restarted (up to 100 times) only if it ends
    # without reaching the stop condition below, e.g. after a failure in the
    # generation thread; state from the previous attempt is not reset.
    for _ in range(100):
        if is_end:
            break
        with generate_with_streaming(**generate_params) as generator:
            for output in generator:
                if output[-1] == tokenizer.eos_token_id or \
                        len(output) == generate_params['generation_config'].max_new_tokens - 1:
                    is_end = True
                    yield decoded_output
                    break
                decoded_output = tokenizer.decode(output, skip_special_tokens=True)
                # Only look at text produced since the last handled calculator call.
                new_output = decoded_output[last_calculated_idx:]
                if calc_end in new_output:
                    expression = new_output.rsplit(calc_end, 1)[0]
                    if calc_start in expression:
                        expression = expression.rsplit(calc_start, 1)[1].strip()
                        result = get_calc_result(expression)
                        if result is not None:
                            result_tokens = tokenizer(result, add_special_tokens=False)['input_ids']
                            next_tokens.extend(result_tokens)
                            yield decoded_output
                    last_calculated_idx = len(decoded_output)


def simple_generate(model, tokenizer, prompt, generation_config):
    """Run one plain (non-streaming, no calculator) generation for ``prompt``.

    Args:
        model: object exposing ``device`` and ``generate(...)``.
        tokenizer: callable tokenizer that also provides ``decode``.
        prompt: text to encode (special tokens are added).
        generation_config: forwarded to ``model.generate``.

    Returns:
        str: the first generated sequence decoded with special tokens skipped
        and surrounding whitespace stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    encoded = encoded.to(model.device)
    with torch.no_grad():
        sequences = model.generate(**encoded, generation_config=generation_config)
    # Only the first sequence of the returned batch is used.
    first_sequence = sequences[0]
    text = tokenizer.decode(first_sequence, skip_special_tokens=True)
    return text.strip()

In [7]:
def get_answer(a):
    """Extract the last number mentioned in ``a`` as a float.

    Numbers may be integers or decimals written with either a dot or a comma
    as the decimal separator.

    Returns:
        float: the last number found, or ``inf`` when ``a`` contains none.
    """
    last_number = None
    for match in re.finditer(r'(\d+\.\d+|\d+\,\d+|\d+)', a):
        last_number = match.group(1)
    if last_number is None:
        return float('inf')
    # Normalise a decimal comma before converting.
    return float(last_number.replace(',', '.'))

# Test set: the code below reads the 'answer' column here and the 'question'
# column in the generation loops.
gsm_test = pd.read_csv('gsm_test.csv')

# Reference value for each row: the number following the last '#### ' marker
# of the reference solution.
actual_answers = list(map(
    lambda solution: float(solution.rsplit('#### ', 1)[1].strip()),
    gsm_test['answer'],
))
len(actual_answers)

400

### Fine-tuned evaluation

In [15]:
def make_prompt(input):
    """Wrap one task statement in the prompt template used for evaluation.

    The template places ``<extra_id_0>`` after "Решение:" and ``<extra_id_1>``
    after "Ответ:".
    """
    # Earlier template variants, kept for reference:
    # prompt = f'<SC6>Задача: {input}\nРешение: <extra_id_0>\nОтвет: <extra_id_1></s>'
    # prompt = f'<SC6>Задача: {input}\nРешение: <extra_id_0>####<extra_id_1>'
    # NOTE(review): the active template starts with '<CS6>' while the variants
    # above use '<SC6>' - confirm which prefix the checkpoint was trained with.
    return f'<CS6>Задача: {input}\nРешение: <extra_id_0>\nОтвет: <extra_id_1>'

In [16]:
# Smoke test: run the calculator-augmented generator on two hand-written
# problems and print the prompt followed by the final generated text.
inputs = [
    "Если Анне 9 лет, а ее брат вдвое старше ее, сколько лет будет ее брату через 3 года?",
    "Доставка пиццы Эшли стоит 15 долларов. Какова общая сумма, которую Эшли должна дать доставщику, если она хочет дать чаевые, равные 1/5 заказанной суммы?"
]
for inp in inputs:
    prompt = make_prompt(inp)
    print(prompt)
    result = ''
    # generate() may yield several times per prompt; only the last yield is kept.
    for output in generate(model, tokenizer, prompt, generation_config):
        # print(f'###{output}###')
        result = output
    print(result)
    print("\n==============================")

<CS6>Задача: Если Анне 9 лет, а ее брат вдвое старше ее, сколько лет будет ее брату через 3 года?
Решение: <extra_id_0>
Ответ: <extra_id_1>
<extra_id_0> 9 * 2 = [[9 * 2]]18 лет.
3 = [[3]]3 года. <extra_id_1> 3

<CS6>Задача: Доставка пиццы Эшли стоит 15 долларов. Какова общая сумма, которую Эшли должна дать доставщику, если она хочет дать чаевые, равные 1/5 заказанной суммы?
Решение: <extra_id_0>
Ответ: <extra_id_1>
<extra_id_0> Чаевые составляют 15 / 5 = [[15 / 5]]3.
Таким образом, общая сумма составляет 15 + 3 = [[15 + 3]]18. <extra_id_1> 18



In [17]:
import os
# Per-checkpoint folder for generation logs and pickles; reused if it already exists.
gen_dir = f'./generations/{MODEL_CHKP}'
os.makedirs(name=gen_dir, exist_ok=True)

In [18]:
# Run the calculator-augmented generator over every test question, keeping the
# final streamed text per question and appending it to a log file as we go.
generated_outputs = []
records = gsm_test.to_dict('records')
# Explicit UTF-8: the generated text is Cyrillic and must not depend on the
# platform's default encoding.
with open(f'{gen_dir}/gen_t5_ft_output.txt', 'a', encoding='utf-8') as f_out:
    # `total` gives tqdm a real progress bar/ETA; enumerate() alone has no length.
    for i, d in tqdm(enumerate(records), total=len(records)):
        # NOTE(review): `generated_outputs` is reset at the top of this cell,
        # so this resume guard never triggers as written.
        if i < len(generated_outputs):
            continue
        prompt = make_prompt(d['question'])
        result = ''
        # generate() may yield several times; the last yield is the final text.
        for output in generate(model, tokenizer, prompt, generation_config):
            result = output
        generated_outputs.append(result)
        # Log `result`, not the loop variable `output`: if the generator yields
        # nothing, `output` would still hold the previous question's text and
        # the log would disagree with `generated_outputs`.
        f_out.write(f'\n### OUTPUT {i} ###\n{result}\n')
        f_out.flush()

400it [20:50,  3.13s/it]


In [16]:
# with open(f'{gen_dir}/gen_t5_ft_output.txt', 'r') as f_in:
#     generated_outputs = re.split(r'\n### OUTPUT \d+ ###\n', f_in.read())[1:]

In [19]:
# Sanity check: the generation loop appends one entry per test question.
len(generated_outputs)

400

In [20]:
# Persist the raw generations so they can be reloaded without re-running the model.
answers_path = f'{gen_dir}/gen_t5_ft_answers.pkl'
with open(answers_path, 'wb') as f:
    pickle.dump(generated_outputs, f)

# with open(f'{gen_dir}/gen_t5_ft_answers.pkl', 'rb') as f:
#     generated_outputs = pickle.load(f)

In [21]:
# Accuracy with the calculator: fraction of questions whose last generated
# number matches the reference within an absolute tolerance of 1e-3.
generated_answers = list(map(get_answer, generated_outputs))
np.mean(np.isclose(generated_answers, actual_answers, rtol=1e-9, atol=1e-3))

0.175

### Fine-tuned evaluation (no calculator)

In [8]:
# def make_prompt_lm(input):
#     prompt = f'<LM>Задача: {input}\nРешение: '
#     return prompt

def make_prompt(input):
    """Build the evaluation prompt for one task statement.

    NOTE(review): this repeats the ``make_prompt`` defined in the
    calculator section of the notebook; whichever cell runs last wins.
    """
    return f'<CS6>Задача: {input}\nРешение: <extra_id_0>\nОтвет: <extra_id_1>'

In [9]:
# Smoke test without the calculator: the same two hand-written problems are
# answered with a single plain generate call each.
inputs = [
    "Если Анне 9 лет, а ее брат вдвое старше ее, сколько лет будет ее брату через 3 года?",
    "Доставка пиццы Эшли стоит 15 долларов. Какова общая сумма, которую Эшли должна дать доставщику, если она хочет дать чаевые, равные 1/5 заказанной суммы?"
]
for inp in inputs:
    prompt = make_prompt(inp)
    # print(prompt)
    output = simple_generate(model, tokenizer, prompt, generation_config)
    print(output)
    print("\n==============================")

<extra_id_0> 
Брату Анны 9 лет * 2 = 15 лет.
Таким образом, через 3 года ему будет 15 лет. <extra_id_1> 
15 лет

<extra_id_0> 
Эшли должна заплатить 15 долларов + 15 долларов = 20 долларов.
Таким образом, она должна дать чаевые в размере 1 / 5 от 20 долларов = 15 долларов.
Таким образом, она должна дать чаевые в размере 15 долларов - 15 долларов = 5 долларов. <extra_id_1> 
5 долларов



In [11]:
import os
# Per-checkpoint folder for generation logs and pickles; reused if it already exists.
gen_dir = f'./generations/{MODEL_CHKP}'
os.makedirs(name=gen_dir, exist_ok=True)

In [50]:
# Baseline run without the calculator: one plain generation per test question,
# appended to a log file as we go.
generated_outputs_no_calc = []
records = gsm_test.to_dict('records')
# Explicit UTF-8: the generated text is Cyrillic and must not depend on the
# platform's default encoding.
with open(f'{gen_dir}/gen_t5_ft_output_no_calc.txt', 'a', encoding='utf-8') as f_out:
    # `total` gives tqdm a real progress bar/ETA; enumerate() alone has no length.
    for i, d in tqdm(enumerate(records), total=len(records)):
        # NOTE(review): `generated_outputs_no_calc` is reset at the top of this
        # cell, so this resume guard never triggers as written.
        if i < len(generated_outputs_no_calc):
            continue
        prompt = make_prompt(d['question'])
        output = simple_generate(model, tokenizer, prompt, generation_config)
        generated_outputs_no_calc.append(output)
        f_out.write(f'\n### OUTPUT {i} ###\n{output}\n')
        f_out.flush()

400it [18:44,  2.81s/it]


In [55]:
# with open(f'{gen_dir}/gen_t5_ft_output_no_calc.txt', 'r') as f_in:
#     generated_outputs_no_calc = re.split(r'\n### OUTPUT \d+ ###\n', f_in.read())[1:]

# Sanity check: the baseline loop appends one entry per test question.
len(generated_outputs_no_calc)

400

In [52]:
# Persist the baseline generations so they can be reloaded without re-running the model.
answers_path = f'{gen_dir}/gen_t5_ft_answers_no_calc.pkl'
with open(answers_path, 'wb') as f:
    pickle.dump(generated_outputs_no_calc, f)

# with open(f'{gen_dir}/gen_t5_ft_answers_no_calc.pkl', 'rb') as f:
#     generated_outputs_no_calc = pickle.load(f)

In [56]:
# Accuracy without the calculator: fraction of questions whose last generated
# number matches the reference within an absolute tolerance of 1e-3.
generated_answers = list(map(get_answer, generated_outputs_no_calc))
np.mean(np.isclose(generated_answers, actual_answers, rtol=1e-9, atol=1e-3))

0.0325

In [None]:
# List the (generated, reference) pairs that count as correct. The comparison
# uses the same tolerance as the accuracy figure (np.isclose with atol=1e-3)
# instead of exact float equality, so this listing agrees with that number.
for g, a in zip(generated_answers, actual_answers):
    if np.isclose(g, a, rtol=1e-9, atol=1e-3):
        print(g, a)

In [None]:
# List the (generated, reference) pairs that count as wrong. The comparison
# uses the same tolerance as the accuracy figure (np.isclose with atol=1e-3)
# instead of exact float inequality, so this listing agrees with that number.
for g, a in zip(generated_answers, actual_answers):
    if not np.isclose(g, a, rtol=1e-9, atol=1e-3):
        print(g, a)