In [None]:
!pip install torchmetrics transformers datasets wandb accelerate nltk sentencepiece bitsandbytes scipy

In [1]:
import wandb

from torchmetrics.text.rouge import ROUGEScore
from torchmetrics import SacreBLEUScore
import nltk
import torch
from tqdm.auto import tqdm
import pandas as pd
import os
import gc

from datasets import load_dataset
from accelerate import Accelerator

from transformers import set_seed, LlamaForCausalLM, LlamaTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [35]:
tokenizer = LlamaTokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", eos_token="</s>"
)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
tokenizer.padding_side = "left"
print(f"EOS token: {tokenizer.eos_token} EOS toke id: {tokenizer.eos_token_id}")
print(f"PAD token: {tokenizer.pad_token} PAD toke id: {tokenizer.pad_token_id}")
print(f"UNK token: {tokenizer.unk_token} UNK toke id: {tokenizer.unk_token_id}")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


EOS token: </s> EOS toke id: 2
PAD token: <unk> PAD toke id: 0
UNK token:  UNK toke id: 0


In [3]:
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    # torch_dtype=torch.float16,
    device_map="auto",
)




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /opt/conda/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


Loading checkpoint shards: 100%|██████████| 33/33 [00:10<00:00,  3.03it/s]


In [4]:
def prepare_inference(
    dataset_name,
    tokenizer,
    split,
    max_prompt_length,
    padding="longest",
    truncate_promt=True,
):
    def _prepare_prompt(question):
        return f"Question: {question}\nAnswer:"

    def promt_tokenize(examples):
        if truncate_promt:
            q_toks = tokenizer.encode(examples["Question"])
            q_toks = q_toks[: max_prompt_length - 8]
            tmp = tokenizer.decode(q_toks).strip()
        else:
            tmp = examples["Question"]

        tmp = _prepare_prompt(tmp)

        tokenized_dict = tokenizer(
            tmp, padding=padding, max_length=max_prompt_length, truncation=True
        )

        return tokenized_dict

    dataset = load_dataset(dataset_name, split=split)
    dataset = dataset.map(promt_tokenize)
    dataset.set_format(
        type="torch", columns=["Question", "Answer", "input_ids", "attention_mask"]
    )

    return dataset

In [5]:
data_config = dict(dataset_name = 'Myashka/SO-Python_QA-API_Usage-tanh_score',
                   split = 'test',
                   max_prompt_length=512,
                   padding='longest',
                   truncate_promt=True)

In [6]:
test_dataset = prepare_inference(tokenizer=tokenizer, **data_config)

Found cached dataset csv (/root/.cache/huggingface/datasets/Myashka___csv/Myashka--SO-Python_QA-API_Usage-tanh_score-55f862e88e900e46/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached processed dataset at /root/.cache/huggingface/datasets/Myashka___csv/Myashka--SO-Python_QA-API_Usage-tanh_score-55f862e88e900e46/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-b53552a141b1d64b.arrow


In [7]:
# test_dataset['input_ids'][0:2]

In [8]:
set_seed(42)

In [9]:
generate_config = dict(
    do_sample=True,
    max_new_tokens=512,
    no_repeat_ngram_size=2,
    top_k=50,
    top_p=0.9,
    # min_new_tokens=20
)

In [10]:
accelerator = Accelerator()

In [11]:
from transformers import DataCollatorForSeq2Seq

In [12]:
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer, return_tensors="pt", padding=True
)

In [13]:
input_dataset = test_dataset.remove_columns(['Q_CreationDate', 'Title', 'Question', 'Answer', 'Score', 'Is_accepted', 'N_answers', 'Q_Id'])
test_dataset = test_dataset.remove_columns(['Q_CreationDate', 'Title','input_ids', 'attention_mask', 'Score', 'Is_accepted', 'N_answers', 'Q_Id'])

In [14]:
from torch.utils.data import DataLoader

input_dataloader = DataLoader(input_dataset, batch_size=1, collate_fn=data_collator)
test_dataloader = DataLoader(test_dataset,
                             batch_size=1)

In [15]:
model, input_dataloader = accelerator.prepare(model, input_dataloader)

In [16]:
batch = next(iter(input_dataloader))

In [17]:
model.eval()
with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch,
                                    **generate_config)



In [19]:
print(*tokenizer.batch_decode(output_tokens, skip_special_tokens=True), sep="\n\n")

Question: <unk>I'm using Imageio, the python library that wraps around ffmpeg to do hardware encoding via nvenc. My issue is that I can't get more than 2 sessions to launch (I am using non-quadro GPUs). Even using multiple GPUs. I looked over NVIDIA's support matrix and they state only 2 sessions per gpu, but it seems to be per system.
For example I have 2 GPUs in a system. I can either use the env variable CUDA_VISIBLE_DEVICES or set the ffmpeg flag -gpu to select the GPU. I've verified gpu usage using Nvidia-smi cli. I can get 2 encoding sessions working on a single gpu. Or 1 session working on 2 separate gpus each. But I can't get 2 encoding sessions working on 2 gpus. 
Even more strangely if I add more gpus I am still stuck at 2 sessions. I can't launch a third encoding session on a 3rd gpu. I am always stuck at 2 regardless of the # of gpus. Any ideas on how to fix this?
Answer: There are a few known issues with ImageIO/nvencc that are reported against the ImageIo/FFmpeg project. 

In [None]:
rouge = ROUGEScore()
bleu = SacreBLEUScore(1, lowercase=True)