In [1]:
import torch
device = "cuda:0" if torch.cuda.is_available() else "cpu"

import os
import addict
import torch
import peft
import bitsandbytes as bnb

from tqdm import tqdm
from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    Trainer,
    TrainingArguments,
    logging,
    set_seed,
    LlamaForCausalLM,
    LlamaTokenizer,
    LlamaConfig
)
from accelerate import Accelerator

import sys
sys.path.append('/home/st-gorbatovski/sollama/')

from src.sft.utils import load_model
from src.sft.data import make_inference_dataset
from src.sft.models import eval_model

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/st-gorbatovski/.conda/envs/gorbatovski_env/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/st-gorbatovski/.conda/envs/gorbatovski_env/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [2]:
model, tokenizer = load_model(
    dict(
        name="/raid/models/llama-7b-hf",
        load_in_8bit=False,
        peft_model_id=None,
        device_map='cuda:0',
        torch_dtype=torch.float16,
        padding_side='left'
        )
)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Loading checkpoint shards: 100%|██████████| 33/33 [00:15<00:00,  2.09it/s]


In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
test_dataset = make_inference_dataset(tokenizer=tokenizer, **dict(
    dataset_name="Myashka/SO-Python_QA-API_Usage-tanh_score",
    max_prompt_length=512,
    split="test",
    use_title=True
))
dataloader = DataLoader(test_dataset, batch_size=1)

Found cached dataset csv (/home/st-gorbatovski/.cache/huggingface/datasets/Myashka___csv/Myashka--SO-Python_QA-API_Usage-tanh_score-3d810357b517bfcc/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)
Loading cached processed dataset at /home/st-gorbatovski/.cache/huggingface/datasets/Myashka___csv/Myashka--SO-Python_QA-API_Usage-tanh_score-3d810357b517bfcc/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d/cache-e5ac4e36ef2353e3.arrow


In [5]:
generate_config = dict(
    do_sample = True,
    max_new_tokens = 512,
    no_repeat_ngram_size = 2,
    top_k = 50,
    top_p = 0.9,
    use_cache = True,
    num_return_sequences = 2
)

In [6]:
model = model.eval()

In [7]:
print(f'0: {tokenizer.decode(0)}')
print(f'1: {tokenizer.decode(1)}')
print(f'2: {tokenizer.decode(2)}')
print()
print(f'bos token:{tokenizer.bos_token_id}')
print(f'pad token:{tokenizer.pad_token_id}')
print(f'unk token:{tokenizer.unk_token_id}')
print(f'eos token:{tokenizer.eos_token_id}')

0: <unk>
1: <s>
2: </s>

bos token:0
pad token:0
unk token:0
eos token:2


In [9]:
model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2

In [18]:
# model.generation_config.eos_token_id = 2
print(model.generation_config.bos_token_id)
print(model.generation_config.pad_token_id)
print(model.generation_config.eos_token_id)

1
0
2


In [19]:
print(model.generation_config.bos_token_id)
print(model.generation_config.pad_token_id)
print(model.generation_config.eos_token_id)

1
0
2


In [None]:
tokenizer.pad_token = tokenizer.eos_token
model.generation_config.pad_token_id = model.generation_config.eos_token_id

In [13]:
for i, batch in enumerate(tqdm(dataloader)):
    output_tokens = model.generate(
        input_ids=batch["input_ids"].to(model.device),
        attention_mask=batch["attention_mask"].to(model.device),
        **generate_config).cpu().numpy()
    if i == 5:
        break
    
    print(tokenizer.batch_decode(output_tokens, skip_special_tokens=True))

  0%|          | 1/2000 [00:03<1:53:23,  3.40s/it]

["Title: Nvenc session limit per GPU\nQuestion: I'm using Imageio, the python library that wraps around ffmpeg to do hardware encoding via nvenc. My issue is that I can't get more than 2 sessions to launch (I am using non-quadro GPUs). Even using multiple GPUs. I looked over NVIDIA's support matrix and they state only 2 sessions per gpu, but it seems to be per system.\nFor example I have 2 GPUs in a system. I can either use the env variable CUDA_VISIBLE_DEVICES or set the ffmpeg flag -gpu to select the GPU. I've verified gpu usage using Nvidia-smi cli. I can get 2 encoding sessions working on a single gpu. Or 1 session working on 2 separate gpus each. But I can't get 2 encoding sessions working on 2 gpus. \nEven more strangely if I add more gpus I am still stuck at 2 sessions. I can't launch a third encoding session on a 3rd gpu. I am always stuck at 2 regardless of the # of gpus. Any ideas on how to fix this?\nAnswer: The limit is set on the system level. It should be set using the nv

  0%|          | 2/2000 [00:11<3:18:40,  5.97s/it]

["Title: Setup of the Divio CMS Repositories\nQuestion: The Divio Django CMS offers two servers: TEST and LIVE. Are these also two separate repositories? Or how is this done in the background?\nI'm wondering because I would have the feeling the LIVE server is its own repository that just pulls from the TEST whenever I press deploy. Is that correct?\nAnswer: These two repositories are separate and you don't have to change anything in your code to use either of them. The TESTRPO (live) and TEMPRPO will always be available.\nDivio Labs will be making the repositories more flexible and giving them the same features as Divimasters soon. This means for instance that you'll be able to define the live URL and the staging URL on your own and choose between the two as you see fit.", "Title: Setup of the Divio CMS Repositories\nQuestion: The Divio Django CMS offers two servers: TEST and LIVE. Are these also two separate repositories? Or how is this done in the background?\nI'm wondering because I

  0%|          | 3/2000 [00:19<3:57:22,  7.13s/it]

["Title: How do i retrain the model without losing the earlier model data with new set of data\nQuestion: for my current requirement, I'm having a dataset of 10k+ faces from 100 different people from which I have trained a model for recognizing the face(s). The model was trained by getting the 128 vectors from the facenet_keras.h5 model and feeding those vector value to the Dense layer for classifying the faces.\nBut the issue I'm facing currently is\n\nif want to train one person face, I have to retrain the whole model once again.\n\nHow should I get on with this challenge? I have read about a concept called transfer learning but I have no clues about how to implement it. Please give your suggestion on this issue. What can be the possible solutions to it?\nAnswer: You should use a pre-trained model that is trained on a much larger dataset than your training set. This way, you only need to feed the pretraind model a new training sample to obtain a high accuracy. If you want, it is poss

  0%|          | 4/2000 [00:23<3:16:41,  5.91s/it]

["Title: How to debug (500) Internal Server Error on Python Waitress server?\nQuestion: I'm using Python and Flask, served by Waitress, to host a POST API. I'm calling the API from a C# program that posts data and gets a string response. At least 95% of the time, it works fine, but sometimes the C# program reports an error: \n(500) Internal Server Error.\nThere is no further description of the error or why it occurs. The only clue is that it usually happens in clusters -- when the error occurs once, it likely occurs several times in a row. Without any intervention, it then goes back to running normally.\nSince the error is so rare, it is hard to troubleshoot. Any ideas as to how to debug or get more information? Is there error handling I can do from either the C# side or the Flask/Waitress side?\nAnswer: Python Error: AttributeError: 'str' object has no attribute 'lower'", "Title: How to debug (500) Internal Server Error on Python Waitress server?\nQuestion: I'm using Python and Flask,

  0%|          | 5/2000 [00:30<3:30:06,  6.32s/it]

["Title: Check inputs in csv file\nQuestion: I`m new to python. I have a csv file. I need to check whether the inputs are correct or not. The ode should scan through each rows. \nAll columns for a particular row should contain values of same type: Eg:\nAll columns of second row should contain only string, \nAll columns of third row should contain only numbers... etc\nI tried the following approach, (it may seem blunder):\nI have only 15 rows, but no idea on number of columns(Its user choice)\ndf.iloc[1].str.isalpha()\nThis checks  for string. I don`t know how to check ??\nAnswer: Hi to go through the csv files, you can use pandas or numpy. They are very fast and easy to handle csv data.\n\nIf your csv is of this format. you are having your data like in this dataframe and i am not sure about the number for columns. but you could simply use\n`df['col1'].isna()`\nif you want to see if there is a value in column 2 or there isn't. if it's all string values. it will return `True`.\nfor integ

  0%|          | 5/2000 [00:38<4:14:39,  7.66s/it]


In [15]:
print(tokenizer.batch_decode(output_tokens, skip_special_tokens=True)[0])

Title: how do I upgrade pip on Mac?
Question: I cannot upgrade pip on my Mac from the Terminal. 
According to the documentation I have to type the command:
pip install -U pip
I get the error message in the Terminal:
pip: command not found
I have Mac OS 10.14.2, python 3.7.2 and pip 18.1.
I want to upgrade to pip 19.2.3
Answer: MacOS doesn't provide pip by default, so you will have use a third party source. For that purpose we recommend pypi, which provides the latest versions of pip (but requires you to download them). For instructions, please visit:  https://stackoverflow.com/questions/33603583/how-do-i-install-pip-on-mac-os-x
How to enable "Bot" to my Facebook Business Page?


In [16]:
print(tokenizer.batch_decode(output_tokens, skip_special_tokens=True)[1])

Title: how do I upgrade pip on Mac?
Question: I cannot upgrade pip on my Mac from the Terminal. 
According to the documentation I have to type the command:
pip install -U pip
I get the error message in the Terminal:
pip: command not found
I have Mac OS 10.14.2, python 3.7.2 and pip 18.1.
I want to upgrade to pip 19.2.3
Answer: If you are running MacOS and have upgraded your system, then you may need to do a different command. You can go to Mac App Store and get pip by searching there. Once you have the pip app, just search for "pip" and hit install to get it. Make sure you install pip using the app from MacAppStore, or you will not be able to run it in a terminal. Then, to reinstall pip, go back to terminal and run "python" to bring it up. then do "which pip" then "export PIP_PATH=PATH". If pip is not working, you should try again. If it is still not happening, try "sudo" in front of your commands, because some apps don't work on the standard terminal commands. It should work. if it do