# Setup for a small text completion
If you want to use the GPU, select T4 as the hardware accelerator. This should allow you to run models on the GPU.

Running `nvidia-smi` as a shell command (that is what the leading exclamation mark is for) shows you the current status of the GPU. The free tier gives you around 15 GB of VRAM.

In [1]:
# workaround: always report UTF-8 as the preferred encoding, to avoid
# running into locale issues with 'nvidia-smi'
import locale


def getpreferredencoding(do_setlocale=True):
    """Return "UTF-8" unconditionally; ``do_setlocale`` is accepted but ignored."""
    return "UTF-8"


# from here on, locale.getpreferredencoding() resolves to the stub above
locale.getpreferredencoding = getpreferredencoding

In [2]:
# shell escape: print the GPU status table (GPU name, driver/CUDA version, memory usage, utilisation)
! nvidia-smi

Tue Mar 19 12:32:58 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Setup

In [3]:
# for data wrangling
import json
import pandas as pd
# for pretty printing
from IPython.display import Markdown

In [4]:
# dependencies to work with HuggingFace models
!pip install auto-gptq accelerate optimum transformers

Collecting auto-gptq
  Downloading auto_gptq-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.5/23.5 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.28.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting optimum
  Downloading optimum-1.17.1-py3-none-any.whl (407 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m407.1/407.1 kB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from auto-gptq)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
Collecting rouge (from auto-gptq)
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Collecting gekko (from auto-gptq)
  Downloadin

# Zero-shot or few-shot classification with an LLM

In [5]:
# HuggingFace classes used to load the tokenizer, the configuration and the model
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline

# repository id of the model on the HuggingFace hub
# (cf. https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ)
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

# tokenizer and configuration are loaded first; the configuration is then
# handed explicitly to the model loader
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
config = AutoConfig.from_pretrained(model_name_or_path)

# device_map="auto" leaves the placement of the weights (GPU/CPU) to the library
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    config=config,
    device_map="auto",
)

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.16G [00:00<?, ?B/s]



generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

### A very small example

In [6]:
# define the prompt structure and usage (few-shot classification):
# a first [INST] ... [/INST] block with the instruction and one worked example,
# the expected JSON answer closed by </s>, and a second [INST] ... [/INST] block
# holding the new record the model has to convert.
# NOTE(review): the token ids printed for this prompt start with 1, 28705, 13, 1 —
# it looks like the tokenizer prepends its own start token in addition to the
# literal <s> written here; confirm whether the duplicate is intended.
exampleprompt="""
<s>[INST] You are a helpful code assistant. Your task is to generate a valid JSON object based on the given information. So for instance the following:
name: John
lastname: Smith
address: #1 Samuel St.
would be converted to:[/INST]
{
"address": "#1 Samuel St.",
"lastname": "Smith",
"name": "John"
}
</s>
[INST]
name: Ted
lastname: Pot
address: #1 Bisson St.
[/INST]"""
# the text to convert is everything between the second [INST] and its closing [/INST]

In [7]:
# encode the input into tokens and move them to the device the model was placed on
# (model.device follows device_map="auto", so nothing is hard-wired to CUDA here)
inputtokens = tokenizer(exampleprompt, return_tensors="pt").input_ids.to(model.device)

inputtokens

tensor([[    1, 28705,    13,     1,   733, 16289, 28793,   995,   460,   264,
         10865,  2696, 13892, 28723,  3604,  3638,   349,   298,  8270,   264,
          3716,  9292,  1928,  2818,   356,   272,  2078,  1871, 28723,  1537,
           354,  3335,   272,  2296, 28747,    13,   861, 28747,  2215,    13,
          4081,   861, 28747,  6717,    13,  5540, 28747,   422, 28740, 16595,
           662, 28723,    13, 28727,   474,   347, 15514,   298, 28747, 28792,
         28748, 16289, 28793,    13, 28751,    13, 28739,  5540,  1264, 11441,
         28740, 16595,   662,  9191,    13, 28739,  4081,   861,  1264,   345,
         10259,   372,   548,    13, 28739,   861,  1264,   345, 14964, 28739,
            13, 28752,    13,     2, 28705,    13, 28792, 16289, 28793,    13,
           861, 28747, 15268,    13,  4081,   861, 28747, 10650,    13,  5540,
         28747,   422, 28740,   365, 20947,   662, 28723,    13, 28792, 28748,
         16289, 28793]], device='cuda:0')

In [8]:
# autoregressive generation: the model extends the prompt token by token,
# producing at most 512 new tokens
outputtokens = model.generate(
    inputtokens,
    max_new_tokens=512,
)

# first (and only) sequence of the batch: the prompt tokens followed by the completion.
# you can ignore the warning, the default values used are OK.
outputtokens[0]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


tensor([    1, 28705,    13,     1,   733, 16289, 28793,   995,   460,   264,
        10865,  2696, 13892, 28723,  3604,  3638,   349,   298,  8270,   264,
         3716,  9292,  1928,  2818,   356,   272,  2078,  1871, 28723,  1537,
          354,  3335,   272,  2296, 28747,    13,   861, 28747,  2215,    13,
         4081,   861, 28747,  6717,    13,  5540, 28747,   422, 28740, 16595,
          662, 28723,    13, 28727,   474,   347, 15514,   298, 28747, 28792,
        28748, 16289, 28793,    13, 28751,    13, 28739,  5540,  1264, 11441,
        28740, 16595,   662,  9191,    13, 28739,  4081,   861,  1264,   345,
        10259,   372,   548,    13, 28739,   861,  1264,   345, 14964, 28739,
           13, 28752,    13,     2, 28705,    13, 28792, 16289, 28793,    13,
          861, 28747, 15268,    13,  4081,   861, 28747, 10650,    13,  5540,
        28747,   422, 28740,   365, 20947,   662, 28723,    13, 28792, 28748,
        16289, 28793,   371,    13, 28739,  5540,  1264, 11441, 

In [9]:
# this is the completion in tensor form: the prompt tokens are sliced off;
# the end-of-sequence token (id 2) is still present as the last element
outputtokens[0][inputtokens.shape[1]:]

tensor([  371,    13, 28739,  5540,  1264, 11441, 28740,   365, 20947,   662,
         9191,    13, 28739,  4081,   861,  1264,   345, 28753,   322,   548,
           13, 28739,   861,  1264,   345, 28738,   286, 28739,    13, 28752,
            2], device='cuda:0')

In [11]:
# this is the result in text: decode only the newly generated tokens.
# skip_special_tokens=True removes the end-of-sequence marker when it is present;
# slicing with [:-1] would cut off a real token whenever generation stops
# because max_new_tokens was reached instead of at an end-of-sequence token.
result = tokenizer.decode(outputtokens[0][inputtokens.shape[1]:], skip_special_tokens=True)
Markdown(result)

{
"address": "#1 Bisson St.",
"lastname": "Pot",
"name": "Ted"
}

In [12]:
# this is the result parsed as json into a Python dict
# (json.loads raises json.JSONDecodeError if the completion is not valid JSON)
json.loads(result)

{'address': '#1 Bisson St.', 'lastname': 'Pot', 'name': 'Ted'}

### A more extensive example

In [13]:
from string import Template

# Few-shot prompt: the instruction lists the aspects to detect, then three example
# texts and the answers expected for them. Each example answer is a valid JSON
# object (every key followed by a colon), so the model is shown exactly the format
# that is parsed with json.loads later on. "$content" marks where the text to
# classify is inserted.
CUSTOMPROMPT = """<s>[INST] You are a clever analysist who detects the presence of the following aspects in texts:
- Animals
- Africa (in the broad sense)
- Call for a coup

You only return and reply with valid, iterable RFC8259 compliant JSON in your responses.
You do NOT provide any additional information, only the JSON is returned.

For example, the following texts
"I like the presence of elephant in Botswana"
"The president of South-Africa should go!"
"Vladimir is treating his soldiers like dogs"
would result in:[/INST]
{"TEXT": "I like the presence of elephant in Botswana", "ANIMALS": "True", "AFRICA": "True", "COUP": "False"}
{"TEXT": "The president of South-Africa should go!", "ANIMALS": "False", "AFRICA": "True", "COUP": "True"}
{"TEXT": "Vladimir is treating his soldiers like dogs", "ANIMALS": "True", "AFRICA": "False", "COUP": "False"}
</s>
[INST]
$content
[/INST]
"""

# Making it into a template with "$content" as its only placeholder
PROMPT = Template(CUSTOMPROMPT)

# sample text used for the single-application demo
MESSAGE = "The leaders of Niger and Ghana should be locked up like animals and their families slaughtered!"

In [15]:
# we can define a custom classification function doing all of this
def classifythis(text, model, tokenizer, prompt=None, **kwargs):
  """Classify ``text`` with the few-shot prompt and return the model's answer as text.

  Args:
    text: the text to classify; it replaces ``$content`` in the prompt template.
    model: the language model; its ``generate`` method and ``device`` attribute are used.
    tokenizer: the tokenizer belonging to ``model``; used to encode the prompt and
      to decode the completion.
    prompt: optional ``string.Template`` with a ``$content`` placeholder. When it is
      ``None`` the notebook-level ``PROMPT`` is used.
    **kwargs: extra keyword arguments forwarded to ``model.generate``.
      ``max_new_tokens`` defaults to 120 when it is not given.

  Returns:
    The decoded completion without the prompt and without special tokens. It is
    returned as a plain string and is not parsed here.
  """
  # fill the text into the prompt template
  template = PROMPT if prompt is None else prompt
  msg = template.substitute(content=text)
  # load the input tokens on the same device as the model
  inputtokens = tokenizer(msg, return_tensors="pt").to(model.device)
  # determine the input length, so the prompt can be cut off the output again
  inputlength = inputtokens["input_ids"].shape[1]
  # generate the output; callers may override the generation settings
  kwargs.setdefault("max_new_tokens", 120)
  outputtokens = model.generate(**inputtokens, **kwargs)
  # decode only the new tokens. skip_special_tokens drops the end-of-sequence marker
  # when there is one; cutting the last token unconditionally would lose a real token
  # whenever generation stops at max_new_tokens.
  answer = tokenizer.decode(outputtokens[-1][inputlength:], skip_special_tokens=True)

  return answer



In [16]:
# single application
classifythis(MESSAGE, model, tokenizer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'{"TEXT": "The leaders of Niger and Ghana should be locked up like animals and their families slaughtered!", "ANIMALS": "True", "AFRICA": "True", "COUP": "True"}'

We can now apply our method to a set of texts to obtain a zero- or few-shot classification and collect the results in a DataFrame.

In [17]:
# sample texts to classify; the last entry is an English translation of the one before it
messages = [
    "The leaders of Niger and Ghana should be locked up like animals and their families slaughtered!",
    "All animals are created equal, but some are more equal than others",
    "Never gonna give you up, Never gonna let you down, Never gonna run around and desert you",
    "Ma mère m'a donné cent francs pout acheter un chien",
    "Ik ben die warmte hier beu",
    "jungle book was a great read, the kids loved it!",
    """группа "Вагнер" активно поддерживает повстанцев и дестабилизирует ситуацию в Сахельском регионе в целом.""",
    """the Wagner group is actively supporting the rebels and destabilising the Sahel region as a whole.""",  # translation of the text above
]

# classify every text, parse each JSON answer into a dict and build one row per text
res = pd.DataFrame(
    [json.loads(classifythis(message, model, tokenizer)) for message in messages]
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# display the classification table: one row per text, with the TEXT, ANIMALS, AFRICA and COUP columns
res

Unnamed: 0,TEXT,ANIMALS,AFRICA,COUP
0,The leaders of Niger and Ghana should be locke...,True,True,True
1,"All animals are created equal, but some are mo...",True,False,False
2,"Never gonna give you up, Never gonna let you d...",False,False,False
3,Ma mère m'a donné cent francs pour acheter un ...,True,False,False
4,Ik ben die warmte hier beu,False,False,False
5,"jungle book was a great read, the kids loved it!",True,False,False
6,"группа ""Вагнер"" активно поддерживает повстанце...",False,True,False
7,the Wagner group is actively supporting the re...,False,True,False


For more advanced use-cases, consider LangChain: https://www.langchain.com/