Installing dependencies. You might need to tweak the CMAKE_ARGS for the `llama-cpp-python` pip package.

In [1]:
# GPU build of llama-cpp-python; GGUF models are supported starting from llama-cpp-python==0.1.79.
# NOTE(review): the command below passes -DLLAMA_CUBLAS=off; presumably it has to be
# switched to "on" for an actual GPU (cuBLAS) build -- confirm against the install docs.
# !CMAKE_ARGS="-DLLAMA_CUBLAS=off " pip install 'llama-cpp-python>=0.1.79' --force-reinstall --upgrade --no-cache-dir
# Needed for downloading the models
# !pip install huggingface_hub
# !pip install datasets

We start by downloading an instruction-finetuned Mistral model.

In [2]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face repository and GGUF file name of the model to fetch.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
model_basename = "mistral-7b-instruct-v0.2.Q6_K.gguf"

# hf_hub_download returns the local path of the requested file.
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

# This configuration has been tested on an RTX 3080 (VRAM of 16GB);
# you might need to tweak it with respect to your own hardware.
lcpp_llm = Llama(
    model_path=model_path,
    n_ctx=8192,  # Context window
    n_batch=8000,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_threads=4,  # CPU cores
    # n_gpu_layers=32, # Change this value based on your model and your GPU VRAM pool.
    logits_all=True,  # the generation cell asks for per-token logprobs
)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/mickus/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.2-GGUF/snapshots/3a6fbf4a41a1d52e415a4958cde6856d34b2db93/mistral-7b-instruct-v0.2.Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_mode

In [3]:
# Pilot questions: each question is asked once in English ('EN') and once in
# French ('FR'); both language variants of a question share the same 'id'.
# Plain string literals are used because none of the prompts interpolates values.
# NOTE(review): the EN prompt for id 4 spells "Reichrat" while the FR prompt
# uses "Reichsrat" -- possibly a typo; left untouched here so that regenerated
# outputs stay comparable with the ones already produced.
# NOTE(review): id 1 reads "William II" in EN but "Guillaume IV" in FR --
# presumably the same person under different numbering conventions; confirm.
records = [
    {'id': 1, 'lang': 'EN', 'prompt': "During which centuries did William II of Angoulême live?"},
    {'id': 1, 'lang': 'FR', 'prompt': "Durant quels siècles a vécu Guillaume IV d'Angoulême?"},
    {'id': 2, 'lang': 'EN', 'prompt': "Where is the Theban tomb TT171 located?"},
    {'id': 2, 'lang': 'FR', 'prompt': "Où se trouve la tombe thébaine TT171?"},
    {'id': 3, 'lang': 'EN', 'prompt': "What is Brahim Boudrali famous for?"},
    {'id': 3, 'lang': 'FR', 'prompt': "Pour quelle raison Brahim Boudrali est-il célèbre?"},
    {'id': 4, 'lang': 'EN', 'prompt': "Explain in three sentences what was the Reichrat of the Weimar Republic."},
    {'id': 4, 'lang': 'FR', 'prompt': "Explique en trois phrases ce qu'était le Reichsrat de la République de Weimar."},
    {'id': 5, 'lang': 'EN', 'prompt': "Where is Kristjan Ilves from and what sport does he play?"},
    {'id': 5, 'lang': 'FR', 'prompt': "D'où vient Kristjan Ilves et quel sport pratique-t-il?"},
]

import tqdm.notebook as tqdm

# Query the model once per record and store its answer on the record itself.
for record in tqdm.tqdm(records):
    # Wrap the raw question in [INST] ... [/INST] instruction markers.
    # NOTE(review): the literal "<s>" may lead to a duplicated BOS token if the
    # library already prepends one during tokenization -- confirm before changing.
    prompt = f"<s>[INST] {record['prompt']} [/INST]"

    response = lcpp_llm(
        prompt=prompt,
        max_tokens=512,
        logprobs=1,
        temperature=0.0,  # presumably makes decoding greedy -- confirm
    )

    # Only the first returned choice is kept.
    choice = response['choices'][0]
    record['text'] = choice['text']

    token_info = choice['logprobs']
    record['tokens'] = token_info['tokens']
    # Despite the key name, these are the per-token log-probabilities.
    record['logits'] = token_info['token_logprobs']

  0%|          | 0/10 [00:00<?, ?it/s]


llama_print_timings:        load time =    6255.50 ms
llama_print_timings:      sample time =      45.79 ms /    84 runs   (    0.55 ms per token,  1834.46 tokens per second)
llama_print_timings: prompt eval time =    6255.43 ms /    21 tokens (  297.88 ms per token,     3.36 tokens per second)
llama_print_timings:        eval time =   41725.14 ms /    83 runs   (  502.71 ms per token,     1.99 tokens per second)
llama_print_timings:       total time =   48510.63 ms /   104 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =    6255.50 ms
llama_print_timings:      sample time =      46.48 ms /   101 runs   (    0.46 ms per token,  2172.84 tokens per second)
llama_print_timings: prompt eval time =    7268.34 ms /    24 tokens (  302.85 ms per token,     3.30 tokens per second)
llama_print_timings:        eval time =   47700.00 ms /   100 runs   (  477.00 ms per token,     2.10 tokens per second)
llama_print_timings:       total time =   55573.77 ms /   124 

In [4]:
import pandas as pd
# One row per record; the dict keys of the records become the columns.
df = pd.DataFrame(records)
# Bare last expression so the notebook renders the table.
df

Unnamed: 0,id,lang,prompt,text,tokens,logits
0,1,EN,During which centuries did William II of Angou...,"William II, also known as Guillaume II or ""Wi...","[ William, II, ,, also, known, as, Gu, il...","[-0.00025233423, -3.5285328e-05, -0.6388749, -..."
1,1,FR,Durant quels siècles a vécu Guillaume IV d'Ang...,"Guillaume IV d'Angoulême, qui est également c...","[ Gu, illa, ume, IV, d, ', Ang, oul, ême, ,,...","[-0.0025836201, -3.349725e-05, -2.980228e-06, ..."
2,2,EN,Where is the Theban tomb TT171 located?,"The Theban tomb TT171, also known as the ""Tom...","[ The, The, ban, tomb, T, T, 1, 7, 1, ,, a...","[-0.16108914, -0.058819428, -9.894322e-06, -0...."
3,2,FR,Où se trouve la tombe thébaine TT171?,"The Theban Tombs, also known as the Necropole...","[ The, The, ban, Tom, bs, ,, also, known, ...","[-0.6857958, -0.09681804, -1.2993728e-05, -0.3..."
4,3,EN,What is Brahim Boudrali famous for?,Brahim Boudrali is a French-Algerian professi...,"[ Bra, him, B, oud, ral, i, is, a, French,...","[-0.0685153, -9.655905e-06, -0.0004923323, -0...."
5,3,FR,Pour quelle raison Brahim Boudrali est-il célè...,Brahim Boudrali n'est pas une personne célèbr...,"[ Bra, him, B, oud, ral, i, n, ', est, pas,...","[-0.012253834, -1.239769e-05, -0.00042822727, ..."
6,4,EN,Explain in three sentences what was the Reichr...,"The Reichrat, also known as the Reichstag's P...","[ The, Reich, rat, ,, also, known, as, th...","[-0.000696893, -3.6596583e-05, -0.038970422, -..."
7,4,FR,Explique en trois phrases ce qu'était le Reich...,Le Reichsrat était l'assemblée législatrice d...,"[ Le, Reich, s, rat, était, l, ', assem, bl...","[-0.24649408, -0.00039021508, -0.094294235, -3..."
8,5,EN,Where is Kristjan Ilves from and what sport do...,Kristjan Ilves is a hockey player. He was bor...,"[ Krist, jan, Il, ves, is, a, hockey, pla...","[-0.018138371, -0.0032320188, -0.0005134218, -..."
9,5,FR,D'où vient Kristjan Ilves et quel sport pratiq...,Kristjan Ilves est un joueur estonien de hock...,"[ Krist, jan, Il, ves, est, un, jou, eur, ...","[-0.00062112586, -0.009923174, -0.0024308201, ..."


In [5]:
# Dump the whole table as a single JSON array of row objects, followed by a newline.
with open('pilot-data.json', mode='w') as ostr:
    ostr.write(df.to_json(orient='records', lines=False))
    ostr.write('\n')