Based on https://huggingface.co/docs/transformers/v4.41.0/en/llm_tutorial

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline
from transformers import AutoTokenizer, LlamaForCausalLM, AutoModelForCausalLM, BitsAndBytesConfig

In [None]:
from huggingface_hub import login
login()

In [2]:
from numba import cuda
device = cuda.get_current_device()
device.reset()

In [3]:
# model_id = "meta-llama/Meta-Llama-3-8B"
# model_id = "mistralai/Mistral-7B-v0.1"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_id = "microsoft/Phi-3-mini-128k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_8bit=True,
    attn_implementation="sdpa",
)

tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default
model_inputs = tokenizer(
    ["'The soup is hot' translated to the Southern Nigerian language Obolo is", "'The soup is hot' translated to the Southern Indian language Tamil is"], return_tensors="pt", padding=True
).to("cuda")
generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


 "'The soup is hot' translated to the Southern Indian language Tamil is 'Kootu varum'. In this context, 'Kootu' means soup and 'varum' means is hot.\nIn the Southern Indian language Telugu, 'The soup is hot' is translated to 'Kootu pedata'. Here, 'Kootu' means soup and 'pedata' means is hot.\nIn the Southern Indian language Malayalam, 'The soup is hot' is translated to 'Kootu kaanum'. Here, 'Kootu"]

In [6]:
tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default
model_inputs = tokenizer(
    ["End your answer to the following question with the tag '[END]' and do not provide anything but the answer to the question. What is the French translation of 'The small fox loved croissants.'? [BEGIN]",
     "End your answer to the following question with the tag '[END]' and do not provide anything but the answer to the question. What is the Obolo translation of 'The small bird loved grass.'? [BEGIN]"], return_tensors="pt", padding=True
).to("cuda")
generated_ids = model.generate(**model_inputs, max_new_tokens=100)
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


['End your answer to the following question with the tag \'[END]\' and do not provide anything but the answer to the question. What is the French translation of \'The small fox loved croissants.\'? [BEGIN] The French translation of \'The small fox loved croissants.\' is \'Le petit renard aimait les croissants.\' [END]... Read more →\nPosted at 11:30 AM in French, Language Translation | Permalink | Comments (0)\nWhat is the French translation of "The small fox loved croissants."?\n[BEGIN]\nThe French translation of "The small fox loved croissants." is "Le petit renard aimait les croissants."\n[',
 "End your answer to the following question with the tag '[END]' and do not provide anything but the answer to the question. What is the Obolo translation of 'The small bird loved grass.'? [BEGIN] The Obolo translation of 'The small bird loved grass.' is 'Ibibi ebelebi ebele.' [END]...\n\n### Other questions from the same topic\n\nWhat is the Obolo translation of 'The small bird loved grass.'? 

In [3]:
# Prepare the input as before
chat = [
    {"role": "system", "content": "You are an expert translator in many languages. You will simply answer the given translation question, which starts with a [BEGIN] tag and ends with a [END] tag. Do not repeat the question or provide any other text that is not the translation of the provided text."},
    {"role": "user", "content": "What is the French translation of 'The small fox loved croissants.'? [BEGIN]"}
]

# 1: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, load_in_8bit=True, attn_implementation="sdpa")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left")
tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default

# 2: Apply the chat template
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
# print("Formatted chat:\n", formatted_chat)

# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
# Move the tokenized inputs to the same device the model is on (GPU/CPU)
# inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
# print("Tokenized inputs:\n", inputs)

# 4: Generate text from the model
generated_ids = model.generate(**inputs, max_new_tokens=512)
# print("Generated tokens:\n", generated_ids)

# 5: Decode the output back to a string
decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print("Decoded output:\n", decoded_output[0])

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Decoded output:
 system

You are an expert translator in many languages. You will simply answer the given translation question, which starts with a [BEGIN] tag and ends with a [END] tag. Do not repeat the question or provide any other text that is not the translation of the provided text.user

What is the French translation of 'The small fox loved croissants.'? [BEGIN]assistant

Le renard petit aimait les croissants.


In [4]:
chat.append(
    {"role": "user", "content": "What is the Obolo translation of 'The small bird loved grass.'? [BEGIN]"}
)

formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
generated_ids = model.generate(**inputs, max_new_tokens=512)
decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print(decoded_output[0])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

You are an expert translator in many languages. You will simply answer the given translation question, which starts with a [BEGIN] tag and ends with a [END] tag. Do not repeat the question or provide any other text that is not the translation of the provided text.user

What is the French translation of 'The small fox loved croissants.'? [BEGIN]user

What is the Obolo translation of 'The small bird loved grass.'? [BEGIN]assistant

Ibo: Nkpo mkpo na-akpa ọkụ. [END]


In [7]:
chat.append(
    {"role": "user", "content": "What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]"}
)

formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
generated_ids = model.generate(**inputs, max_new_tokens=512)
decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print(decoded_output[0])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

You are an expert translator in many languages. You will simply answer the given translation question, which starts with a [BEGIN] tag and ends with a [END] tag. Do not repeat the question or provide any other text that is not the translation of the provided text.user

What is the French translation of 'The small fox loved croissants.'? [BEGIN]user

What is the Obolo translation of 'The small bird loved grass.'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]assistant

Abi, abasi, abakwa, abana, abana. [END]


In [8]:
chat.append(
    {"role": "user", "content": "What is the Tamil translation of 'The man is young.'? [BEGIN]"}
)

formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
generated_ids = model.generate(**inputs, max_new_tokens=512)
decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print(decoded_output[0])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

You are an expert translator in many languages. You will simply answer the given translation question, which starts with a [BEGIN] tag and ends with a [END] tag. Do not repeat the question or provide any other text that is not the translation of the provided text.user

What is the French translation of 'The small fox loved croissants.'? [BEGIN]user

What is the Obolo translation of 'The small bird loved grass.'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]user

What is the Obolo translation of 'one, two, three, four, five'? [BEGIN]user

What is the Tamil translation of 'The man is young.'? [BEGIN]assistant

ஆண் இளையவர்.


Now we want to do evaluation on of the model on our Bible dataset

In [3]:
from datasets import load_dataset
dataset = load_dataset("csv", data_files="data/v3.csv")

In [69]:
dataset

DatasetDict({
    train: Dataset({
        features: ['Verse', 'Obolo', 'English'],
        num_rows: 31097
    })
})

In [80]:
# def encode(examples):
#     return tokenizer(examples["Obolo"], examples["English"], padding="max_length")

# dataset = dataset.map(encode, batched=True)
# # dataset['train'][:2]

In [5]:
data_splits = dataset['train'].train_test_split(0.1)
data_splits

DatasetDict({
    train: Dataset({
        features: ['Verse', 'Obolo', 'English'],
        num_rows: 27987
    })
    test: Dataset({
        features: ['Verse', 'Obolo', 'English'],
        num_rows: 3110
    })
})

In [6]:
train, test = data_splits['train'], data_splits['test']
display(train, test)

Dataset({
    features: ['Verse', 'Obolo', 'English'],
    num_rows: 27987
})

Dataset({
    features: ['Verse', 'Obolo', 'English'],
    num_rows: 3110
})

Evaluation metrics

In [7]:
from evaluate import load, combine

In [8]:
chrf = load('chrf')
gleu = load('google_bleu')
rouge = load('rouge') 
bleu = load('bleu')
meteor = load('meteor')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\abhiv\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\abhiv\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\abhiv\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [9]:
metrics = combine([chrf, bleu, rouge, meteor, gleu])

In [6]:
# for now do a check that these metrics work on a default dataset
rt_data = load_dataset("rotten_tomatoes")
rt_data

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})

In [78]:
# def tokenization(example):
#     return tokenizer(example["text"])

# rt_data = rt_data.map(tokenization, batched=True)
# # rt_data.set_format(type="torch", columns=["input_ids", "token_type_ids", "attention_mask", "label"])
# rt_data

Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 1066
    })
})

In [24]:
from datasets import load_metric
metric = load_metric('glue', 'mrpc')
chat = [
    {"role": "system", "content": "You are a sentiment analysis bot. Given an input sentence, output 0 if it has negative sentiment and 1 if it has positive sentiment. Your answer is always exactly 1 character long."}
]

length = 10
preds = []
for idx, line in enumerate(rt_data['test']['text'][:length]):
    chat += [{"role": "user", "content": line} ]
    formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
    generated_ids = model.generate(**inputs, max_new_tokens=20)
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    preds.append(decoded_output[-1])

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [28]:
preds = [int(p) for p in preds]
print(preds)
refs = rt_data['test']['label'][:length]
print(refs)

[1, 1, 0, 1, 0, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [30]:
final_score = metric.compute(predictions=preds, references=refs)
final_score

{'accuracy': 0.8, 'f1': 0.888888888888889}

In [57]:
beg_tok, end_tok = '[BEGIN]', '[END]'
chat = [
    {"role": "system", "content": f"You are an expert translator in Obolo and English. You will simply translate the given line from Obolo into English. Your answer will start with a {beg_tok} tag and end with a {end_tok} tag. Do not repeat the question or provide any other text that is not the translation of the provided text."}
]

length = 5
preds = []
for idx, line in enumerate(test['Obolo'][:length]):
    chat += [{"role": "user", "content": line} ]
    formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
    generated_ids = model.generate(**inputs, max_new_tokens=512, do_sample=True)
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    idx_beg, idx_end = decoded_output.rindex(beg_tok), decoded_output.rindex(end_tok)
    preds.append(decoded_output[idx_beg+len(beg_tok):idx_end].strip())

refs  = test['English'][:length]

print(preds[:2])
print(refs[:2])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


['The people of Awaji are being killed by the war, but they are not being defeated; the war is being fought, and we are being killed in it.', 'The elders are saying that they will not be happy if we do not bring back our brothers who went to the war; and they are saying that if we do not bring them back, the people of the land will not forgive us.']
['behold, happy [is] the man whom god correcteth: therefore despise not thou the chastening of the almighty:', 'so the priests and the prophets and all the people heard jeremiah speaking these words in the house of the lord.']


In [58]:
scores = metrics.compute(predictions=preds, references=refs)
scores

{'score': 20.631105127578163,
 'char_order': 6,
 'word_order': 0,
 'beta': 2,
 'bleu': 0.0,
 'precisions': [0.10869565217391304, 0.01675977653631285, 0.0, 0.0],
 'brevity_penalty': 1.0,
 'length_ratio': 1.936842105263158,
 'translation_length': 184,
 'reference_length': 95,
 'rouge1': 0.11044534412955465,
 'rouge2': 0.012698412698412698,
 'rougeL': 0.083903990746096,
 'rougeLsum': 0.08,
 'meteor': 0.12263219123316241,
 'google_bleu': 0.032577903682719546}

In [10]:
# model_id = "microsoft/Phi-3-mini-128k-instruct"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_id = "google/gemma-1.1-2b-it"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, load_in_8bit=True, attn_implementation="sdpa")
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [11]:
beg_tok, end_tok = '[BEGIN]', '[END]'
chat = [
    {"role": "system", "content": f"You are an expert translator in Obolo and English. You will simply translate the given line from Obolo into English. Your answer will start with a {beg_tok} tag and end with a {end_tok} tag. Do not repeat the question or provide any other text that is not the translation of the provided text."}
]

length = 10
preds = []
for idx, line in enumerate(test['Obolo'][:length]):
    # chat += [{"role": ("user" if idx%2 else "assistant"), "content": line} ]
    chat += [{"role": "user", "content": line} ]
    formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False, padding=True).to("cuda")
    generated_ids = model.generate(**inputs, max_new_tokens=512, do_sample=True)
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    idx_beg, idx_end = decoded_output.rindex(beg_tok), decoded_output.rindex(end_tok)
    preds.append(decoded_output[idx_beg+len(beg_tok):idx_end].strip())

refs  = test['English'][:length]

# print(preds[:2])
# print(refs[:2])
scores = metrics.compute(predictions=preds, references=refs)
scores

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'score': 17.093869231335184,
 'char_order': 6,
 'word_order': 0,
 'beta': 2,
 'bleu': 0.0,
 'precisions': [0.08349146110056926, 0.0038684719535783366, 0.0, 0.0],
 'brevity_penalty': 1.0,
 'length_ratio': 2.271551724137931,
 'translation_length': 527,
 'reference_length': 232,
 'rouge1': 0.07797474302118265,
 'rouge2': 0.000790513833992095,
 'rougeL': 0.07364187240348231,
 'rougeLsum': 0.0740951817499805,
 'meteor': 0.08617908313991111,
 'google_bleu': 0.021904761904761906}

In [13]:
list(zip(preds, refs))

[("My mother's friend's child is the one who has eaten the yam with the man's child.",
  'and over the host of the tribe of the children of issachar [was] nethaneel the son of zuar.'),
 ('Jonathan Ofofook says, "I\'ll take care of it! The burden of Solomon\'s debt is on me."',
  'and jonathan answered and said to adonijah, verily our lord king david hath made solomon king.'),
 ('Jonathan said, "What a shame! The king\'s daughter is in debt, and she is asking for Solomon\'s help."',
  'mattaniah, mattenai, and jaasau,'),
 ('There, I have taken the fruit, I have taken the fruit of the land, I have taken the fruit of the land and given to the king, I have given to the king and his son. There, I have taken the fruit, I have taken the fruit of the land.',
  'but the days will come, when the bridegroom shall be taken away from them, and then shall they fast in those days.'),
 ('“The children are playing, they are happy, but the adults are not at home, they are going to the farm, and I will g

In [14]:
scores

{'score': 17.093869231335184,
 'char_order': 6,
 'word_order': 0,
 'beta': 2,
 'bleu': 0.0,
 'precisions': [0.08349146110056926, 0.0038684719535783366, 0.0, 0.0],
 'brevity_penalty': 1.0,
 'length_ratio': 2.271551724137931,
 'translation_length': 527,
 'reference_length': 232,
 'rouge1': 0.07797474302118265,
 'rouge2': 0.000790513833992095,
 'rougeL': 0.07364187240348231,
 'rougeLsum': 0.0740951817499805,
 'meteor': 0.08617908313991111,
 'google_bleu': 0.021904761904761906}