# [English-to-Hungarian translator LLM](https://github.com/MartinKondor/EnglishToHungarianLLM)

In [1]:
# @markdown # Settings
# Hugging Face Hub repository id of the base model that is downloaded and fine-tuned below.
model_id = "meta-llama/Llama-2-7b-hf"
# NOTE(review): not referenced by any later cell visible in this notebook; presumably the
# model's context window in tokens — confirm before relying on it.
context_size = 4096

# Setup

In [2]:
# !pip install -q transformers sentencepiece accelerate bitsandbytes py7zr scipy peft fire torch_tb_profiler ipywidgets
!apt install nvidia-cuda-toolkit
!export CUDA_LAUNCH_BLOCKING=1

!pip install -q git+https://github.com/huggingface/transformers
!pip install -q datasets accelerate sentencepiece py7zr scipy peft fire torch_tb_profiler ipywidgets
!pip install -q protobuf==3.20 bitsandbytes==0.40.0

!pip install -q --upgrade git+https://github.com/trisongz/lazyops
!pip install -q git+https://github.com/MartinKondor/jsonl.git
!pip install -q --upgrade typing-extensions

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fonts-dejavu-core fonts-dejavu-extra libaccinj64-11.5 libatk-wrapper-java libatk-wrapper-java-jni
  libbabeltrace1 libcub-dev libcublas11 libcublaslt11 libcudart11.0 libcufft10 libcufftw10
  libcuinj64-11.5 libcupti-dev libcupti-doc libcupti11.5 libcurand10 libcusolver11 libcusolvermg11
  libcusparse11 libdebuginfod-common libdebuginfod1 libegl-dev libfontenc1 libgail-common libgail18
  libgl-dev libgl1-mesa-dev libgles-dev libgles1 libglvnd-core-dev libglvnd-dev libglx-dev
  libgtk2.0-0 libgtk2.0-bin libgtk2.0-common libipt2 libnppc11 libnppial11 libnppicc11 libnppidei11
  libnppif11 libnppig11 libnppim11 libnppist11 libnppisu11 libnppitc11 libnpps11 libnvblas11
  libnvidia-compute-495 libnvidia-compute-510 libnvidia-compute-525 libnvidia-ml-dev libnvjpeg11
  libnvrtc-builtins11.5 libnvrtc11.2 libnvtoolsext1 libnvvm4 libopengl-dev libr

In [3]:
# Fetch the QLoRA reference implementation; its qlora.py script is run in the Training section.
!git clone https://github.com/artidoro/qlora.git
# Flatten the checkout into /content so qlora.py and requirements.txt can be addressed by bare
# name (assumes the notebook's working directory is /content, as on Colab — TODO confirm).
!mv /content/qlora/* /content/
# NOTE(review): per the recorded output this installs pinned transformers==4.31.0, peft==0.4.0
# and accelerate==0.21.0, which would supersede the transformers build installed from GitHub
# in the setup cell.
!pip install -r requirements.txt

Cloning into 'qlora'...
remote: Enumerating objects: 578, done.[K
remote: Total 578 (delta 0), reused 0 (delta 0), pack-reused 578[K
Receiving objects: 100% (578/578), 30.63 MiB | 15.16 MiB/s, done.
Resolving deltas: 100% (369/369), done.
Updating files: 100% (274/274), done.
Collecting transformers==4.31.0 (from -r requirements.txt (line 2))
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m62.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft==0.4.0 (from -r requirements.txt (line 3))
  Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.21.0 (from -r requirements.txt (line 4))
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m33.5 MB/s[0m eta [

In [4]:
import locale
import gc

import torch
from huggingface_hub import notebook_login, HfApi, list_models, snapshot_download

gc.collect()
torch.cuda.empty_cache()

locale.getpreferredencoding = lambda: "UTF-8"
hf_token = "..."
!huggingface-cli login --token $hf_token

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Model

In [5]:
import os
import shutil

from huggingface_hub import snapshot_download


# Final location of the plain-file model copy; later cells read it as ./models_hf/XB and
# /content/models_hf/XB (relative path assumes the working directory is /content).
local_model_dir = os.path.join("models_hf", "XB")

# Skip the multi-GB download when the directory is already populated, so the cell can be re-run.
# NOTE: an interrupted earlier run can leave a partial directory; delete it to force a retry.
if not (os.path.isdir(local_model_dir) and os.listdir(local_model_dir)):
  # snapshot_download returns the local snapshot folder, so the path follows `model_id`
  # instead of being hard-coded to one repository and an arbitrary revision directory.
  snapshotdir = snapshot_download(model_id)
  os.makedirs(local_model_dir, exist_ok=True)

  # Cache entries are normally symlinks into the cache's blob store. Resolve each one and move
  # the real file (no copy of the weights), then drop the now-dangling link.
  # NOTE: only top-level entries are handled, matching the flat layout listed in the output.
  for file in os.listdir(snapshotdir):
    link = os.path.join(snapshotdir, file)
    shutil.move(os.path.realpath(link), os.path.join(local_model_dir, file))
    if os.path.islink(link):
      os.unlink(link)

Fetching 17 files:   0%|          | 0/17 [00:00<?, ?it/s]

Responsible-Use-Guide.pdf:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

USE_POLICY.md:   0%|          | 0.00/4.77k [00:00<?, ?B/s]

LICENSE.txt:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

In [6]:
from transformers import LlamaForCausalLM, LlamaTokenizer


# NOTE(review): this rebinds `model_id` from the Hub repository id (settings cell) to the local
# copy of the model. The download cell passes `model_id` to snapshot_download, so re-running it
# after this cell would hand it a local path instead of a repository id.
model_id = "./models_hf/XB"
# Tokenizer loaded from the local model directory.
tokenizer = LlamaTokenizer.from_pretrained(model_id)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


# Dataset

In [7]:
from typing import List, Dict
from jsonl import jsonl
from lazyops.utils import logger


# Fetch the project repository, which contains the data file loaded below.
!git clone https://github.com/MartinKondor/EnglishToHungarianLLM.git
# Each record is read as a dict; the next cell accesses its "en" and "hu" keys.
dataset: List[Dict[str, str]] = jsonl.load("EnglishToHungarianLLM/data/data.jsonl")
logger.info(f"{len(dataset)} samples are loaded")

Cloning into 'EnglishToHungarianLLM'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 30 (delta 8), reused 10 (delta 1), pack-reused 0[K
Receiving objects: 100% (30/30), 22.15 MiB | 10.23 MiB/s, done.
Resolving deltas: 100% (8/8), done.
[1mINFO    [0m [32m2023-11-21 23:00:43.651[0m: [36m__main__[0m:[36m<cell line: 8>[0m: [1m39 samples are loaded[0m


In [8]:
from tqdm import tqdm


# Instruction-style prompt layout with {instruction} and {input} placeholders.
inst_prompt_template = "\n".join((
    "Below is an instruction that describes a task, paired with an input that provides further context.",
    "Write a response that appropriately completes the request.",
    "",
    "### Instruction:",
    "{instruction}",
    "",
    "### Input:",
    "{input}",
    "",
    "### Response:",
))
# The same instruction text is attached to every sample.
base_instruction = "You are an expert English-Hungarian translator. Translate the English sentences in the Input to Hungarian."

# One record per sentence pair: English text as "input", Hungarian text as "output".
train_samples = [
    {"instruction": base_instruction, "input": pair["en"], "output": pair["hu"]}
    for pair in tqdm(dataset)
]

100%|██████████| 39/39 [00:00<00:00, 342930.52it/s]


In [9]:
# (English sentence, reference Hungarian translation) pairs used to eyeball the model's output.
_test_pairs = [
    (
        "No one knew where he had gone or what he had done.",
        "Senki se tudta, hová lett, mit müvelt.",
    ),
    (
        "They only suspected from what happened.",
        "Csak gyanitják a később történtekből.",
    ),
    (
        "This is suspected because at about two o'clock in the afternoon a terrible tragedy occurred in this place.",
        "Ezt onnan gyanitják, mert délután két óra tájban szörnyü tragikus eset történt ezen a helyen.",
    ),
]

# Same record layout as the training samples: instruction / input / output.
test_samples = [
    {"instruction": base_instruction, "input": english, "output": hungarian}
    for english, hungarian in _test_pairs
]

In [10]:
import json

# Write each split to its own JSON file in the working directory; the training cell reads
# train.json (as /content/train.json).
for path, samples in (("test.json", test_samples), ("train.json", train_samples)):
  with open(path, "w+") as file:
    json.dump(samples, file)

# Training

In [11]:
max_iters = 50
!python qlora.py --model_name_or_path /content/models_hf/XB --dataset /content/train.json --max_steps $max_iters --num_train_epochs 1 --output_dir ./output


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...
2023-11-21 23:00:47.791019: E tensorflow/compiler/xla/st

In [12]:
# List what the training run wrote to the output directory.
!ls ./output
print(42*"-")
# NOTE: the recorded output echoes the path itself, which suggests `completed` is a plain
# file rather than a directory.
!ls ./output/completed

all_results.json  checkpoint-50  completed  metrics.json  trainer_state.json  train_results.json
------------------------------------------
./output/completed


# Inference example

In [13]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig, GenerationConfig
from peft import PeftModel
from peft.tuners.lora import LoraLayer


def generate_prompt(sample: dict) -> str:
  """Build the instruction-style prompt for one sample.

  Args:
    sample: Mapping with "instruction" and "input" entries; other keys are ignored.

  Returns:
    The prompt text, ending with the "### Response:" header and no trailing whitespace.

  Raises:
    KeyError: If "instruction" or "input" is missing from ``sample``.
  """
  sections = (
      "Below is an instruction that describes a task, paired with an input that provides further context.\n"
      "Write a response that appropriately completes the request.",
      f"### Instruction:\n{sample['instruction']}",
      f"### Input:\n{sample['input']}",
      "### Response:",
  )
  # Sections are separated by one blank line.
  return "\n\n".join(sections)


def generate(model, prompt, max_new_tokens=512, top_p=0.9, temperature=1.0):
    """Sample a completion for ``prompt`` and return it as decoded text.

    Uses the module-level ``tokenizer``. The recorded output in this notebook shows the
    returned text containing the prompt followed by the completion.

    Args:
        model: Object exposing ``generate(**inputs, generation_config=...)``.
        prompt: Text to tokenize and feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass.
        temperature: Sampling temperature.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Tokenize first, then move the encoded inputs to the GPU.
    encoded = tokenizer(prompt, return_tensors="pt").to('cuda')
    sampling_config = GenerationConfig(
        do_sample=True,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        temperature=temperature,
    )
    sequences = model.generate(**encoded, generation_config=sampling_config)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


# LoRA adapter saved by the training run; the directory name follows `max_iters`.
adapter_path = f"/content/output/checkpoint-{max_iters}/adapter_model"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
tokenizer.bos_token_id = 1

# Load the base model 4-bit quantized (NF4, double quantization) with bf16 compute.
# 4-bit loading is requested only through `quantization_config`: passing `load_in_4bit=True`
# as well is redundant, and newer transformers releases raise a ValueError when both are given.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
    )
)

# Attach the fine-tuned adapter to the base model and switch to evaluation mode.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()

# Log the reference translation next to the model's sampled text for each held-out sample.
for test_sample in test_samples:
  prompt = generate_prompt(test_sample)
  predicted = generate(model, prompt)
  logger.info(f"Output:\n{test_sample['output']}")
  logger.info(f"Predicted:\n{predicted}")
  print(42*"-")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

------------------------------------------[1mINFO    [0m [32m2023-11-21 23:09:43.228[0m: [36m__main__[0m:[36m<cell line: 57>[0m: [1mOutput:
Senki se tudta, hová lett, mit müvelt.[0m

[1mINFO    [0m [32m2023-11-21 23:09:43.229[0m: [36m__main__[0m:[36m<cell line: 57>[0m: [1mPredicted:
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
You are an expert English-Hungarian translator. Translate the English sentences in the Input to Hungarian.

### Input:
No one knew where he had gone or what he had done.

### Response:
Nemo volt elé, hol csöbbent s mirált.[0m
------------------------------------------[1mINFO    [0m [32m2023-11-21 23:09:45.987[0m: [36m__main__[0m:[36m<cell line: 57>[0m: [1mOutput:
Csak gyanitják a később történtekből.[0m

[1mINFO    [0m [32m2023-11-21 23:09:45.987[0m: [36m__main__[0m:[36m<cell line: 57>[0m: [1m