<a href="https://colab.research.google.com/github/Andron00e/Fine-Tuning-project/blob/main/CsmTrnslt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git
!pip install accelerate
!pip install bitsandbytes
!pip install sentencepiece
!pip install datasets
!pip uninstall wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[0m

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
torch.cuda.empty_cache()

In [32]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b")
tokenizer.pad_token_id = (
    0  # unk. we want this to be different from the eos token
)
tokenizer.padding_side = "left"
tokenizer

LlamaTokenizerFast(name_or_path='openlm-research/open_llama_3b', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': '<unk>'}, clean_up_tokenization_spaces=False)

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b",
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map='auto',
)
print(model)

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 3200, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear8bitLt(in_features=3200, out_features=3200, bias=False)
          (k_proj): Linear8bitLt(in_features=3200, out_features=3200, bias=False)
          (v_proj): Linear8bitLt(in_features=3200, out_features=3200, bias=False)
          (o_proj): Linear8bitLt(in_features=3200, out_features=3200, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=3200, out_features=8640, bias=False)
          (up_proj): Linear8bitLt(in_features=3200, out_features=8640, bias=False)
          (down_proj): Linear8bitLt(in_features=8640, out_features=3200, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm

In [8]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)
model.lm_head = CastOutputToFloat(model.lm_head)

In [9]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [98]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
    "q_proj",
    "v_proj",
],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 2662400 || all params: 3429136000 || trainable%: 0.07764054852300988


# My dataset

In [11]:
from datasets import load_dataset

train_dataset = load_dataset("csv", data_files="concatenated_trans.csv", split = "train")

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-0944c3b6d35127ec/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-0944c3b6d35127ec/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.


In [12]:
train_dataset

Dataset({
    features: ['Unnamed: 0', 'system_prompt', 'question', 'response', 'tr question', 'tr response'],
    num_rows: 16891
})

In [13]:
train_dataset = train_dataset.rename_column("tr question", "tr_question")

In [14]:
train_dataset = train_dataset.rename_column("tr response", "tr_response")

In [15]:
train_dataset = train_dataset.remove_columns("Unnamed: 0")

In [16]:
train_dataset

Dataset({
    features: ['system_prompt', 'question', 'response', 'tr_question', 'tr_response'],
    num_rows: 16891
})

prompt

In [17]:
def create_prompt(system_prompt: str, question: str, response: str, tr_question: str, tr_response: str):
  if len(response) < 1:
    response = "Cannot Find Answer"
  else:
    response = response
  prompt_template = f"### CONTEXT\n{system_prompt}\n\n### QUESTION\n{question}\n\n### RESPONSE\n{response}\n\n### TRANSLATED QUESTION\n{tr_question}\n\n### TRANSLATED RESPONSE\n{tr_response}</s>"
  return prompt_template

In [18]:
def tokenize_function(samples):
  return tokenizer(create_prompt(samples['system_prompt'],
                                 samples['question'],
                                 samples['response'],
                                 samples['tr_question'],
                                 samples['tr_response']),
                                 padding='max_length',
                                 truncation=True
                   )



mapped_train_dataset = train_dataset.map(tokenize_function)

Map:   0%|          | 0/16891 [00:00<?, ? examples/s]

In [19]:
mapped_train_dataset

Dataset({
    features: ['system_prompt', 'question', 'response', 'tr_question', 'tr_response', 'input_ids', 'attention_mask'],
    num_rows: 16891
})

# Dataset for translation

In [42]:
from datasets import inspect_dataset, load_dataset_builder, load_dataset

dataset = load_dataset("wmt19", 'ru-en')

'''
inspect_dataset("wmt19")
builder = load_dataset_builder(
    local_path = "/content",
    language_pair=("en", "ru"),
    split = "train"
    #subsets={
    #    datasets.Split.TRAIN: ["commoncrawl_frde"],
    #    datasets.Split.VALIDATION: ["euelections_dev2019"],
    #},
'''

Downloading and preparing dataset wmt19/ru-en to /root/.cache/huggingface/datasets/wmt19/ru-en/1.0.0/29e210fae5690e843cae5dc43b53db36c4e02f927db50cd5235a22ab42dde90a...


Downloading data files:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/668M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/919M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/41.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.45G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/38.7M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/6 [00:00<?, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split:   0%|          | 0/37492126 [00:00<?, ? examples/s]



Generating validation split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Dataset wmt19 downloaded and prepared to /root/.cache/huggingface/datasets/wmt19/ru-en/1.0.0/29e210fae5690e843cae5dc43b53db36c4e02f927db50cd5235a22ab42dde90a. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

'\ninspect_dataset("wmt19")\nbuilder = load_dataset_builder(\n    local_path = "/content",\n    language_pair=("en", "ru"),\n    split = "train"\n    #subsets={\n    #    datasets.Split.TRAIN: ["commoncrawl_frde"],\n    #    datasets.Split.VALIDATION: ["euelections_dev2019"],\n    #},\n'

In [71]:
dataset

DatasetDict({
    train: Dataset({
        features: ['translation'],
        num_rows: 29993700
    })
    test: Dataset({
        features: ['translation'],
        num_rows: 7498426
    })
})

In [72]:
dataset = dataset["train"].train_test_split(test_size=0.2)
dataset

DatasetDict({
    train: Dataset({
        features: ['translation'],
        num_rows: 23994960
    })
    test: Dataset({
        features: ['translation'],
        num_rows: 5998740
    })
})

In [76]:
dataset["train"][5]

{'translation': {'en': '1 long blast repeated at least once a minute',
  'ru': '1 продолжительный звук, повторяющийся с интервалом не более одной минуты'}}

In [77]:
data = dataset["train"]
data

Dataset({
    features: ['translation'],
    num_rows: 23994960
})

In [82]:
sharded_dataset = data.shard(num_shards = 300, index=0)
sharded_dataset

Dataset({
    features: ['translation'],
    num_rows: 79984
})

In [85]:
sharded_dataset[5]

{'translation': {'en': '23 February 1991', 'ru': '23 февраля 1991 года'}}

In [92]:
dic = {'translation': {'en': '23 February 1991', 'ru': '23 февраля 1991 года'}}
dic['translation']['en']

'23 February 1991'

In [96]:
sharded_dataset[17]['translation']['en']

'In fact, the very opposite is true; they have only served to harden attitudes of one community against the other.'

In [97]:
from tqdm import tqdm

input = []
for i in tqdm(range(len(sharded_dataset))):
  input.append(sharded_dataset[i]['translation']['en'])

output = []
for i in tqdm(range(len(sharded_dataset))):
  output.append(sharded_dataset[i]['translation']['ru'])

print(len(input), '\n')
print(len(output))

100%|██████████| 79984/79984 [04:55<00:00, 270.52it/s]
100%|██████████| 79984/79984 [05:21<00:00, 248.97it/s]

79984 

79984





In [118]:
sharded_dataset = sharded_dataset.add_column("input", input)
sharded_dataset

Flattening the indices:   0%|          | 0/79984 [00:00<?, ? examples/s]

Dataset({
    features: ['translation', 'input'],
    num_rows: 79984
})

In [120]:
sharded_dataset=sharded_dataset.remove_columns("translation")

In [121]:
sharded_dataset = sharded_dataset.add_column("output", output)
sharded_dataset

Dataset({
    features: ['input', 'output'],
    num_rows: 79984
})

In [110]:
def create_prompt(input: str, output: str):
  if len(output) < 1:
    output = "Cannot Find Answer"
  else:
    output = output
  prompt_template = f"### INPUT\n{input}\n\n### OUTPUT\n{output}"
  return prompt_template

In [122]:
def tokenize_function(samples):
  return tokenizer(create_prompt(samples['input'], samples['output']), padding='max_length', truncation=True)



mapped_train_dataset = sharded_dataset.map(tokenize_function)

Map:   0%|          | 0/79984 [00:00<?, ? examples/s]

In [123]:
mapped_train_dataset

Dataset({
    features: ['input', 'output', 'input_ids', 'attention_mask'],
    num_rows: 79984
})

# Training and stuff

In [128]:
import transformers

trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_train_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        warmup_steps=100,
        max_steps=100,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False

In [130]:
with torch.autocast("cuda"):
    trainer.train()

TypeError: ignored

for CausalLM

In [29]:
import transformers

trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_train_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        warmup_steps=100,
        max_steps=100,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

In [30]:
with torch.autocast("cuda"):
    trainer.train()

TypeError: ignored

In [6]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [19]:
HUGGING_FACE_USER_NAME = "Andron00e"

In [20]:
adapter_name = "YetAnother_Open-Llama-3B-LoRA-adapter"

In [21]:
model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{adapter_name}", use_auth_token=True)

adapter_model.bin:   0%|          | 0.00/10.7M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Andron00e/YetAnother_Open-Llama-3B-LoRA-adapter/commit/eb414491f1ee1115875da39f77580738a32f592b', commit_message='Upload model', commit_description='', oid='eb414491f1ee1115875da39f77580738a32f592b', pr_url=None, pr_revision=None, pr_num=None)

In [29]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = f"{HUGGING_FACE_USER_NAME}/{adapter_name}"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=False, torch_dtype=torch.bfloat16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)


qa_model = PeftModel.from_pretrained(model, peft_model_id)

In [23]:
from IPython.display import display, Markdown

def make_inference(system_prompt, question):
  batch = tokenizer(f"### CONTEXT\n{system_prompt}\n\n### QUESTION\n{question}\n\n### RESPONSE\n\n### TRANSLATED RESPONSE\n", return_tensors='pt')

  #with torch.cuda.amp.autocast():
    #output_tokens = qa_model.generate(**batch, max_new_tokens=200)
  with torch.cuda.amp.autocast():
  batch = {k: v.to(qa_model.device) for k, v in batch.items()}
  output_tokens = qa_model.generate(**batch, max_new_tokens=200)

  display(Markdown((tokenizer.decode(output_tokens[0], skip_special_tokens=True))))

In [None]:
system_prompt = "Cheese is the best food."
question = "What is the best food?"

make_inference(system_prompt, question)

In [30]:
merged_model = qa_model.merge_and_unload()

In [31]:
merged_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 3200, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (k_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (v_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (o_proj): Linear(in_features=3200, out_features=3200, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3200, out_features=8640, bias=False)
          (up_proj): Linear(in_features=3200, out_features=8640, bias=False)
          (down_proj): Linear(in_features=8640, out_features=3200, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm(

In [32]:
tokenizer

LlamaTokenizerFast(name_or_path='openlm-research/open_llama_3b', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True)}, clean_up_tokenization_spaces=False)

In [33]:
merged_model_name = "YetAnother_Open-Llama-3B-LoRA"

merged_model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{merged_model_name}", use_auth_token=True)
tokenizer.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{merged_model_name}", use_auth_token=True)

pytorch_model.bin:   0%|          | 0.00/6.85G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/534k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Andron00e/YetAnother_Open-Llama-3B-LoRA/commit/101108cebb8bdc8fa9e461c2d3758492da02b0cb', commit_message='Upload tokenizer', commit_description='', oid='101108cebb8bdc8fa9e461c2d3758492da02b0cb', pr_url=None, pr_revision=None, pr_num=None)

evaluation

In [10]:
import torch

In [11]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Andron00e/YetAnother_Open-Llama-3B-LoRA")
model = AutoModelForCausalLM.from_pretrained("Andron00e/YetAnother_Open-Llama-3B-LoRA", load_in_8bit=True, torch_dtype=torch.bfloat16, device_map='auto')

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [5]:
!pip install safetensors



In [15]:
from safetensors import safe_open

tensors = {}
with safe_open("model.safetensors", framework="pt", device=0) as f:
    for k in f.keys():
        tensors[k] = f.get_tensor(k)

In [14]:
import torch
from safetensors.torch import save_file

tensors = {
    "embedding": torch.zeros((2, 2)),
    "attention": torch.zeros((2, 3))
}
save_file(tensors, "model.safetensors")

In [25]:
from safetensors.torch import load_file

model.safetensors = load_file("./model.safetensors")
model.safetensors

{'attention': tensor([[0., 0., 0.],
         [0., 0., 0.]]),
 'embedding': tensor([[0., 0.],
         [0., 0.]])}

In [24]:
merged_model_name = "YetAnother_Open-Llama-3B-LoRA"
HUGGING_FACE_USER_NAME = "Andron00e"

model.safetensors.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{merged_model_name}", use_auth_token=True)

AttributeError: ignored

In [29]:
from huggingface_hub import upload_file

In [32]:
upload_file(path_or_fileobj="./model.safetensors",
    path_in_repo="model.safetensors",
    repo_id="Andron00e/YetAnother_Open-Llama-3B-LoRA")

model.safetensors:   0%|          | 0.00/184 [00:00<?, ?B/s]

'https://huggingface.co/Andron00e/YetAnother_Open-Llama-3B-LoRA/blob/main/model.safetensors'

In [33]:
!git clone https://github.com/EleutherAI/lm-evaluation-harness

fatal: destination path 'lm-evaluation-harness' already exists and is not an empty directory.


In [34]:
%cd lm-evaluation-harness

/content/lm-evaluation-harness


In [36]:
!pip install -e ".[multilingual]"

Obtaining file:///content/lm-evaluation-harness
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: lm-eval
  Attempting uninstall: lm-eval
    Found existing installation: lm-eval 0.3.0
    Uninstalling lm-eval-0.3.0:
      Successfully uninstalled lm-eval-0.3.0
  Running setup.py develop for lm-eval
Successfully installed lm-eval-0.3.0


In [38]:
!python main.py \
    --model hf-causal \
    --model_args pretrained=Andron00e/YetAnother_Open-Llama-3B-LoRA \
    --tasks hellaswag \
    --device cuda:0

[dynet] random seed: 1234
[dynet] allocating memory: 32MB
[dynet] memory allocation done.
Selected Tasks: ['hellaswag']
Using device 'cuda:0'
Traceback (most recent call last):
  File "/content/lm-evaluation-harness/main.py", line 93, in <module>
    main()
  File "/content/lm-evaluation-harness/main.py", line 59, in main
    results = evaluator.simple_evaluate(
  File "/content/lm-evaluation-harness/lm_eval/utils.py", line 243, in _wrapper
    return fn(*args, **kwargs)
  File "/content/lm-evaluation-harness/lm_eval/evaluator.py", line 76, in simple_evaluate
    lm = lm_eval.models.get_model(model).create_from_arg_string(
  File "/content/lm-evaluation-harness/lm_eval/base.py", line 115, in create_from_arg_string
    return cls(**args, **args2)
  File "/content/lm-evaluation-harness/lm_eval/models/gpt2.py", line 85, in __init__
    self.model = transformers.AutoModelForCausalLM.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", 

In [1]:
!git clone https://github.com/EleutherAI/lm-evaluation-harness
%cd lm-evaluation-harness

fatal: destination path 'lm-evaluation-harness' already exists and is not an empty directory.
/content/lm-evaluation-harness


In [None]:
!pip install -e .

In [2]:
!pip install -e ".[multilingual]"

Obtaining file:///content/lm-evaluation-harness
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting datasets>=2.0.0 (from lm-eval==0.3.0)
  Downloading datasets-2.13.1-py3-none-any.whl (486 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jsonlines (from lm-eval==0.3.0)
  Downloading jsonlines-3.1.0-py3-none-any.whl (8.6 kB)
Collecting openai>=0.6.4 (from lm-eval==0.3.0)
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting omegaconf>=2.2 (from lm-eval==0.3.0)
  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft>=0.2.0 (from lm-eval==0.3.0)
  Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━

In [2]:
!python main.py \
    --model hf-causal \
    --model_args pretrained=Andron00e/YetAnother_Open-Llama-3B-LoRA \
    --tasks hellaswag \
    --device cuda:0

[dynet] random seed: 1234
[dynet] allocating memory: 32MB
[dynet] memory allocation done.
Selected Tasks: ['hellaswag']
Using device 'cuda:0'
Downloading (…)lve/main/config.json: 100% 635/635 [00:00<00:00, 3.65MB/s]
Downloading model.safetensors: 100% 6.85G/6.85G [01:42<00:00, 66.7MB/s]
Downloading (…)neration_config.json: 100% 137/137 [00:00<00:00, 851kB/s]
Downloading (…)okenizer_config.json: 100% 718/718 [00:00<00:00, 4.83MB/s]
Downloading tokenizer.model: 100% 534k/534k [00:00<00:00, 11.0MB/s]
Downloading (…)/main/tokenizer.json: 100% 1.98M/1.98M [00:00<00:00, 26.1MB/s]
Downloading (…)cial_tokens_map.json: 100% 411/411 [00:00<00:00, 2.75MB/s]
Downloading builder script: 100% 4.36k/4.36k [00:00<00:00, 21.4MB/s]
Downloading metadata: 100% 2.53k/2.53k [00:00<00:00, 19.1MB/s]
Downloading readme: 100% 6.85k/6.85k [00:00<00:00, 34.2MB/s]
Downloading and preparing dataset hellaswag/default to /root/.cache/huggingface/datasets/hellaswag/default/0.1.0/512a66dd8b1b1643ab4a48aa4f150d04c91680d