In [None]:
# !git clone https://github.com/Desire32/lora-ml-transfomers.git
# !pip install -r ./lora-ml-transfomers/requirements.txt

In [None]:
import subprocess 
import mlflow # mlFlow
from pyngrok import ngrok # workaround for localhost
from huggingface_hub import login

# Credentials: replace the placeholders before running the cell.
# NOTE(review): wandb_token is not used anywhere in the visible notebook — confirm it is still needed.
wandb_token = "write_your_token_here"
hugginface_token = "write_your_token_here"
ngrok_token = "write_your_token_here" # optional, mlFlow

# https://huggingface.co/settings/tokens
login(hugginface_token)

# https://dashboard.ngrok.com/authtokens
ngrok.set_auth_token(ngrok_token)

port = 5000

# Every argv entry handed to Popen must be a string; passing the int port raises TypeError.
mlflow_proc = subprocess.Popen(["mlflow", "ui", "--port", str(port)])
mlflow.autolog()
# mlflow_proc.terminate()

# Expose the local MLflow UI through an ngrok tunnel (workaround for localhost-only access).
public_url = ngrok.connect(port)
print(f"MLflow UI: {public_url}")

In [None]:
from transformers import (
    AutoTokenizer, # language models
    AutoModelForCausalLM,
    Trainer, # fine-tuning
    TrainingArguments,
    DataCollatorForLanguageModeling, # part of pipeline responsible for assembling
    BitsAndBytesConfig, # bitsandbytes quantization config: stores weights in fewer bits to reduce memory use
    )

import torch # pyTorch
import safetensors.torch
from datasets import Dataset # converting text file in dataset (increases file reading speed)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model # (Parameter Efficient Fine Tuning) -> we take LoRA only

In [None]:
class LLMLoaderPipeline:
  """Load an 8-bit causal LM with its tokenizer and build train/validation datasets.

  Constructing an instance runs the whole preparation eagerly: the model and
  tokenizer are loaded, the text file is read into a dataset, and the dataset
  is tokenized and split.

  Args:
    model_name: Model identifier handed to ``from_pretrained``.
    file_path: Path of a text file whose samples are separated by blank lines.
    test_size: Share of the samples held out for validation.
    max_length: Token length every sample is truncated/padded to.
  """

  def __init__(self, model_name, file_path, test_size=0.2, max_length=512):
    self.model_name = model_name
    self.file_path = file_path
    self.test_size = test_size
    self.max_length = max_length

    self.model = None
    self.tokenizer = None
    self.dataset = None
    self.train_dataset = None
    self.val_dataset = None

    self.load_model()
    self.file_read()
    self.tokenize_and_split()

  def load_model(self):
    """Load the model with 8-bit quantization and its tokenizer."""
    self.model = AutoModelForCausalLM.from_pretrained(
      self.model_name,
      torch_dtype=torch.float16,  # half precision to use fewer resources
      device_map="auto",
      quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # bitsandbytes
    )
    self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
    # Only fall back to EOS when the tokenizer ships without a pad token;
    # a pad token the model already defines is left untouched.
    if self.tokenizer.pad_token is None:
      self.tokenizer.pad_token = self.tokenizer.eos_token

  def file_read(self):
    """Read the text file into a dataset with one sample per blank-line separated chunk."""
    # Explicit encoding so the result does not depend on the platform default.
    with open(self.file_path, "r", encoding="utf-8") as file:
      data = file.read()
    # Repeated blank lines or a trailing newline produce empty chunks; drop them
    # so they do not become all-padding samples after tokenization.
    chunks = [chunk for chunk in data.split("\n\n") if chunk.strip()]
    self.dataset = Dataset.from_dict({"text": chunks})
    print("DEBUG: dataset size =", len(self.dataset))

  def tokenize_and_split(self):
    """Tokenize every sample, then split the result into train and validation sets."""
    def token_func(example):
      return self.tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=self.max_length,
      )

    dataset_tokenized = self.dataset.map(token_func, batched=True)

    # Use the dataset's own splitter: no extra dependency is required and the
    # rows are not materialised into a Python list first. The fixed seed keeps
    # the split reproducible.
    split = dataset_tokenized.train_test_split(
      test_size=self.test_size, seed=42, shuffle=True
    )
    self.train_dataset = split["train"]
    self.val_dataset = split["test"]

  def get_model_and_tokenizer(self):
    """Return the loaded ``(model, tokenizer)`` pair."""
    return self.model, self.tokenizer

  def get_datasets(self):
    """Return the ``(train_dataset, val_dataset)`` pair."""
    return self.train_dataset, self.val_dataset


In [None]:
class LoraTrainerPipeline:
  """Attach a LoRA adapter to a model, fine-tune it and merge it back.

  Args:
    model: Base causal LM the adapter is attached to.
    tokenizer: Tokenizer matching ``model``.
    train_dataset: Tokenized dataset used for training.
    val_dataset: Optional tokenized dataset used for periodic evaluation;
      when omitted, evaluation during training is disabled.
    output_dir: Directory the trainer writes checkpoints to.
  """

  def __init__(self, model, tokenizer, train_dataset, val_dataset=None, output_dir="./pretrain-max-steps"):
    self.base_model = model
    self.tokenizer = tokenizer
    self.train_dataset = train_dataset
    self.val_dataset = val_dataset
    self.output_dir = output_dir

    self.lora_model = None
    self.trainer = None

  def lora_training(self):
    """Prepare the base model for k-bit training and wrap it with a LoRA adapter."""
    model = prepare_model_for_kbit_training(self.base_model)
    lora_config = LoraConfig(
      r=8,  # adapter rank: a higher rank gives more capacity but trains slower
      lora_alpha=42,  # how strongly the LoRA update influences the model
      target_modules=["q_proj", "v_proj"],  # modules that get adapters: query and value projections
      lora_dropout=0.05,  # regularisation against overfitting
      bias="none",
      task_type="CAUSAL_LM",
    )
    self.lora_model = get_peft_model(model, lora_config)
    # model.gradient_checkpointing_enable()

  def model_train(self, max_steps=100, batch_size=10, learning_rate=5e-6):
    """Fine-tune the LoRA adapter.

    Args:
      max_steps: Total number of optimisation steps.
      batch_size: Per-device training batch size.
      learning_rate: Learning rate passed to the trainer.
    """
    # Wrap the base model only once; wrapping again on a repeated call would
    # stack a second adapter on top of the first one.
    if self.lora_model is None:
      self.lora_training()

    data_collator = DataCollatorForLanguageModeling(
      tokenizer=self.tokenizer,
      mlm=False
    )

    # Requesting step-wise evaluation without an eval dataset makes the Trainer
    # fail, so evaluation is switched off when no validation set was given.
    eval_strategy = "steps" if self.val_dataset is not None else "no"

    # adam is working under the hood by default
    training_args = TrainingArguments(
      output_dir=self.output_dir,
      overwrite_output_dir=True,
      max_steps=max_steps,
      per_device_train_batch_size=batch_size,  # samples processed at once -> averaged gradients -> better accuracy
      save_steps=50,
      save_total_limit=1,
      prediction_loss_only=True,
      fp16=True,
      learning_rate=learning_rate,
      ######################
      logging_steps=10,    # <- training losses
      ######################
      eval_strategy=eval_strategy,
      eval_steps=10, # <- validation losses
      ######################
    )

    # use_cache=False, ## turn off cache to avoid cuda errors TODO

    self.trainer = Trainer(
      model=self.lora_model,
      args=training_args,
      train_dataset=self.train_dataset,  # <- training
      eval_dataset=self.val_dataset,     # <- validation
      tokenizer=self.tokenizer,
      data_collator=data_collator,
    )

    self.trainer.train()

  def merge_and_unload(self, checkpoint_path=None):
    """Merge the LoRA weights into the base model and return the merged model.

    Args:
      checkpoint_path: Adapter checkpoint to merge. When an adapter already
        exists in memory its weights are reloaded from this checkpoint; when
        ``None`` the in-memory adapter weights are merged as they are.

    Returns:
      The model with the adapter merged in; it also replaces ``self.lora_model``.

    Raises:
      ValueError: If there is neither an in-memory adapter nor a checkpoint path.
    """
    print("Merging LoRA and unloading PEFT weights...")
    if self.lora_model is None:
      if checkpoint_path is None:
        raise ValueError(
          "Nothing to merge: call model_train() first or pass checkpoint_path."
        )
      # Imported here because the notebook's import cell does not bring PeftModel in.
      from peft import PeftModel
      self.lora_model = PeftModel.from_pretrained(self.base_model, checkpoint_path)
    elif checkpoint_path is not None:
      # The model is already a PEFT model; wrapping it with from_pretrained again
      # would nest adapters. Reload the checkpoint into the existing adapter,
      # which get_peft_model registered under its default name.
      self.lora_model.load_adapter(checkpoint_path, adapter_name="default")
    self.lora_model = self.lora_model.merge_and_unload()
    return self.lora_model


In [None]:
models = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mistralai/Mistral-7B-Instruct-v0.3", "meta-llama/Llama-3.1-8B-Instruct", "microsoft/phi-2"]
files = "./lora-ml-transfomers/cache/church_text"

########################
# Load the model/tokenizer and build the tokenized train/validation datasets
pipeline = LLMLoaderPipeline(
    model_name=models[0], # <-- choose a model here
    file_path=files # <-- choose a dataset path here
)
########################

##########################
# training test split init, train val datasets
model, tokenizer = pipeline.get_model_and_tokenizer()
train_dataset, val_dataset = pipeline.get_datasets()
##########################

max_steps = 100

##########################
trainer_pipeline = LoraTrainerPipeline(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    val_dataset=val_dataset
)
##########################
trainer_pipeline.model_train(max_steps=max_steps) # <- LoRA fine-tuning

# Make a merge on site, instead of attaching adapters manually.
# The path is built from the trainer's own output_dir so it cannot drift from
# it, and uses the "checkpoint-<step>" folder name the Trainer writes.
merged_model = trainer_pipeline.merge_and_unload(
    f"{trainer_pipeline.output_dir}/checkpoint-{max_steps}"
)


In [None]:
def gen(question, model, tokenizer, max_length=500, do_sample=False, top_p=0.95, temperature=0.7):
    """Generate an answer to ``question`` with a causal language model.

    Args:
        question: Question text inserted into the prompt template.
        model: Model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        max_length: Maximum total length (prompt plus generated tokens).
        do_sample: Whether to sample instead of decoding greedily.
        top_p: Nucleus sampling threshold; only used when ``do_sample`` is true.
        temperature: Sampling temperature; only used when ``do_sample`` is true.

    Returns:
        The generated continuation of the prompt, stripped of surrounding whitespace.
    """
    prompt = f"Question: {question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "max_length": max_length,
        "num_return_sequences": 1,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
    }
    # Sampling knobs are ignored (and only trigger warnings) for greedy decoding,
    # so forward them only when sampling is actually enabled.
    if do_sample:
        generation_kwargs["top_p"] = top_p
        generation_kwargs["temperature"] = temperature

    outputs = model.generate(**inputs, **generation_kwargs)

    # The output sequence starts with the prompt tokens. Slice them off instead of
    # splitting the decoded text on "Answer:", which picks the wrong segment
    # whenever the generated text itself contains "Answer:".
    prompt_length = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return answer.strip()

# Sample generation with the merged model; "prompt" is the question text passed to gen.
gen("prompt", merged_model, tokenizer)