In [None]:
from transformers import (
    AutoTokenizer, # language models
    AutoModelForCausalLM,
    Trainer, # fine-tuning
    TrainingArguments,
    DataCollatorForLanguageModeling, # part of pipeline responsible for assembling
    BitsAndBytesConfig, # BitsAndBytes for quantum compression, less data size for models, yet not losing in speed at all
    )

import torch # pyTorch
import safetensors.torch
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model # (Parameter Efficient Fine Tuning) -> we take LoRA only

# !pip install -U bitsandbytes
# !pip install --upgrade transformers
# !unzip deepseek-clear.zip -d deepseek/
# !unzip mistral500steps.zip -d mistral/
# !unzip llama-1000steps.zip -d llama/

In [None]:
def import_model(model_name):
  """Load a causal language model with 8-bit weights, plus its tokenizer.

  Args:
    model_name: Identifier (or path) forwarded unchanged to both
      ``AutoModelForCausalLM.from_pretrained`` and
      ``AutoTokenizer.from_pretrained``.

  Returns:
    A ``(model, tokenizer)`` tuple. The tokenizer is guaranteed to have a
    ``pad_token``: its own if it defines one, otherwise its ``eos_token``.
  """
  model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # request half-precision tensors
    device_map="auto",  # let the library decide where the weights are placed
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # bitsandbytes 8-bit quantization
  )
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Only fall back to EOS when the tokenizer ships without a pad token;
  # overwriting an existing pad token would silently change padding behaviour.
  if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
  return model, tokenizer

# Checkpoint identifiers, addressed by position in the loading calls below:
# 0 -> DeepSeek, 1 -> Mistral, 2 -> Llama.
models = [
  "deepseek-ai/deepseek-llm-7b-base",
  "mistralai/Mistral-7B-Instruct-v0.3",
  "meta-llama/Llama-3.1-8B-Instruct",
]
# deepseek_model, deepseek_tokenizer = import_model(models[0])
# mistral_model, mistral_tokenizer = import_model(models[1])
# llama_model, llama_tokenizer = import_model(models[2])

In [None]:
def lora_training(model):
  """Attach a LoRA adapter to ``model`` and return the wrapped model.

  The model is first passed through ``prepare_model_for_kbit_training``,
  then wrapped by ``get_peft_model`` with the LoRA settings below, and
  finally has gradient checkpointing switched on.

  Args:
    model: The base model to adapt.

  Returns:
    The object returned by ``get_peft_model``.
  """
  kbit_ready = prepare_model_for_kbit_training(model)

  adapter_settings = dict(
    r=8,                                  # LoRA rank
    lora_alpha=42,                        # LoRA scaling factor
    target_modules=["q_proj", "v_proj"],  # adapt the query and value projections only
    lora_dropout=0.05,                    # dropout applied inside the adapter
    bias="none",                          # leave bias terms untouched
    task_type="CAUSAL_LM",
  )
  peft_model = get_peft_model(kbit_ready, LoraConfig(**adapter_settings))

  peft_model.gradient_checkpointing_enable()
  return peft_model

# deepseek_model = lora_training(deepseek_model)
# mistral_model = lora_training(mistral_model)
# llama_model = lora_training(llama_model)

In [None]:
# mistral_model.load_adapter("./mistral", adapter_name="custom")
# mistral_model.set_adapter("custom")

# deepseek_model.load_adapter("./deepseek", adapter_name="custom")
# deepseek_model.set_adapter("custom")

# llama_model.load_adapter("./llama", adapter_name="custom")
# llama_model.set_adapter("custom")


In [None]:
def gen(question, model, tokenizer):
    """Sample an answer to ``question`` from ``model``.

    The question is wrapped in the prompt template ``Question: <question>``,
    a newline, then ``Answer:``. Only the tokens produced after the prompt are
    decoded, so the result is the model's full continuation even when that
    continuation itself contains the text ``Answer:``.

    Args:
        question: Text inserted into the prompt template.
        model: Object exposing ``device`` and ``generate(**kwargs)``. The
            returned sequences are assumed to start with the prompt tokens
            (as decoder-only / causal LMs do).
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"Question: {question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=500,  # passed as max_length, i.e. a cap on the whole sequence
        num_return_sequences=1,
        do_sample=True,  # sampling is ON; set to False for deterministic output
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the echoed prompt by position rather than by searching for the
    # "Answer:" marker: a text split would discard the real answer whenever the
    # model repeats that marker in its own output.
    prompt_len = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# gen("The church of Panagia Aggeloktisti", deepseek_model, deepseek_tokenizer)
# gen("The church of Panagia Aggeloktisti", mistral_model, mistral_tokenizer)
# gen("The church of Panagia Aggeloktisti", llama_model, llama_tokenizer)