In [None]:
import json

# Load the raw data. The pairing below assumes a flat JSON array in which
# user and assistant messages alternate (user first) — TODO confirm against the file.
with open("training_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Messages are consumed two at a time, so an odd count would otherwise surface
# as a bare IndexError on the last iteration. Fail up front with a clear message.
if len(data) % 2 != 0:
    raise ValueError(
        f"training_data.json must contain an even number of messages, got {len(data)}"
    )

# Build the dataset: one {"messages": [user, assistant]} record per consecutive pair.
dataset = [
    {"messages": [data[i], data[i + 1]]}
    for i in range(0, len(data), 2)
]

# Example: print the first 3 records
# for example in dataset[:3]:
#     print(json.dumps(example, indent=2, ensure_ascii=False))

# Write one JSON object per line; ensure_ascii=False keeps non-ASCII text unescaped.
with open("chatml_dataset.jsonl", "w", encoding="utf-8") as f:
    for entry in dataset:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")


In [None]:
from datasets import load_dataset

# Load the .jsonl file written earlier through the "json" loader, taking the "train" split
dataset = load_dataset(
    path="json",
    data_files="chatml_dataset.jsonl",
    split="train",
)

In [None]:
import json


def json_to_chatml(messages):
    """Render a list of chat messages as one ChatML-formatted string.

    Every message is wrapped in ``<|im_start|>`` / ``<|im_end|>`` markers with
    the role on the first line, and the rendered messages are joined with
    newlines.

    Assistant content that parses as JSON is re-serialized with a 2-space
    indent. Non-ASCII characters are kept as-is (``ensure_ascii=False``) so
    that, for example, Cyrillic text is not turned into ``\\uXXXX`` escapes in
    the training text. Assistant content that is not valid JSON, and content
    of any other role, is used unchanged.

    Args:
        messages: Iterable of mappings, each with ``"role"`` and ``"content"`` keys.

    Returns:
        The ChatML string; an empty string when ``messages`` is empty.

    Raises:
        KeyError: If a message lacks ``"role"`` or ``"content"``.
        TypeError: If an assistant message's content is not something
            ``json.loads`` accepts (e.g. ``None``).
    """
    chatml = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        formatted_content = content

        # Only assistant replies are pretty-printed, and only when they hold valid JSON
        if role == "assistant":
            try:
                content_json = json.loads(content)
            except json.JSONDecodeError:
                # Plain-text reply: keep it verbatim
                pass
            else:
                formatted_content = json.dumps(
                    content_json, indent=2, ensure_ascii=False
                )

        chatml.append(f"<|im_start|>{role}\n{formatted_content}<|im_end|>")

    return "\n".join(chatml)

In [None]:
def process_dataset(sample):
    """Turn one dataset row into a ``{"text": ...}`` record.

    Reads ``sample["messages"]`` and renders it with ``json_to_chatml``.
    """
    chatml_text = json_to_chatml(sample["messages"])
    return {"text": chatml_text}


# Apply process_dataset to every row; its "text" output becomes a dataset column
chat_dataset = dataset.map(function=process_dataset)

In [None]:
# Example: print the first 3 rendered samples.
# json.dumps on a string prints it as a quoted JSON literal, so newlines show up as \n.
preview_texts = chat_dataset[:3]["text"]
for example in preview_texts:
    print(json.dumps(example, indent=2, ensure_ascii=False))

In [None]:
import os

import torch
from dotenv import load_dotenv
from peft import prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

load_dotenv()

# Read the configuration once and fail early: a missing LLM_MODEL_NAME would
# otherwise be passed to from_pretrained() as None and fail with an unrelated error.
llm_model_name = os.getenv("LLM_MODEL_NAME")
if not llm_model_name:
    raise RuntimeError(
        "LLM_MODEL_NAME is not set (expected in the environment or in the .env file)"
    )
hf_token = os.getenv("HF_TOKEN")  # may be None; passed through unchanged

if torch.cuda.is_available():
    device = "cuda"
    # 4-bit NF4 quantization with double quantization; compute runs in bfloat16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    device = "cpu"
    # No quantization config on CPU: the model is loaded without one
    bnb_config = None
# from models import load_model, load_tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    llm_model_name,
    token=hf_token,
)
print(device)
model = AutoModelForCausalLM.from_pretrained(
    llm_model_name,
    token=hf_token,
    quantization_config=bnb_config,
    device_map="auto",  # quantization requires automatic placement across cpu and gpu
)
# tokenizer = load_tokenizer()
# model = load_model()

# Prepare the (possibly quantized) model for adapter training.
# NOTE(review): prepare_model_for_kbit_training may already enable gradient
# checkpointing and input grads by default — confirm whether the explicit calls are needed.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model.enable_input_require_grads()


In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Walks ``model.named_parameters()``, sums ``numel()`` over all parameters
    and over those with ``requires_grad`` set, and prints both totals together
    with the trainable percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs.

    Returns:
        None. The summary is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without parameters would otherwise raise ZeroDivisionError
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters gathered in one mapping before building the config
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    # adapters go on these modules only; "gate_up_proj" and "down_proj" are not targeted
    "target_modules": ["qkv_proj", "o_proj"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
config = LoraConfig(**lora_settings)
# model.unload()
# Wrap the prepared model with the adapters and report the trainable share
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
from datasets import Dataset
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

# tokenizer.pad_token = tokenizer.eos_token

# texts = ["Hello world", "How are you?"]
# Tokenize WITHOUT padding and keep the attention mask. The data collator below
# pads each batch itself. Pre-padding here and storing only input_ids made the
# collator rebuild an all-ones attention mask, so padding counted as real input.
inputs = tokenizer(chat_dataset[:]["text"], truncation=True)
train_dataset = Dataset.from_dict(
    {
        key: inputs[key]
        for key in ("input_ids", "attention_mask")
        if key in inputs
    }
)


training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # effective batch size of 4
    warmup_steps=2,
    max_steps=10,
    learning_rate=2e-4,
    bf16=True,
    # fp16=True,
    logging_steps=1,
    output_dir="outputs",
    optim="paged_adamw_8bit",  # lowers GPU memory pressure and speeds things up
    report_to="mlflow",
    # The collator produces a "labels" key; input_ids / attention_mask are
    # model inputs, not labels, so they must not be listed here.
    label_names=["labels"],
)

trainer = Trainer(
    model=model,
    processing_class=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    # mlm=False selects causal-LM collation instead of masked-LM
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)


In [None]:
# TODO: добавить оценочные данные

# import math

# eval_results = trainer.evaluate()
# print("Perplexity:", round(math.exp(eval_results["eval_loss"]), 2))

In [None]:
import mlflow

# NOTE(review): assigns a private attribute on mlflow's fluent module before
# configuring tracking — confirm this reset is still needed.
mlflow.tracking.fluent._tracking_uri = None

# File-based tracking store; the URI carries an absolute path (/mlruns)
tracking_uri = "file:///mlruns"
experiment_name = "LoRA Fine-tuning"
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)


In [None]:
import mlflow
import peft
import torch
import transformers
from transformers import pipeline

# Train the LoRA adapters inside an MLflow run, merge them into the base model,
# save the merged model locally and log a text-generation pipeline to MLflow.
# NOTE(review): assigns a private attribute on mlflow's fluent module — confirm it is still needed.
mlflow.tracking.fluent._tracking_uri = None
# can be pointed at postgresql: mlflow server --backend-store-uri="postgres://username@hostname:port/database" --default-artifact-root=s3://your-bucket --host=0.0.0.0 --port=5000
# or at sqlite: mlflow server --backend-store-uri sqlite:///mydb.sqlite
# mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")
# start the UI with: mlflow ui --backend-store-uri file:///mlruns

# The pipeline is created BEFORE training and holds a reference to `model`.
# NOTE(review): this `pipe` (not `merged_model`) is what gets logged below under
# the "merged_model" artifact path — confirm that is intended.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


mlflow.set_tracking_uri("file:///mlruns")  # Local folder
mlflow.set_experiment("LoRA Fine-tuning")

# Pin the library versions of the current environment for the logged model
requirements = [
    f"torch=={torch.__version__}",
    f"transformers=={transformers.__version__}",
    f"peft=={peft.__version__}",
]

with mlflow.start_run():
    trainer.train()
    # Upload the whole Trainer output directory as run artifacts
    mlflow.log_artifacts("outputs", artifact_path="checkpoints")

    # Adapter from the last training step.
    # The hard-coded checkpoint number matches max_steps=10 in TrainingArguments.
    # NOTE(review): the adapter is loaded under a new name — confirm it is the one that gets merged.
    model.load_adapter("./outputs/checkpoint-10", adapter_name="lora_adapter_base")
    # Merge LoRA into the base model
    merged_model = model.merge_and_unload()
    # Save the model (and the tokenizer) to the same local directory
    merged_model.save_pretrained("./merged_model", max_shard_size="1GB")
    # NOTE(review): max_shard_size looks like a model-saving option — confirm the tokenizer uses it.
    tokenizer.save_pretrained("./merged_model", max_shard_size="1GB")

    mlflow.transformers.log_model(
        # transformers_model={
        #     "model": merged_model,
        #     "tokenizer": tokenizer,
        # },
        transformers_model=pipe,
        artifact_path="merged_model",
        task="text-generation",
        pip_requirements=requirements,
    )
    # peft_model_path = os.path.join("outputs", "lora_model")
    # merged_model = trainer.model.save_pretrained(peft_model_path)
    # save the lora adapters
    # trainer.model.save_pretrained("./lora_adapters")  # Save the LoRA weights
    # tokenizer.save_pretrained("./lora_adapters")  # Save the tokenizer
    # Log the adapter folder to MLflow as an artifact
    # mlflow.log_artifacts("./lora_adapters", artifact_path="lora_model")
    # Log the model to MLflow
    # mlflow.transformers.log_model(
    #     transformer_model={
    #         "model": trainer.model,
    #         "tokenizer": tokenizer,
    #     },
    #     artifact_path="lora_model",
    #     task="text-generation",
    # )

In [None]:
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
# )
# generation_args = {
#     "max_new_tokens": 200,
#     "return_full_text": False,
#     "do_sample": False,
# }

# output = pipe(["Happiness", "Дегтярев наградил мать Овечкина почетным знаком Минспорта."], **generation_args)

In [None]:
import mlflow

# URI of the logged model to validate (hard-coded run ID)
model_uri = "runs:/f7f6514ad4394e32a32a6dca3dcdb9ea/merged_model"

# Sample input for the check; replace with your own example.
# A valid example is a data instance suitable for pyfunc prediction.
input_data = "category: happiness; text: Дегтярев наградил мать Овечкина почетным знаком Минспорта."

# Run a prediction against the logged model using its logged dependencies.
# See https://mlflow.org/docs/latest/models.html#validate-models-before-deployment
predict_kwargs = {
    "model_uri": model_uri,
    "input_data": input_data,
    "env_manager": "uv",
}
mlflow.models.predict(**predict_kwargs)

In [None]:
import mlflow
import pandas as pd

# URI of the logged model (hard-coded run ID)
logged_model = "runs:/f7f6514ad4394e32a32a6dca3dcdb9ea/merged_model"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
input_data = "category: happiness; text: Дегтярев наградил мать Овечкина почетным знаком Минспорта."

# pd.DataFrame(<plain string>) raises ValueError ("DataFrame constructor not
# properly called!"); wrapping the string in a list gives a one-row, one-column frame.
loaded_model.predict(pd.DataFrame([input_data]))

In [None]:
import mlflow.pyfunc
from transformers import pipeline

# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
# )

# generation_args = {
#     "max_new_tokens": 200,
#     "return_full_text": False,
#     "do_sample": False,
# }

# output = pipe(context, **generation_args)

# Registered model to load, addressed by name and version
model_name = "qlora-analyzer-base"
model_version = 2

# NOTE(review): this rebinds `model`, which earlier cells use for the training
# model — re-running those cells after this one will see the pyfunc wrapper instead.
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")

# load_model returns a PyFuncModel, whose inference entry point is predict();
# it has no generate() method, so the original call raised AttributeError.
output = model.predict(
    ["Happiness", "Дегтярев наградил мать Овечкина почетным знаком Минспорта."]
)
print(output)

In [None]:
# Fetch every experiment, including deleted ones
all_experiments = mlflow.search_experiments(view_type=mlflow.entities.ViewType.ALL)

# Print ID, name and lifecycle stage, one experiment per line
for exp in all_experiments:
    summary = f"ID: {exp.experiment_id}, Name: {exp.name}, Lifecycle: {exp.lifecycle_stage}"
    print(summary)

In [None]:
# Restore a previously deleted experiment, addressed by its hard-coded ID
mlflow.restore_experiment("522564765305824673")

In [None]:
# Look the experiment up by name and delete it only when the lookup found something.
# NOTE(review): the original comment called this a full deletion; MLflow appears to
# treat delete_experiment as a soft delete that restore_experiment can undo — confirm.
exp = mlflow.get_experiment_by_name("LoRA Fine-tuning")
experiment_found = bool(exp)
if experiment_found:
    experiment_id = exp.experiment_id
    mlflow.delete_experiment(experiment_id)