In [None]:
import mlflow
from mlflow.models import infer_signature

from trl import SFTTrainer
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig
)

from dotenv import load_dotenv


# Pull variables from a local .env file into the process environment before
# any client below is created.
load_dotenv()

# Address of the MLflow tracking server this notebook reports to.
MLFLOW_TRACKING_URI = "http://localhost:5000"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

## Dataset Info in MinIO

In [None]:
# Single source of truth for the dataset version: it is logged to MLflow as a
# run parameter and is also a path component of the dataset URI, so the URI is
# derived from it to keep the two from drifting apart.
dataset_version = "v1.0"

# Location of the training data file (JSON) for that version.
train_dataset_uri = (
    f"s3://dataset/win_assist_dataset/{dataset_version}/mistral_inst_format.json"
)

In [None]:
# Read the JSON file with the generic "json" builder and ask for the "train"
# split directly instead of indexing the returned split mapping.
train_dataset = load_dataset(
    "json",
    data_files=train_dataset_uri,
    split="train",
)

## Load base model and tokenizer

In [None]:
# Hugging Face Hub id of the base checkpoint that gets fine-tuned.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# --- Tokenizer ---------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name)
# If the tokenizer defines no padding token, reuse the end-of-sequence token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# --- Quantized base model ----------------------------------------------------
# Load the weights in 4-bit; all other bitsandbytes options stay at defaults.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "cuda:0",  # place the whole model on the first GPU
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Turn on gradient checkpointing, then hand the model to PEFT's k-bit
# preparation helper before adapters are attached.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

## LoRA Config

In [None]:
# LoRA adapter settings; these values are also logged to MLflow further down.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Modules that receive adapters.
    target_modules=["q_proj", "v_proj"],
    # Adapter rank, scaling factor and dropout.
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    # No bias parameters are trained.
    bias="none",
)

## Training Arguments

In [None]:
training_args = TrainingArguments(
    # --- Optimisation ---
    # 2 samples per device x 4 accumulation steps = 8 samples per optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=20,
    learning_rate=2e-4,
    optim="paged_adamw_32bit",
    fp16=True,
    # --- Checkpointing ---
    output_dir="./lora_mistral_checkpoint",
    save_strategy="steps",
    save_steps=200,
    save_total_limit=2,
    # --- Logging ---
    logging_strategy="steps",
    logging_steps=10,
    report_to="mlflow",
    # --- Data handling ---
    remove_unused_columns=False,
)

## Run the MLflow experiment

In [None]:
mlflow.set_experiment("win_assistant_mistral7b_inst")

# Parameters are logged and training is launched inside one MLflow run.
with mlflow.start_run(run_name="mistral7b_lora_finetune_v2"):
    # Log dataset info
    mlflow.log_param("train_dataset_uri", train_dataset_uri)
    mlflow.log_param("train_dataset_version", dataset_version)

    # PEFT's LoraConfig stores a list of target modules as a set, so joining it
    # directly yields an order that can change between kernel sessions. Sort
    # collections for a stable value; a plain string (PEFT also accepts a regex
    # string here) is logged unchanged rather than joined character by character.
    lora_target_modules = peft_config.target_modules
    if isinstance(lora_target_modules, (list, tuple, set, frozenset)):
        lora_target_modules = ",".join(sorted(lora_target_modules))

    # Log model + LoRA hyperparameters
    mlflow.log_params({
        "model_name": model_name,
        "lora_r": peft_config.r,
        "lora_alpha": peft_config.lora_alpha,
        "lora_dropout": peft_config.lora_dropout,
        "target_modules": lora_target_modules,
        "gradient_accumulation_steps": training_args.gradient_accumulation_steps,
        "batch_size": training_args.per_device_train_batch_size,
        "num_train_epochs": training_args.num_train_epochs,
        "learning_rate": training_args.learning_rate,
        "fp16": training_args.fp16
    })

    # Initialize trainer; passing peft_config lets SFTTrainer attach the LoRA
    # adapters to the quantized base model.
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        peft_config=peft_config,
        args=training_args
    )

    trainer.train()

## Log Model

In [None]:
# Look up the most recently active run so the model is attached to that same
# run instead of a new one.
previous_run = mlflow.last_active_run()
last_run_id = previous_run.info.run_id

# A fresh tokenizer is loaded for the saved model; unlike the training
# tokenizer it gets no pad-token override, and add_bos_token is switched on.
tokenizer_no_pad = AutoTokenizer.from_pretrained(model_name, add_bos_token=True)

# Components stored together under the MLflow transformers flavor.
model_components = {"model": trainer.model, "tokenizer": tokenizer_no_pad}

# Re-open the earlier run by id and log the model under it.
with mlflow.start_run(run_id=last_run_id):
    mlflow.transformers.log_model(
        transformers_model=model_components,
        name="win_assist_model",
    )

## Inference

In [None]:
# Load the logged model through MLflow's generic pyfunc interface.
# mlflow.pyfunc.load_model returns a PyFuncModel wrapper, which has no `.to()`
# method, so the wrapper cannot be moved to a device the way a torch module can;
# calling `.to("cuda:0")` on it raises AttributeError.
# NOTE(review): the run id below is hard-coded and refers to one specific run;
# replace it with the id of the run that logged "win_assist_model" when
# re-running the notebook from scratch.
mlflow_model = mlflow.pyfunc.load_model(
    "runs:/c7af974a12444890acd4d10bcf736935/win_assist_model"
)

In [None]:
# Smoke test: send one example instruction through the loaded model.
sample_prompt = "Open notepad"
ed_query = mlflow_model.predict(sample_prompt)