In [1]:
import os

import mlflow

from trl import SFTTrainer
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig
)

from dotenv import load_dotenv


load_dotenv()
mlflow.set_tracking_uri("http://localhost:5000")

In [2]:
os.environ["AWS_ENDPOINT_URL"]

'http://localhost:9000'

## Dataset Info in MinIO

In [3]:
train_dataset_uri = "s3://dataset/win_assist_dataset/v1.0/mistral_inst_format.json"
dataset_version = "v1.0"

In [4]:
train_dataset = load_dataset("json", data_files=train_dataset_uri)["train"]

## Load base model and tokenizer

In [5]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="cuda:0"
)

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## LoRA Config

In [6]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

## Training Arguments

In [7]:
training_args = TrainingArguments(
    output_dir="./lora_mistral_checkpoint",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="steps",
    logging_strategy="steps",
    save_steps=200,
    save_total_limit=2,
    remove_unused_columns=False,
    report_to="mlflow"
)

## Run MLflow exp run

In [8]:
mlflow.set_experiment("win_assistant_mistral7b_inst")

with mlflow.start_run(run_name="mistral7b_lora_finetune_v2"):
    # Log dataset info
    mlflow.log_param("train_dataset_uri", train_dataset_uri)
    mlflow.log_param("train_dataset_version", dataset_version)

    # Log model + LoRA hyperparameters
    mlflow.log_params({
        "model_name": model_name,
        "lora_r": peft_config.r,
        "lora_alpha": peft_config.lora_alpha,
        "lora_dropout": peft_config.lora_dropout,
        "target_modules": ",".join(peft_config.target_modules),
        "gradient_accumulation_steps": training_args.gradient_accumulation_steps,
        "batch_size": training_args.per_device_train_batch_size,
        "num_train_epochs": training_args.num_train_epochs,
        "learning_rate": training_args.learning_rate,
        "fp16": training_args.fp16
    })

    # Initialize trainer
    trainer = SFTTrainer(
        model=model,    
        train_dataset=train_dataset,
        peft_config=peft_config,
        args=training_args
    )

    trainer.train()

2025/09/29 23:31:06 INFO mlflow.tracking.fluent: Experiment with name 'win_assistant_mistral7b_inst' does not exist. Creating a new experiment.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,3.225
20,1.8948
30,1.0912
40,0.7858
50,0.6458
60,0.5796


🏃 View run mistral7b_lora_finetune_v2 at: http://localhost:5000/#/experiments/1/runs/72970c479b3344df9df0c0111ddcdc38
🧪 View experiment at: http://localhost:5000/#/experiments/1


## Log Model

In [10]:
last_run_id = mlflow.last_active_run().info.run_id

tokenizer_no_pad = AutoTokenizer.from_pretrained(model_name, add_bos_token=True)

with mlflow.start_run(run_id=last_run_id):
    mlflow.transformers.log_model(
        transformers_model={"model": trainer.model, "tokenizer": tokenizer_no_pad},
        name="win_assist_model",
    )

Device set to use cuda:0
2025/09/29 23:36:22 INFO mlflow.transformers: Overriding save_pretrained to False for PEFT models, following the Transformers behavior. The PEFT adaptor and config will be saved, but the base model weights will not and reference to the HuggingFace Hub repository will be logged instead.
2025/09/29 23:36:23 INFO mlflow.transformers: Skipping saving pretrained model weights to disk as the save_pretrained argumentis set to False. The reference to the HuggingFace Hub repository mistralai/Mistral-7B-Instruct-v0.1 will be logged instead.
2025/09/29 23:36:24 INFO mlflow.transformers: A local checkpoint path or PEFT model is given as the `transformers_model`. To avoid loading the full model into memory, we don't infer the pip requirement for the model. Instead, we will use the default requirements, but it may not capture all required pip libraries for the model. Consider providing the pip requirements explicitly.


🏃 View run mistral7b_lora_finetune_v2 at: http://localhost:5000/#/experiments/1/runs/72970c479b3344df9df0c0111ddcdc38
🧪 View experiment at: http://localhost:5000/#/experiments/1
