In [1]:
import ast
import mlflow
import pandas as pd

from trl import SFTTrainer
from datasets import load_dataset, Dataset, ClassLabel, Features, Value
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig


# Point every MLflow call made in this notebook at the tracking server on this URI.
mlflow.set_tracking_uri("http://localhost:5000")

### Define variables

In [2]:
# Hugging Face Hub id of the base checkpoint that is fine-tuned below.
base_model = "mistralai/Mistral-7B-Instruct-v0.1"

# Dataset version; logged to MLflow as `train_dataset_version`.
# The path is derived from it so the logged version cannot drift from the data that is
# actually loaded (previously "0.1" was logged while the path pointed at the 1.0 folder).
# NOTE(review): confirm that 1.0 (taken from the original path) is the intended version.
dataset_version = "1.0"

dataset_path = f"s3://dataset/fball_dataset/{dataset_version}/football_training_mistral_with_teams.json"

### Load base model

In [56]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
# `is None` is an identity check and cannot be affected by a custom `__eq__`.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Initialize bits and byte config (load the base weights quantized to 4-bit)
bits_bytes_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the quantized base model and place it entirely on the first CUDA device.
model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=bits_bytes_config, device_map="cuda:0")

# Turn on gradient checkpointing before handing the model to PEFT.
model.gradient_checkpointing_enable()

# PEFT helper that prepares the quantized model for adapter training.
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Prepare dataset

In [57]:
def _extract_article_type(sample_text):
    """Return the "type" field of the dict literal that follows "[/INST]" in a sample.

    The text after the first "[/INST]" marker is stripped, has "</s>" removed and is
    parsed with `ast.literal_eval`; the parsed object's "type" entry is returned.
    """
    completion = sample_text.split("[/INST]")[1].strip().replace("</s>", "")
    return ast.literal_eval(completion)["type"]


df = pd.read_json(dataset_path)
# Derive a per-sample label that is only needed to stratify the split below.
df["type"] = df["text"].apply(_extract_article_type)

# Class names in order of first appearance in the data.
article_types = df["type"].unique().tolist()
class_label = ClassLabel(names=article_types)

features = Features({"text": Value("string"), "type": class_label})

hf_dataset = Dataset.from_pandas(df, features=features)

# 80/20 split, stratified on the article type, with a fixed seed.
dataset_splits = hf_dataset.train_test_split(test_size=0.2, stratify_by_column="type", seed=42)

# The label column was only needed for stratification; drop it from both splits.
for split_name in ("train", "test"):
    dataset_splits[split_name] = dataset_splits[split_name].remove_columns("type")

In [58]:
# Sanity check: show the columns and row counts of the train/test splits.
print(dataset_splits)

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 640
    })
    test: Dataset({
        features: ['text'],
        num_rows: 160
    })
})


### LoRA Config

In [66]:
# LoRA adapter settings, collected in one place and unpacked into the config.
lora_settings = dict(
    r=8,                                  # rank of the low-rank update matrices
    lora_alpha=32,                        # LoRA scaling parameter
    target_modules=["q_proj", "v_proj"],  # module names that receive adapters
    bias="none",                          # no bias parameters are trained
    task_type="CAUSAL_LM",
)

peft_config = LoraConfig(**lora_settings)

### Training Arguments

In [67]:
training_args = TrainingArguments(
    output_dir="./fball_mistral_checkpoint",
    per_device_train_batch_size=2,
    # Gradients from 4 consecutive micro-batches are accumulated before each optimizer
    # step, giving an effective batch size of 2 * 4 = 8 per device.
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,  # Train with fp16 mixed precision.
    # Log the training loss every 10 optimizer steps.
    logging_strategy="steps",
    logging_steps=10,
    # Checkpoint at the end of every epoch and keep only the two most recent ones.
    # (`save_steps` was dropped: it is only honoured with save_strategy="steps",
    # so the previous save_steps=200 had no effect.)
    save_strategy="epoch",
    save_total_limit=2,
    remove_unused_columns=False,
    report_to="mlflow",
    seed=42,
    warmup_steps=50,  # The learning rate is ramped up to its target value over the first 50 steps.
)

## Run MLflow experiment

In [68]:
mlflow.set_experiment("fball_mistral_experiments")

with mlflow.start_run(run_name="fball_run_v1"):
    # Log dataset info
    mlflow.log_param("train_dataset_uri", dataset_path)
    mlflow.log_param("train_dataset_version", dataset_version)

    # Log model + LoRA hyperparameters
    mlflow.log_params({
        "model_name": 'fball_mistral7b_instruct',
        "lora_r": peft_config.r,
        "lora_alpha": peft_config.lora_alpha,
        "lora_dropout": peft_config.lora_dropout,
        # PEFT stores list-valued target_modules as a set, so sort before joining to
        # get the same logged string on every run.
        "target_modules": ",".join(sorted(peft_config.target_modules)),
        "gradient_accumulation_steps": training_args.gradient_accumulation_steps,
        "batch_size": training_args.per_device_train_batch_size,
        "num_train_epochs": training_args.num_train_epochs,
        "learning_rate": training_args.learning_rate,
        "fp16": training_args.fp16
    })

    # Initialize trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_splits['train'],
        peft_config=peft_config,
        args=training_args,
        # Use the tokenizer prepared in the model-loading cell (pad token already set)
        # instead of letting the trainer silently load a second one.
        processing_class=tokenizer,
    )

    # Metrics are reported to the active MLflow run via report_to="mlflow".
    trainer.train()

Adding EOS to train dataset:   0%|          | 0/640 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/640 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/640 [00:00<?, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.4208
20,2.2488
30,2.1522
40,2.1188
50,2.0011
60,1.9703
70,1.9731
80,1.9796
90,1.852
100,1.8968


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


🏃 View run fball_run_v1 at: http://localhost:5000/#/experiments/2/runs/ab3d9e65df7c47e4a356c064117dd23e
🧪 View experiment at: http://localhost:5000/#/experiments/2


### Log Model

In [69]:
# Re-open the most recently active run so the model is attached to the training run.
last_run_id = mlflow.last_active_run().info.run_id

# Fresh tokenizer loaded from the base model (no pad-token override) for the logged model.
tokenizer_no_pad = AutoTokenizer.from_pretrained(base_model, add_bos_token=True)

# Components that MLflow's transformers flavor logs together.
model_components = {"model": trainer.model, "tokenizer": tokenizer_no_pad}

with mlflow.start_run(run_id=last_run_id):
    mlflow.transformers.log_model(transformers_model=model_components, name="fball_model")

Device set to use cuda:0
2025/10/06 14:17:44 INFO mlflow.transformers: Overriding save_pretrained to False for PEFT models, following the Transformers behavior. The PEFT adaptor and config will be saved, but the base model weights will not and reference to the HuggingFace Hub repository will be logged instead.
2025/10/06 14:17:45 INFO mlflow.transformers: Skipping saving pretrained model weights to disk as the save_pretrained argumentis set to False. The reference to the HuggingFace Hub repository mistralai/Mistral-7B-Instruct-v0.1 will be logged instead.
2025/10/06 14:17:47 INFO mlflow.transformers: A local checkpoint path or PEFT model is given as the `transformers_model`. To avoid loading the full model into memory, we don't infer the pip requirement for the model. Instead, we will use the default requirements, but it may not capture all required pip libraries for the model. Consider providing the pip requirements explicitly.


🏃 View run fball_run_v1 at: http://localhost:5000/#/experiments/2/runs/ab3d9e65df7c47e4a356c064117dd23e
🧪 View experiment at: http://localhost:5000/#/experiments/2


## Inference

In [4]:
# Load the model logged by the training run back as a generic pyfunc model.
# NOTE(review): the MLflow messages emitted at logging time say only the PEFT adapter and
# a reference to the Hub repository were saved, so this load appears to need access to
# huggingface.co (or an already populated local cache) — confirm before running offline.
model_uri = "runs:/ab3d9e65df7c47e4a356c064117dd23e/fball_model"
mlflow_model = mlflow.pyfunc.load_model(model_uri)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/10 [00:00<?, ?it/s]



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

ConnectionError: (MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /api/models/mistralai/Mistral-7B-Instruct-v0.1/tree/ec5deb64f2c6e6fa90c1abf74a91d5c93a9669ca/additional_chat_templates?recursive=False&expand=False (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002A9BABF1F60>: Failed to resolve \'huggingface.co\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 7a61fbdd-cded-48fe-91de-7309fe424a0b)')