In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

import mlflow
import mlflow.pytorch

mlflow.set_experiment("SmartSupport-Transformer-Research")

  from .autonotebook import tqdm as notebook_tqdm
2026-01-05 05:18:23.051818: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if not hasattr(np, "object"):
2026/01/05 05:18:26 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/05 05:18:26 INFO mlflow.store.db.utils: Updating database tables
2026/01/05 05:18:26 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/05 05:18:26 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/05 05:18:26 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
2026/01/05 05:18:26 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
2026/01/05 05:18:26 INFO alembic.runtime.migration

<Experiment: artifact_location='/workspace/SmartSupport/notebooks/mlruns/1', creation_time=1767590307289, experiment_id='1', last_update_time=1767590307289, lifecycle_stage='active', name='SmartSupport-Transformer-Research', tags={}>

In [2]:
print("CUDA available:", torch.cuda.is_available())
print("GPU name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

CUDA available: True
GPU name: Tesla T4


In [3]:
DATA_PATH = "../data/raw/tickets.csv"
df = pd.read_csv(DATA_PATH)

# Use BOTH English + German (key difference vs LSTM)
df = df[["subject", "body", "type"]].reset_index(drop=True)
df.head()

Unnamed: 0,subject,body,type
0,Wesentlicher Sicherheitsvorfall,"Sehr geehrtes Support-Team,\n\nich möchte eine...",Incident
1,Account Disruption,"Dear Customer Support Team,\n\nI am writing to...",Incident
2,Query About Smart Home System Integration Feat...,"Dear Customer Support Team,\n\nI hope this mes...",Request
3,Inquiry Regarding Invoice Details,"Dear Customer Support Team,\n\nI hope this mes...",Request
4,Question About Marketing Agency Software Compa...,"Dear Support Team,\n\nI hope this message reac...",Problem


In [4]:
df["text"] = df["subject"].fillna("") + " " + df["body"].fillna("")
df['text'] = df['text'].str.replace('\\n', ' ')

In [5]:
le = LabelEncoder()
df["label"] = le.fit_transform(df["type"])
num_labels = df["label"].nunique()
num_labels

4

In [6]:
train_df, test_df = train_test_split(
    df[["text", "label"]],
    test_size=0.2,
    random_state=42,
    stratify=df["label"]
)

In [7]:
MODEL_NAME = "xlm-roberta-base"  # multilingual
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=128
    )

train_ds = Dataset.from_pandas(train_df).map(tokenize, batched=True)
test_ds  = Dataset.from_pandas(test_df).map(tokenize, batched=True)

train_ds = train_ds.remove_columns(["text"])
test_ds  = test_ds.remove_columns(["text"])
train_ds.set_format("torch")
test_ds.set_format("torch")

Map: 100%|██████████| 22869/22869 [00:03<00:00, 6957.18 examples/s]
Map: 100%|██████████| 5718/5718 [00:00<00:00, 6446.71 examples/s]


In [16]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels
)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "macro_f1": f1_score(labels, preds, average="macro")
    }

In [18]:
training_args = TrainingArguments(
    output_dir="./transformer_out",

    eval_strategy="epoch",
    save_strategy="no",

    learning_rate=2e-5,

    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,

    gradient_accumulation_steps=2,  

    num_train_epochs=3,
    weight_decay=0.01,

    fp16=True,
    fp16_full_eval=True,

    logging_steps=50,

    report_to="none"
)


In [19]:
with mlflow.start_run(run_name="xlm_roberta_multilingual"):

    mlflow.log_param("model_type", "Transformer")
    mlflow.log_param("model_name", MODEL_NAME)
    mlflow.log_param("languages", "en+de")
    mlflow.log_param("max_length", 128)
    mlflow.log_param("epochs", 3)
    mlflow.log_param("batch_size", 16)
    mlflow.log_param("fp16", True)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=test_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()
    eval_metrics = trainer.evaluate()

    mlflow.log_metric("accuracy", eval_metrics["eval_accuracy"])
    mlflow.log_metric("macro_f1", eval_metrics["eval_macro_f1"])


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1
1,0.4176,0.377144,0.812172,0.791127
2,0.3247,0.360536,0.833683,0.840467
3,0.2608,0.371282,0.845575,0.853809
