# IT Ticket Classifier — DHAUZ Challenge

Notebook executável: carrega o dataset, amostra 200 tickets, monta o RAG (embeddings + FAISS), executa o fluxo LangGraph em exemplos e na amostra e calcula as métricas.

In [1]:
# Environment setup for the notebook: imports, import path, .env loading and RNG seed.
import os
import sys
import numpy as np
from pathlib import Path

# ROOT is the current working directory resolved to an absolute path.
# It is pushed to the front of sys.path, presumably so that the project-local
# `config` module and `src` package used by this notebook can be imported.
ROOT = Path(".").resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Load environment variables from the `.env` file located in ROOT.
from dotenv import load_dotenv
load_dotenv(ROOT / ".env")

# Seed NumPy's legacy global RNG with the project-wide seed.
# NOTE(review): only NumPy is seeded here; confirm whether other RNGs
# (stdlib `random`, torch, the LLM sampler) need seeding for reproducibility.
import config
np.random.seed(config.SEED)

## 0. Baixar dataset do Kaggle (só se ainda não tiver o CSV em data/raw)

In [2]:
from src.prep import download_from_kaggle

# Fetch the dataset and report the path returned by the helper.
# NOTE(review): per the section heading the download is skipped when the CSV
# already exists; that logic lives in src.prep and is not visible here.
path = download_from_kaggle()
message = f"Dataset em: {path}"
print(message)

Dataset em: /Users/moises/Documents/ticket-classifier:/data/raw/all_tickets_processed_improved_v3.csv


## 1. Dataset (exceto a amostra de teste) → vector store; sample_200 → test set

In [3]:
import pandas as pd
from src.prep import document_text, load_dataset, get_text_and_label_columns, stratified_sample
from src.rag import VectorStore

# Load the full dataset and derive the sorted list of class labels (as strings).
df_full = load_dataset()
text_cols, label_col = get_text_and_label_columns(df_full)
classes = sorted(set(df_full[label_col].astype(str)))

# Draw the stratified test sample (capped at the dataset size) and persist it.
n_sample = min(config.SAMPLE_SIZE, len(df_full))
df_sample = stratified_sample(df_full, label_col, n=n_sample)
# DataFrame.to_csv does not create missing parent folders, so make sure the
# target directory exists (assumes config.DATA_PROCESSED is a pathlib.Path,
# as its use with `/` below suggests).
config.DATA_PROCESSED.mkdir(parents=True, exist_ok=True)
df_sample.to_csv(config.DATA_PROCESSED / "sample_200.csv", index=False)

# Every row whose id is not in the test sample forms the retrieval ("train") corpus.
ids_test = set(df_sample["id"])
df_train = df_full[~df_full["id"].isin(ids_test)]
# Sanity check on the split: duplicated or missing ids would silently change its size.
assert len(df_train) + len(df_sample) == len(df_full), (
    "Split inconsistente: train + sample != dataset completo (ids duplicados ou ausentes?)"
)

artifact_path = config.ARTIFACTS_DIR
if (artifact_path / "index.faiss").exists():
    # NOTE(review): the cached index is loaded as-is; nothing here verifies that
    # it was built without the ids of the current test sample. Delete the
    # artifacts whenever the sample (size or seed) changes.
    store = VectorStore.load(artifact_path)
    print("Vector store carregado de:", artifact_path)
else:
    # The training texts/labels/ids are only needed to build the index, so they
    # are materialised here instead of on every run (iterating all training
    # rows is wasted work when the cached index is loaded).
    texts_train = [document_text(row, text_cols) for _, row in df_train.iterrows()]
    labels_train = df_train[label_col].astype(str).tolist()
    ids_train = df_train["id"].tolist()

    vc = VectorStore()
    store = vc.build(texts_train, labels_train, ids=ids_train)
    store.save(artifact_path)
    print("Vector store construído (apenas train) e salvo em:", artifact_path)

# Texts of the test sample, in df_sample row order; used by the inference cells below.
texts = [document_text(row, text_cols) for _, row in df_sample.iterrows()]

print("Classes:", classes)
print("Vector store (train): %d documentos" % len(df_train))
print("Test set (sample_200): %d tickets" % len(df_sample))

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Vector store carregado de: /Users/moises/Documents/ticket-classifier:/outputs/artifacts
Classes: ['Access', 'Administrative rights', 'HR Support', 'Hardware', 'Internal Project', 'Miscellaneous', 'Purchase', 'Storage']
Vector store (train): 47637 documentos
Test set (sample_200): 200 tickets


In [4]:
# Count how many sampled tickets fall into each class, then print the table.
class_counts = df_sample[label_col].value_counts()
print("Distribuição por classe:")
print(class_counts)

Distribuição por classe:
Topic_group
Hardware                 25
Access                   25
Miscellaneous            25
HR Support               25
Purchase                 25
Administrative rights    25
Storage                  25
Internal Project         25
Name: count, dtype: int64


## 2. Inferência em exemplos


In [5]:
# `store`, `classes` and `texts` were created by the section 1 cell (In [3]).
import logging
from tqdm.auto import tqdm
from src.graph import build_pipeline, run_pipeline

# Only show WARNING and above from the LLM-usage logger.
logging.getLogger("ticket_classifier.llm_usage").setLevel(logging.WARNING)

# build_pipeline returns four values; only the first one is used in this notebook.
compiled, _, _, _ = build_pipeline(store, classes)

# Run the pipeline on up to three test tickets and print the raw output dict.
n_examples = min(3, len(texts))
for i in tqdm(range(n_examples), desc="Exemplos"):
    # Give each example its own thread_id. Without it all calls share one
    # thread and state from a previous ticket can carry over (the captured log
    # shows a kNN-only inference reporting the previous ticket's
    # classification_tokens). The "example-" prefix avoids colliding with the
    # numeric thread ids used by the sample loop.
    # NOTE(review): presumably LangGraph checkpoint state keyed by thread - confirm in src.graph.
    out = run_pipeline(compiled, texts[i], classes, thread_id=f"example-{i}")
    print(f"--- Ticket {i+1} ---")
    print("Texto (trecho):", texts[i][:150], "...")
    print("Saída:", out)
    print()

Exemplos:   0%|          | 0/3 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 1.0}
llama_context: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_set_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_c4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16

--- Ticket 1 ---
Texto (trecho): monitor request vulcan friday october pm hello please log each user monitor allocation user vulcan thank weekend engineer friday october vulcan parte  ...
Saída: {'classe': 'Hardware', 'justificativa': 'O KNN foi favorável à classe Hardware pois os vizinhos mais próximos (6/6) também foram classificados como Hardware, com distâncias de 0.59 a 0.69. Essa alta confiança e semelhanças nos textos sustentam a classificação como problema relacionado a Hardware.', 'classification_source': 'knn', 'inference_time_sec': 22.402748082997277}



{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 0.5714}
{"event": "classification", "classifier": "llm", "classe": "Hardware", "model": "llama-local", "input_tokens": 384, "output_tokens": 9}
{"event": "justification", "model": "llama-local", "input_tokens": 493, "output_tokens": 93, "total_tokens": 586}
{"event": "inference", "classification_source": "llm", "classe": "Hardware", "inference_time_sec": 21.2903, "classification_tokens": 393, "justification_tokens": 586}


--- Ticket 2 ---
Texto (trecho): stopped when docker start was executed sent wednesday february hi we having same problem we had few days ago server was stopped when executed docker s ...
Saída: {'classe': 'Hardware', 'justificativa': 'O KNN foi favorável à classe Hardware porque os vizinhos mais próximos (distâncias 0.42, 0.74, 0.91 e 0.96) também foram classificados como Hardware, indicando que a similaridade entre os problemas é alta e que a causa provável é relacionada ao hardware, como problemas de instalação, acesso ou funcionamento do servidor.', 'classification_source': 'llm', 'inference_time_sec': 21.290285165945534}



{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 1.0}
{"event": "justification", "model": "llama-local", "input_tokens": 464, "output_tokens": 76, "total_tokens": 540}
{"event": "inference", "classification_source": "knn", "classe": "Hardware", "inference_time_sec": 14.1442, "classification_tokens": 393, "justification_tokens": 540}


--- Ticket 3 ---
Texto (trecho): issue re access through for hello still work attached log error received during installation restarted machine disconnected tethered phone can connect ...
Saída: {'classe': 'Hardware', 'justificativa': 'O KNN foi favorável à classe Hardware pois todos os vizinhos mais próximos (distâncias entre 0,63 e 0,72) apresentam problemas de conexão relacionados a hardware, como instalação, autenticação e problemas de acesso, o que sustenta a classificação atribuída.', 'classification_source': 'knn', 'inference_time_sec': 14.144190667022485}



## 3. Rodar na amostra de 200 e salvar resultados

In [None]:
from src.logging_utils import log_result
from tqdm.auto import tqdm

# Remove any results file left by a previous run before logging new records.
# NOTE(review): presumably log_result writes to this same file; the path is not
# passed to it here, so confirm in src.logging_utils.
results_path = config.OUTPUTS / "results_sample.jsonl"
if results_path.exists():
    results_path.unlink()

# Classify every ticket of the test sample, keeping predictions in row order.
sample_rows = list(df_sample.iterrows())
predictions = []
for pos, (_, row) in enumerate(tqdm(sample_rows, desc="Pipeline")):
    text = document_text(row, text_cols)
    # One thread per ticket (its position in the sample), tagged with the ticket id.
    out = run_pipeline(
        compiled,
        text,
        classes,
        thread_id=str(pos),
        instance_id=row["id"],
    )
    pred = out["classe"]
    predictions.append(pred)

    # Optional fields are read with .get(), so a missing key is logged as None
    # (the example outputs shown earlier carry no "confidence" key).
    record = {
        "id": row["id"],
        "ticket_index": pos,
        "true": row[label_col],
        "pred": pred,
        "justificativa": out["justificativa"],
        "classification_source": out.get("classification_source"),
        "confidence": out.get("confidence"),
        "inference_time_sec": out.get("inference_time_sec"),
    }
    log_result(record)

print(f"Salvos {len(predictions)} resultados em {results_path}")

Pipeline:   0%|          | 0/200 [00:00<?, ?it/s]

{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 1.0}
{"event": "justification", "model": "llama-local", "input_tokens": 463, "output_tokens": 96, "total_tokens": 559}
{"event": "inference", "classification_source": "knn", "classe": "Hardware", "inference_time_sec": 17.1582, "justification_tokens": 559}
{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 0.5714}
{"event": "classification", "classifier": "llm", "classe": "Hardware", "model": "llama-local", "input_tokens": 384, "output_tokens": 9}
{"event": "justification", "model": "llama-local", "input_tokens": 493, "output_tokens": 75, "total_tokens": 568}
{"event": "inference", "classification_source": "llm", "classe": "Hardware", "inference_time_sec": 22.8091, "classification_tokens": 393, "justification_tokens": 568}
{"event": "classification", "classifier": "knn", "classe": "Hardware", "confidence": 1.0}
{"event": "justification", "model": "llama-local", "input_t

## 4. Métricas e relatório

In [None]:
from src.metrics import compute_metrics, save_metrics_report

# Ground-truth labels in the same row order used to build `predictions`.
y_true = df_sample[label_col].astype(str).tolist()
# The sample loop is long-running; if it was interrupted, `predictions` is
# partial and the metrics would be meaningless (or fail later with a less
# helpful error).
assert len(predictions) == len(y_true), (
    f"predictions has {len(predictions)} items but the sample has {len(y_true)}; "
    "re-run the sample cell to completion first"
)

metrics = compute_metrics(y_true, predictions, labels=classes)
save_metrics_report(metrics)

print("Accuracy:", metrics["accuracy"])
print("F1 macro:", metrics["f1_macro"])
print("F1 weighted:", metrics["f1_weighted"])
print("\nClassification report:")
# Each entry is printed the same way whether its value is a dict or a scalar
# (the original if/else had two identical branches).
for k, v in metrics["classification_report"].items():
    print(k, v)

## 5. Exemplo de saída JSON

In [None]:
import json

# Run the first test ticket through the pipeline and pretty-print the result as JSON.
# A dedicated thread_id keeps this run from reusing state left behind by earlier
# calls that did not pass one.
# NOTE(review): presumably LangGraph checkpoint state keyed by thread - confirm in src.graph.
ex = run_pipeline(compiled, texts[0], classes, thread_id="json-example")
# ensure_ascii=False keeps accented characters readable instead of \u escapes.
print(json.dumps(ex, indent=2, ensure_ascii=False))

---

In [None]:
import json

# NOTE(review): this cell repeats the "Exemplo de saída JSON" example from
# section 5; consider removing it from the notebook.
# It uses its own thread_id so the run does not reuse state from earlier calls.
# NOTE(review): presumably LangGraph checkpoint state keyed by thread - confirm in src.graph.
ex = run_pipeline(compiled, texts[0], classes, thread_id="json-example-repeat")
# ensure_ascii=False keeps accented characters readable instead of \u escapes.
print(json.dumps(ex, indent=2, ensure_ascii=False))