In [None]:
from sentence_transformers import SentenceTransformer, InputExample, losses, models
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import torch
import os
from datasets import Dataset



  from .autonotebook import tqdm as notebook_tqdm


: 

In [None]:
# ========= Load and prepare the dataset =========
df = pd.read_csv("resume-job-description-fit (texto plano)/train.csv")

# Show the column names so they can be checked against the ones used below.
print(df.columns)

# Drop rows that lack either text or the label (adjust the names to your file).
df = df.dropna(subset=['resume_text', 'job_description_text', 'label'])

# Encode the class names as integer ids. LabelEncoder numbers the classes in
# sorted order, so an id reflects the class name's position in that ordering,
# not how good the fit is; label_encoder.classes_ holds the id -> name lookup.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label']).astype(int)

# Build one InputExample per (resume, job description) pair.
# The columns are zipped directly instead of using DataFrame.iterrows(), which
# creates a Series for every row; .tolist() hands over plain Python values.
train_samples = [
    InputExample(texts=[resume, job_description], label=label)
    for resume, job_description, label in zip(
        df['resume_text'].tolist(),
        df['job_description_text'].tolist(),
        df['label'].tolist(),
    )
]

# ========= Base model =========
model = SentenceTransformer('all-mpnet-base-v2')

# ========= Dataset and dataloader =========
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=16)

# ========= Loss for multi-class classification =========
# The number of outputs is taken from the fitted label encoder rather than
# hard-coded, so it always matches the classes actually present in the data.
train_loss = losses.SoftmaxLoss(
    model=model,
    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
    num_labels=len(label_encoder.classes_),
)

# ========= Training =========
# Directory that receives the fine-tuned model; created up front if missing.
output_path = "output/job_match_model"
os.makedirs(output_path, exist_ok=True)

# Training settings gathered in one place: a single (dataloader, loss)
# objective, three epochs, and 100 warm-up steps.
fit_settings = {
    "train_objectives": [(train_dataloader, train_loss)],
    "epochs": 3,
    "warmup_steps": 100,
    "output_path": output_path,
    "show_progress_bar": True,
}
model.fit(**fit_settings)

# ========= Save the label mapping =========
# label_encoder.classes_ is ordered by encoded id, so each class's position in
# it is its id; there is no need to run the classes back through transform().
label_map = {label: idx for idx, label in enumerate(label_encoder.classes_)}

# One "<id>\t<class name>" line per class. The encoding is pinned to UTF-8 so
# class names with non-ASCII characters are written the same on every platform.
with open(os.path.join(output_path, "label_mapping.txt"), "w", encoding="utf-8") as f:
    f.writelines(f"{idx}\t{label}\n" for label, idx in label_map.items())

print("✅ Entrenamiento terminado. Modelo guardado en:", output_path)


Index(['resume_text', 'job_description_text', 'label'], dtype='object')


                                                                     