In [1]:
import sys

# Detect Google Colab by attempting to import its runtime package. When the
# import succeeds, mount Google Drive and make the project folder importable
# so that local modules stored there can be imported by later cells.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    from google.colab import drive
    drive.mount('/content/drive')
    sys.path.append('/content/drive/MyDrive/ApplAI/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Usando dispositivo:", device)

Usando dispositivo: cuda


In [3]:
%pip install -y pytorch torchvision torchaudio cpuonly -c pytorch
%pip install sentence-transformers
%pip install skorch


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: -y


In [4]:
import pandas as pd

# Root of the project folder on Google Drive -- adapt this to your own layout.
base_path = '/content/drive/MyDrive/ApplAI/'

# Choose which parameter/config file to run with.
modelUse = "params/model5.py"
modelUsed = f"{base_path}{modelUse}"

# Load the resume dataset from the project folder.
dataset_csv = base_path + "dataset/plain_text_resume_data.csv"
df = pd.read_csv(dataset_csv)

In [5]:
from funcs_src import *

# Load the experiment configuration from the selected parameter file, then
# pull out the hyper-parameter grid and the cross-validation setting.
config = load_config_with_exec(modelUsed)
param_grid, CV = config["param_grid"], config["cv"]

In [6]:
from sentence_transformers import SentenceTransformer
from huggingface_hub import login
import os

# Name of the sentence-transformers checkpoint, taken from the loaded config.
model_name = config["model"]
# Local directory where the checkpoint is saved after the first download.
local_path = f'./{model_name}'

if os.path.isdir(local_path):
    # Already downloaded: load from disk.
    model = SentenceTransformer(local_path)
else:
    # Not available locally: download it and save a copy for later runs.
    #
    # SECURITY: the access token must never be written in the notebook. It is
    # read from the HF_TOKEN environment variable instead. Any token that was
    # previously committed in this cell should be revoked and regenerated.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
    # Without a token the download proceeds anonymously, which is enough for
    # public checkpoints; gated or private ones require HF_TOKEN to be set.
    model = SentenceTransformer(model_name)
    model.save(local_path)

In [7]:
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import train_test_split
from funcs_src import *

In [8]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible across runs.
train_df, test_df = train_test_split(
    df,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
)

# Derive the embeddings folder from base_path instead of repeating the
# absolute Drive path, so adapting base_path relocates this folder too.
embeddings_dir = base_path + "Embeddings"
embedding_type = config["typeEmbedding"]

# NOTE(review): judging by its name and log output, get_or_create_embeddings
# presumably reuses embeddings already stored in `folder` -- confirm in funcs_src.
XTrain, YTrain = get_or_create_embeddings(
    model, train_df, embedding_type, prefix="train", folder=embeddings_dir
)
XTest, YTest = get_or_create_embeddings(
    model, test_df, embedding_type, prefix="test", folder=embeddings_dir
)

⚙️ Generando embeddings para train...
✅ Embeddings guardados en: /content/drive/MyDrive/ApplAI/Embeddings/train_X_NormalEmbbedings.pkl
⚙️ Generando embeddings para test...
✅ Embeddings guardados en: /content/drive/MyDrive/ApplAI/Embeddings/test_X_NormalEmbbedings.pkl


In [9]:
import torch
from skorch import NeuralNetRegressor


# Device string handed to the module: GPU when CUDA is available, else CPU.
module_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Baseline estimator settings. Keys prefixed with `module__` are forwarded by
# skorch to the STClassifier constructor, and `iterator_train__shuffle` to the
# training data loader.
net_settings = dict(
    module=STClassifier,
    module__model_name=config["model"],
    module__model_downloaded=True,
    module__device=module_device,
    max_epochs=5,
    lr=1e-3,
    batch_size=32,
    iterator_train__shuffle=True,
    train_split=None,  # no internal validation split inside skorch
    verbose=1,
)

net = NeuralNetRegressor(**net_settings)

In [10]:
from sklearn.model_selection import GridSearchCV
import numpy as np


def _rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


# RMSE scorer used for model selection. With greater_is_better=False the
# scorer reports the negated RMSE, so GridSearchCV can still maximise it.
# A named function is used instead of a lambda because lambdas cannot be
# pickled, which matters if the fitted search object is later serialised.
rmse_scorer = make_scorer(_rmse, greater_is_better=False)

gs = GridSearchCV(
    estimator=net,
    param_grid=param_grid,
    scoring=rmse_scorer,
    # Use the cross-validation setting loaded from the config file (CV);
    # it was previously hardcoded to 3, leaving config["cv"] unused.
    cv=CV,
    verbose=3,
    n_jobs=1,
    # Fail fast on a broken candidate instead of recording NaN for the fold.
    error_score='raise'
)

In [None]:
# Run the grid search: each candidate in param_grid is cross-validated on the
# training embeddings and targets.
gs.fit(XTrain, YTrain)

Fitting 3 folds for each of 4 candidates, totalling 12 fits
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m0.0334[0m  0.7878
      2        [36m0.0218[0m  1.1961
      3        [36m0.0197[0m  1.2784
      4        [36m0.0190[0m  1.3131
      5        [36m0.0184[0m  1.2134
      6        [36m0.0179[0m  1.4193
      7        [36m0.0175[0m  0.8368
      8        [36m0.0173[0m  0.7953
      9        [36m0.0170[0m  0.7894
     10        [36m0.0168[0m  0.7731
     11        [36m0.0165[0m  0.7828
     12        [36m0.0165[0m  0.7707
     13        [36m0.0164[0m  0.7782
     14        [36m0.0162[0m  0.7680
     15        0.0162  0.7942
     16        [36m0.0161[0m  0.8452
     17        [36m0.0159[0m  0.7799
     18        [36m0.0157[0m  0.7667
     19        0.0158  0.9515
     20        0.0157  1.1777
     21        [36m0.0156[0m  1.1873
     22        [36m0.0155[0m  0.9723
     23        [36m0.0155[0m  0.7550
     24   

In [None]:
# rmse_scorer was built with greater_is_better=False, so best_score_ holds the
# negated RMSE; flip the sign to report a positive error.
best_rmse = -gs.best_score_
print("Mejor RMSE:", best_rmse)

In [None]:
# Estimator selected by the grid search.
best_model = gs.best_estimator_

# Predict on the held-out test set and measure its RMSE.
y_pred = best_model.predict(XTest)
mse_test = mean_squared_error(YTest, y_pred)
rmse_test = np.sqrt(mse_test)

print("RMSE en test: {:.4f}".format(rmse_test))

In [None]:
# Persist the fitted search together with the config file that produced it.
# The output folder is derived from base_path instead of repeating the
# absolute Drive path, so adapting base_path relocates the saved models too.
save_final_model(
    gs,
    config_py_path=modelUsed,
    base_name="finalmodel",
    out_root=base_path + "modelsSave"
)