# Registro del modelo base de Hugging Face en MLflow
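Esta sección descarga el modelo base `nlpconnect/vit-gpt2-image-captioning` de Hugging Face y lo registra en MLflow como `image-to-text-base`. También guarda localmente el tokenizer y el feature extractor en `./results/` para reutilizarlos más adelante.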

In [1]:
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import mlflow
import mlflow.pytorch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configuration: source checkpoint on the Hugging Face Hub, the name under which
# the model is registered in MLflow, and the MLflow tracking server to talk to.
MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"
MLFLOW_MODEL_NAME = "image-to-text-base"
mlflow.set_tracking_uri("http://localhost:5000")

In [3]:
# Download the base model and its preprocessing components.
# All three are loaded from the same checkpoint (MODEL_NAME), in this order:
# encoder-decoder model, image processor, tokenizer.
print("Descargando modelo base de Hugging Face...")
model, feature_extractor, tokenizer = (
    loader.from_pretrained(MODEL_NAME)
    for loader in (VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer)
)
print("Modelo base descargado.")

Descargando modelo base de Hugging Face...
Modelo base descargado.


In [4]:

# Register the base model in MLflow together with its preprocessing assets.
# The tokenizer and feature extractor are saved locally (a later cell reads them
# from ./results) and are ALSO logged as artifacts of this run, so the run that
# produced the registered model version carries everything needed to use it.
with mlflow.start_run(run_name='register-base-model') as run:
    print(f'Registrando modelo base en MLflow como {MLFLOW_MODEL_NAME}...')
    # Record which Hugging Face checkpoint the registered model came from.
    mlflow.log_param('huggingface_model', MODEL_NAME)
    # Log the PyTorch model under 'model' and register it in the Model Registry.
    mlflow.pytorch.log_model(model, 'model', registered_model_name=MLFLOW_MODEL_NAME)
    print('Modelo base registrado en MLflow.')
    # Save the tokenizer and feature extractor to local directories...
    tokenizer.save_pretrained('./results/tokenizer_base')
    feature_extractor.save_pretrained('./results/feature_extractor_base')
    # ...and attach those directories to the same run so they are tracked
    # alongside the model instead of living only on this machine's disk.
    mlflow.log_artifacts('./results/tokenizer_base', artifact_path='tokenizer')
    mlflow.log_artifacts('./results/feature_extractor_base', artifact_path='feature_extractor')
    print('Tokenizer y feature extractor guardados.')

Registrando modelo base en MLflow como image-to-text-base...


Successfully registered model 'image-to-text-base'.
2025/07/05 11:16:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: image-to-text-base, version 1


Modelo base registrado en MLflow.
Tokenizer y feature extractor guardados.
🏃 View run register-base-model at: http://localhost:5000/#/experiments/0/runs/239736e6fd9d4a61809a613071c5747d
🧪 View experiment at: http://localhost:5000/#/experiments/0


Created version '1' of model 'image-to-text-base'.


# Cargar el modelo base desde MLflow para fine-tuning
Esta sección carga el modelo base registrado en MLflow, junto con el tokenizer y el feature extractor guardados localmente, para usarlos como punto de partida en el fine-tuning.

In [5]:
import torch
from transformers import ViTImageProcessor, AutoTokenizer

In [6]:

# Name of the registered model in MLflow.
MLFLOW_MODEL_NAME = "image-to-text-base"

# Load the model through a Model Registry URI that requests the `latest` version.
print("Cargando modelo base desde MLflow...")
model = mlflow.pytorch.load_model("models:/{}/latest".format(MLFLOW_MODEL_NAME))
print("Modelo base cargado desde MLflow.")


Cargando modelo base desde MLflow...


Downloading artifacts: 100%|██████████| 6/6 [00:29<00:00,  4.88s/it]   

Modelo base cargado desde MLflow.





In [7]:

# Restore the tokenizer and feature extractor from the local directories
# written by the registration cell.
tokenizer = AutoTokenizer.from_pretrained(
    "./results/tokenizer_base"
)
feature_extractor = ViTImageProcessor.from_pretrained(
    "./results/feature_extractor_base"
)
print("Tokenizer y feature extractor cargados.")

Tokenizer y feature extractor cargados.
