In [None]:
import torch
print(f"GPU disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

GPU disponible: True
GPU: NVIDIA A100-SXM4-80GB
VRAM: 85.2 GB


In [None]:
!pip install transformers datasets accelerate torch --quiet

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
print("Archivos en Drive:")
os.listdir('/content/drive/MyDrive/T5_Training/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archivos en Drive:


['synthetic_pairs.jsonl', 'notebookProyecto.ipynb']

In [None]:
import json

data_path = '/content/drive/MyDrive/T5_Training/synthetic_pairs.jsonl'
pairs = []

with open(data_path, 'r', encoding='utf-8') as f:
    for line in f:
        pairs.append(json.loads(line))

print(f"Total pares cargados: {len(pairs)}")
print("Ejemplo:", pairs[0])

Total pares cargados: 21527
Ejemplo: {'texto_tecnico': "Ways to facilitate people follow prescribed medicines Background Patients who are prescribed medicines consume only about half of their doses and many discontinue treatment entirely. Assisting patients to adhere better to medicines could improve their health, and many studies have tested ways to achieve this. Question We updated our review from 2007 to answer the question: What are the findings of high‐quality studies that tested ways to assist patients with adhering to their medicines? Search strategy We retrieved studies published until 11 January 2013. To identify relevant studies we searched six online databases and references in other reviews, and we contacted authors of relevant studies and reviews. Selection criteria We selected studies reporting a randomized controlled trial (RCT) comparing a group receiving an intervention to improve medicine adherence with a group not receiving the intervention. We included trials if the

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import Dataset
import torch

# Cargar T5-BASE
print("Cargando T5-BASE...")
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

print(f"Modelo: {model.__class__.__name__}")
print(f"Parámetros: {model.num_parameters():,}")
print(f"VRAM usado: ~1-2 GB")

train_pairs = [p for p in pairs if p['split'] == 'train']
val_pairs = [p for p in pairs if p['split'] == 'test']

print(f"Train: {len(train_pairs)} pares")
print(f"Val: {len(val_pairs)} pares")

Cargando T5-BASE...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Modelo: T5ForConditionalGeneration
Parámetros: 222,903,552
VRAM usado: ~1-2 GB
Train: 17298 pares
Val: 4229 pares


In [None]:
def tokenize_function(examples):
    """Tokenizar para T5 (encoder-decoder)"""
    inputs = [f"simplify: {text}" for text in examples['texto_tecnico']]

    model_inputs = tokenizer(
        inputs,
        max_length=256,
        padding=True,
        truncation=True,
        return_tensors='pt'
    )

    labels = tokenizer(
        examples['texto_simple'],
        max_length=256,
        padding=True,
        truncation=True,
        return_tensors='pt'
    )

    labels = labels['input_ids']
    labels[labels == tokenizer.pad_token_id] = -100

    model_inputs['labels'] = labels
    return model_inputs

train_dataset = Dataset.from_list(train_pairs)
val_dataset = Dataset.from_list(val_pairs)

train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=train_dataset.column_names
)

val_dataset = val_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=val_dataset.column_names
)

print("Datos preparados!")

Map:   0%|          | 0/17298 [00:00<?, ? examples/s]

Map:   0%|          | 0/4229 [00:00<?, ? examples/s]

Datos preparados!


In [None]:
from transformers import TrainingArguments, Trainer
import os

os.environ['WANDB_DISABLED'] = 'true'

training_args = TrainingArguments(
    output_dir='./t5_generator',
    per_device_train_batch_size=16,  # GRANDE (A100)
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=2,  # Batch efectivo = 32
    num_train_epochs=3,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=100,
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=True,
    dataloader_pin_memory=True,
    dataloader_num_workers=4,
    report_to=None,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

print("Listo para A100!")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Listo para A100!


In [None]:
print("=== ENTRENANDO EN A100 ===")
print(f"GPU: {torch.cuda.get_device_name(0)}")
print("Estimado: ~25-30 minutos")
print("\nIniciando...")

trainer.train()

print("\n¡Completado!")

=== ENTRENANDO EN A100 ===
GPU: NVIDIA A100-SXM4-80GB
Estimado: ~25-30 minutos

Iniciando...


Step,Training Loss,Validation Loss
500,0.1601,0.134267
1000,0.125,0.12118
1500,0.1215,0.118248


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].



¡Completado!


In [None]:
# Guardar
trainer.save_model('./t5_generator/model')
tokenizer.save_pretrained('./t5_generator/tokenizer')

import shutil
shutil.copytree('./t5_generator', '/content/drive/MyDrive/t5_generator', dirs_exist_ok=True)
print("Guardado en Drive!")

# Probar
model = T5ForConditionalGeneration.from_pretrained('./t5_generator/model')
tokenizer = T5Tokenizer.from_pretrained('./t5_generator/tokenizer')

text = "simplify: The patient, a 54-year-old female with a past medical history significant for hypertension controlled with losartan, type 2 diabetes mellitus on metformin, and mixed dyslipidemia, presented with a 48-hour history of febrile syndrome, malaise, right upper quadrant abdominal pain with colicky characteristics, nausea, and a single episode of vomiting. On admission, she was tachycardic (HR 112 bpm), hypotensive (BP 95/60 mmHg), febrile (38.9 °C), and mildly icteric, with right upper quadrant tenderness and hepatomegaly palpable 3 cm below the costal margin. Laboratory workup revealed leukocytosis with marked neutrophilia, elevated acute-phase reactants (CRP 185 mg/L, procalcitonin 12 ng/mL), cholestatic pattern of liver enzyme elevation (AST/ALT 165/178 U/L, ALP 410 U/L, GGT 690 U/L), and hyperbilirubinemia (total 7.8 mg/dL, direct 6.1 mg/dL). Abdominal ultrasound demonstrated gallbladder wall thickening, biliary sludge, and intrahepatic ductal dilatation, consistent with acute calculous cholecystitis complicated by ascending cholangitis. Empiric broad-spectrum IV antibiotic therapy with piperacillin–tazobactam was initiated alongside goal-directed fluid resuscitation and vasopressor support with norepinephrine for septic shock (lactate 3.8 mmol/L, SOFA 7). Endoscopic retrograde cholangiopancreatography (ERCP) with stone extraction and placement of a 7 Fr plastic stent was performed, achieving biliary decompression and hemodynamic stabilization. The patient exhibited progressive clinical improvement with defervescence by day 3 and normalization of liver function tests, completing a 7-day antibiotic course before planning for interval laparoscopic cholecystectomy."

inputs = tokenizer(text, return_tensors='pt', max_length=256, truncation=True)

with torch.no_grad():
    outputs = model.generate(
        inputs['input_ids'],
        max_length=128,
        num_beams=4,
        early_stopping=True
    )

generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generado:", generated)

Guardado en Drive!
Generado: The patient, a 54-year-old female with a past medical history significant for high blood pressure controlled with losartan, type 2 diabetes on metformin, and mixed dyslipidemia, presented with a 48-hour history of febrile syndrome, malaise, right upper quadrant abdominal pain with colicky characteristics, nausea, and a single episode of vomiting. On admission, she was tachycardic (HR 112 bpm), hypotensive (BP 95/60 mmHg), febrile (38.9 °C), and mildly


VERSION 2222

In [None]:

import torch
import numpy as np
from pathlib import Path
import json
import pandas as pd
from tqdm import tqdm
import warnings
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from torch.utils.data import Dataset, DataLoader
import os

warnings.filterwarnings('ignore')

In [None]:
print("=" * 80)
print("CONFIGURACIÓN DE ENTRENAMIENTO T5")
print("=" * 80)

# Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')
print("\nDrive montado exitosamente!")

# Verificar GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nDevice: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memoria GPU: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print(f"Compute Capability: {torch.cuda.get_device_properties(0).major}.{torch.cuda.get_device_properties(0).minor}")

# Configuración optimizada para A100
config = {
    'model_name': 't5-base',  # t5-base (220M) o t5-large (770M)
    'batch_size': 32,  # Aumentado para A100
    'gradient_accumulation_steps': 2,  # Batch efectivo: 64
    'learning_rate': 3e-5,
    'weight_decay': 0.01,
    'epochs': 5,
    'warmup_ratio': 0.1,
    'max_length_source': 512,  # Tokens entrada
    'max_length_target': 256,  # Tokens salida
    'save_steps': 500,
    'eval_steps': 250,
    'logging_steps': 50,
    'fp16': True,  # Mixed precision para A100
    'bf16': False,  # Si A100 soporta bf16, usar esto es mejor
    'dataloader_num_workers': 4,
    'dataloader_pin_memory': True,
    'gradient_checkpointing': True,  # Ahorra memoria
    'save_total_limit': 3,
    'load_best_model_at_end': True,
    'metric_for_best_model': 'eval_loss',
    'greater_is_better': False,
    'early_stopping_patience': 3,
    'seed': 42,
    'output_dir': '/content/models/t5_generator',
    'data_file': '/content/drive/MyDrive/T5_Training/synthetic_pairs.jsonl'  # Ruta desde Drive
}

print(f"\nConfiguración:")
for k, v in config.items():
    print(f"  {k}: {v}")


CONFIGURACIÓN DE ENTRENAMIENTO T5
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Drive montado exitosamente!

Device: cuda
GPU: NVIDIA A100-SXM4-80GB
Memoria GPU: 85.2 GB
Compute Capability: 8.0

Configuración:
  model_name: t5-base
  batch_size: 32
  gradient_accumulation_steps: 2
  learning_rate: 3e-05
  weight_decay: 0.01
  epochs: 5
  warmup_ratio: 0.1
  max_length_source: 512
  max_length_target: 256
  save_steps: 500
  eval_steps: 250
  logging_steps: 50
  fp16: True
  bf16: False
  dataloader_num_workers: 4
  dataloader_pin_memory: True
  gradient_checkpointing: True
  save_total_limit: 3
  load_best_model_at_end: True
  metric_for_best_model: eval_loss
  greater_is_better: False
  early_stopping_patience: 3
  seed: 42
  output_dir: /content/models/t5_generator
  data_file: /content/drive/MyDrive/T5_Training/synthetic_pairs.jsonl


In [None]:
print("\n" + "=" * 80)
print("CARGANDO DATOS")
print("=" * 80)

def load_synthetic_pairs(data_path: str):
    """Cargar pares sintéticos desde JSONL"""
    pairs = []
    with open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            pairs.append(json.loads(line))
    return pairs

# Cargar datos desde Drive
train_pairs = load_synthetic_pairs(config['data_file'])

# Dividir en train/eval (80/20)
np.random.seed(config['seed'])
shuffled_indices = np.random.permutation(len(train_pairs))
split_idx = int(len(train_pairs) * 0.8)

train_data = [train_pairs[i] for i in shuffled_indices[:split_idx]]
eval_data = [train_pairs[i] for i in shuffled_indices[split_idx:]]

print(f"Total pares: {len(train_pairs)}")
print(f"Train: {len(train_data)}")
print(f"Eval: {len(eval_data)}")
print(f"\nEjemplo train:")
print(f"  Técnico: {train_data[0]['texto_tecnico'][:100]}...")
print(f"  Simple: {train_data[0]['texto_simple'][:100]}...")


CARGANDO DATOS
Total pares: 21527
Train: 17221
Eval: 4306

Ejemplo train:
  Técnico: the research team often do not have precise information about side effects and complications. This i...
  Simple: We often do not have precise information about side effects and complications. This is particularly ...


In [None]:
print("\n" + "=" * 80)
print("PREPARANDO DATASET")
print("=" * 80)

class PLSDataset(Dataset):
    """Dataset para pares PLS (técnico → simple)"""

    def __init__(self, pairs, tokenizer, max_length_source, max_length_target):
        self.pairs = pairs
        self.tokenizer = tokenizer
        self.max_length_source = max_length_source
        self.max_length_target = max_length_target

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        pair = self.pairs[idx]
        source = pair.get('input_text', pair['texto_tecnico'])  # Usar input_text si existe (con prefijo)
        target = pair['texto_simple']

        # Tokenizar
        source_encoded = self.tokenizer(
            source,
            max_length=self.max_length_source,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        target_encoded = self.tokenizer(
            target,
            max_length=self.max_length_target,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': source_encoded['input_ids'].squeeze(),
            'attention_mask': source_encoded['attention_mask'].squeeze(),
            'labels': target_encoded['input_ids'].squeeze()
        }

# Inicializar tokenizer
tokenizer = T5Tokenizer.from_pretrained(config['model_name'])
print(f"Tokenizer vocab size: {tokenizer.vocab_size}")

# Crear Preset
prepend_text = "simplify: "  # Prefijo para la tarea

# Agregar prefijo a todos los textos técnicos (no modificar el original)
for pair in train_data:
    pair['input_text'] = prepend_text + pair['texto_tecnico']
for pair in eval_data:
    pair['input_text'] = prepend_text + pair['texto_tecnico']

# Crear datasets
train_dataset = PLSDataset(train_data, tokenizer, config['max_length_source'], config['max_length_target'])
eval_dataset = PLSDataset(eval_data, tokenizer, config['max_length_source'], config['max_length_target'])

print(f"Train dataset: {len(train_dataset)} ejemplos")
print(f"Eval dataset: {len(eval_dataset)} ejemplos")


PREPARANDO DATASET


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Tokenizer vocab size: 32000
Train dataset: 17221 ejemplos
Eval dataset: 4306 ejemplos


In [None]:
print("\n" + "=" * 80)
print("CARGANDO MODELO T5")
print("=" * 80)

model = T5ForConditionalGeneration.from_pretrained(config['model_name'])

# Configuración optimizada para A100
model.config.gradient_checkpointing = True
model.gradient_checkpointing_enable()  # Ahorra ~40% memoria

print(f"Modelo: {config['model_name']}")
print(f"Parámetros totales: {sum(p.numel() for p in model.parameters()):,}")
print(f"Parámetros entrenables: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

# Mover a GPU
model = model.to(device)
print(f"Modelo cargado en {device}")


CARGANDO MODELO T5


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Modelo: t5-base
Parámetros totales: 222,903,552
Parámetros entrenables: 222,903,552
Modelo cargado en cuda


In [None]:
print("\n" + "=" * 80)
print("CONFIGURANDO ARGUMENTOS DE ENTRENAMIENTO")
print("=" * 80)

# Calcular steps totales
total_steps = len(train_dataset) // (config['batch_size'] * config['gradient_accumulation_steps']) * config['epochs']
warmup_steps = int(total_steps * config['warmup_ratio'])

print(f"Steps totales: {total_steps}")
print(f"Warmup steps: {warmup_steps}")

# Training arguments optimizado para A100
training_args = TrainingArguments(
    output_dir=config['output_dir'],
    num_train_epochs=config['epochs'],
    per_device_train_batch_size=config['batch_size'],
    per_device_eval_batch_size=config['batch_size'] * 2,  # Eval puede ser más grande
    gradient_accumulation_steps=config['gradient_accumulation_steps'],
    learning_rate=config['learning_rate'],
    weight_decay=config['weight_decay'],
    warmup_ratio=config['warmup_ratio'],

    # Optimización para A100
    fp16=config['fp16'],
    bf16=config['bf16'],
    dataloader_num_workers=config['dataloader_num_workers'],
    dataloader_pin_memory=config['dataloader_pin_memory'],
    gradient_checkpointing=config['gradient_checkpointing'],

    # Logging y checkpoints
    logging_strategy='steps',
    logging_steps=config['logging_steps'],
    eval_strategy='steps',
    eval_steps=config['eval_steps'],
    save_strategy='steps',
    save_steps=config['save_steps'],
    save_total_limit=config['save_total_limit'],

    # Early stopping
    load_best_model_at_end=config['load_best_model_at_end'],
    metric_for_best_model=config['metric_for_best_model'],
    greater_is_better=config['greater_is_better'],

    # Reproducibilidad
    seed=config['seed'],

    # Reportes
    report_to='tensorboard',  # Logging a TensorBoard
    logging_dir=f"{config['output_dir']}/logs",

    # Otros optimizadores
    remove_unused_columns=False,
    prediction_loss_only=True,
)

print("Training arguments configurados")



CONFIGURANDO ARGUMENTOS DE ENTRENAMIENTO
Steps totales: 1345
Warmup steps: 134
Training arguments configurados


In [None]:
print("\n" + "=" * 80)
print("CONFIGURANDO MÉTRICAS")
print("=" * 80)

def compute_metrics(eval_pred):
    """Calcular métricas durante evaluación"""
    predictions, labels = eval_pred

    # Decodificar predictions
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Calcular ROUGE (simple)
    rouge_scores = []
    for pred, label in zip(decoded_preds, decoded_labels):
        # Overlap simple
        pred_tokens = set(pred.lower().split())
        label_tokens = set(label.lower().split())
        if len(label_tokens) > 0:
            overlap = len(pred_tokens & label_tokens) / len(label_tokens)
            rouge_scores.append(overlap)

    rouge_avg = np.mean(rouge_scores) if rouge_scores else 0.0

    return {
        'rouge_overlap': rouge_avg
    }

print("Métricas configuradas")


CONFIGURANDO MÉTRICAS
Métricas configuradas


In [None]:
print("\n" + "=" * 80)
print("INICIANDO ENTRENAMIENTO")
print("=" * 80)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=config['early_stopping_patience'])]
)

# Entrenar
train_result = trainer.train()

print("\n" + "=" * 80)
print("ENTRENAMIENTO COMPLETADO")
print("=" * 80)
print(f"Loss final: {train_result.training_loss:.4f}")
print(f"Steps totales: {train_result.global_step}")


INICIANDO ENTRENAMIENTO


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
250,0.0647,0.040417
500,0.0376,0.024972
750,0.0439,0.022746
1000,0.047,0.022717
1250,0.0471,0.022506


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].



ENTRENAMIENTO COMPLETADO
Loss final: 0.5952
Steps totales: 1350


In [None]:
print("\n" + "=" * 80)
print("EVALUACIÓN FINAL")
print("=" * 80)

eval_results = trainer.evaluate()
print(f"Eval Loss: {eval_results['eval_loss']:.4f}")


EVALUACIÓN FINAL


Eval Loss: 0.0227


In [None]:
print("\n" + "=" * 80)
print("GUARDANDO MODELO FINAL")
print("=" * 80)

# Guardar modelo
model.save_pretrained(f"{config['output_dir']}/final_model")
tokenizer.save_pretrained(f"{config['output_dir']}/final_tokenizer")

print(f"Modelo guardado en: {config['output_dir']}/final_model")
print(f"Tokenizer guardado en: {config['output_dir']}/final_tokenizer")

# Guardar métricas
metrics = {
    'training_loss': train_result.training_loss,
    'eval_loss': eval_results['eval_loss'],
    'total_steps': train_result.global_step,
    'epochs': config['epochs'],
    'model_name': config['model_name'],
    'batch_size': config['batch_size'],
    'gradient_accumulation_steps': config['gradient_accumulation_steps']
}

with open(f"{config['output_dir']}/final_model/metrics.json", 'w') as f:
    json.dump(metrics, f, indent=2)

print("Métricas guardadas en metrics.json")


GUARDANDO MODELO FINAL
Modelo guardado en: /content/models/t5_generator/final_model
Tokenizer guardado en: /content/models/t5_generator/final_tokenizer
Métricas guardadas en metrics.json


In [None]:
print("\n" + "=" * 80)
print("COPIANDO MODELO A GOOGLE DRIVE")
print("=" * 80)

import shutil
from pathlib import Path

# Crear directorio en Drive
drive_model_dir = '/content/drive/MyDrive/T5_Training/models/final_model'
Path(drive_model_dir).mkdir(parents=True, exist_ok=True)

print(f"\nCopiando modelo a: {drive_model_dir}")

# Copiar modelo
shutil.copytree(
    f"{config['output_dir']}/final_model",
    f"{drive_model_dir}/model",
    dirs_exist_ok=True
)

# Copiar tokenizer
shutil.copytree(
    f"{config['output_dir']}/final_tokenizer",
    f"{drive_model_dir}/tokenizer",
    dirs_exist_ok=True
)

# Copiar métricas
shutil.copy(
    f"{config['output_dir']}/final_model/metrics.json",
    f"{drive_model_dir}/metrics.json"
)

print("✅ Modelo copiado exitosamente a Google Drive!")
print(f"\nUbicación en Drive:")
print(f"  {drive_model_dir}/")
print(f"  - model/ (modelo)")
print(f"  - tokenizer/ (tokenizer)")
print(f"  - metrics.json (métricas)")



COPIANDO MODELO A GOOGLE DRIVE

Copiando modelo a: /content/drive/MyDrive/T5_Training/models/final_model
✅ Modelo copiado exitosamente a Google Drive!

Ubicación en Drive:
  /content/drive/MyDrive/T5_Training/models/final_model/
  - model/ (modelo)
  - tokenizer/ (tokenizer)
  - metrics.json (métricas)
