In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from accelerate import Accelerator
import torch
import pandas as pd
import gc

def setup_mistral_pipeline():
    """
    Build a text-generation pipeline for Mistral-Nemo-Instruct sharded over the visible GPUs.

    The model is loaded with ``device_map="auto"`` so that its layers are
    placed across the available devices at load time. The ``Accelerator`` is
    created only to report the active device and to keep the return value
    stable for callers; it does not wrap the model.

    Returns:
        tuple: ``(text_pipeline, accelerator)`` where ``text_pipeline`` is the
        Transformers text-generation pipeline and ``accelerator`` is the
        ``Accelerator`` instance created here.
    """
    # No DataLoader is prepared in this script, so batch-splitting options
    # are irrelevant; the default constructor also avoids arguments that
    # newer Accelerate releases deprecate.
    accelerator = Accelerator()
    device = accelerator.device
    print(f"Using device: {device}")

    # Report the GPUs that the model can be spread over.
    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
        print(f"Available GPUs: {gpu_count}")
        for i in range(gpu_count):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

    model_name = "mistralai/Mistral-Nemo-Instruct-2407"

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Generation needs a padding id; fall back to EOS when none is defined so
    # the pipeline does not have to guess (and warn) on every call.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Loading model...")
    # device_map="auto" lets Transformers distribute the layers across the
    # available GPUs. trust_remote_code is intentionally not enabled: it would
    # allow code from the model repository to run locally.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    print(f"Model device map: {getattr(model, 'hf_device_map', 'Not available')}")

    # The model is already placed on its devices, so it is handed to the
    # pipeline as-is: no accelerator.prepare() and no second device_map.
    text_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
    )

    return text_pipeline, accelerator

    
# Instructions sent with every request. Built from explicit lines so that no
# source-code indentation ends up inside the text given to the model.
_SD_SYSTEM_PROMPT = (
    "You are an expert in art and image generation with Stable Diffusion in image-to-image mode.\n"
    "Your task is to create a prompt that guides Stable Diffusion to modify an existing image\n"
    "while maintaining certain elements but changing others to create interesting variations.\n"
    "\n"
    "The prompt should:\n"
    "1. Specify which elements to preserve (general style, theme)\n"
    "2. Indicate which elements to modify (colors, perspective, details)\n"
    "3. Suggest concrete but subtle transformations\n"
    "4. Maintain the essence of the original genre and artist\n"
    "\n"
    "Use descriptive and specific language. No more than 75 words."
)


def _build_chat(row):
    """Return the single-turn chat (list of message dicts) for one artwork row."""
    art_context = (
        f"Genre: {row['genre']}\n"
        f"Artist: {row['artist']}\n"
        f"Title: {row['painting_name']}\n"
        f"Description: {row['description']}"
    )
    # Instructions and artwork data share one user turn, so the request does
    # not depend on the chat template supporting a separate system role.
    return [{"role": "user", "content": f"{_SD_SYSTEM_PROMPT}\n\n{art_context}"}]


def process_batch(pipeline, csv_path, output_path, batch_size=100, mode="text-to-image",
                  generation_batch_size=4):
    """
    Generate a Stable Diffusion prompt for a genre-balanced sample of artworks.

    The CSV is expected to provide the columns ``genre``, ``artist``,
    ``painting_name`` and ``description``. The sample is drawn with a fixed
    random seed, each sampled row is turned into a chat request, and the
    generated text is stored in a new ``sd_prompt`` column.

    Args:
        pipeline: Callable text-generation pipeline. It receives a list of
            chats (each a list of ``{"role", "content"}`` dicts) and must
            return, per chat, a list whose first item has a
            ``'generated_text'`` string.
        csv_path: Path of the input CSV.
        output_path: Path where the sampled rows plus ``sd_prompt`` are written.
        batch_size: Total sample budget, split evenly across genres
            (``batch_size // number_of_genres`` rows per genre).
        mode: Label used only in the final summary message.
        generation_batch_size: Number of chats handed to ``pipeline`` per call.

    Returns:
        pandas.DataFrame: The sampled rows with the ``sd_prompt`` column added.

    Raises:
        ValueError: If the CSV has no rows or ``generation_batch_size`` < 1.
    """
    if generation_batch_size < 1:
        raise ValueError("generation_batch_size must be at least 1")

    df = pd.read_csv(csv_path)
    if df.empty:
        # Without this check the per-genre split below divides by zero.
        raise ValueError(f"No rows found in {csv_path}")

    # Take the same number of rows from every genre.
    genres = df['genre'].unique()
    samples_per_genre = batch_size // len(genres)
    if samples_per_genre == 0:
        print(
            f"Warning: batch_size={batch_size} is smaller than the number of "
            f"genres ({len(genres)}); no rows will be sampled"
        )

    sample_frames = []
    for genre in genres:
        genre_df = df[df['genre'] == genre]
        samples = min(samples_per_genre, len(genre_df))
        sample_frames.append(genre_df.sample(n=samples, random_state=42))

    sample_df = pd.concat(sample_frames).reset_index(drop=True)

    total_batches = (len(sample_df) + generation_batch_size - 1) // generation_batch_size
    prompts = []

    starts = range(0, len(sample_df), generation_batch_size)
    for batch_number, start in enumerate(starts, start=1):
        batch = sample_df.iloc[start:start + generation_batch_size]
        print(f"Processing batch {batch_number}/{total_batches}")

        # Passing chat messages lets the pipeline apply the tokenizer's own
        # chat template instead of hand-written role markers.
        chats = [_build_chat(row) for _, row in batch.iterrows()]
        outputs = pipeline(chats, return_full_text=False)

        for output in outputs:
            generated_text = output[0]['generated_text']
            # Defensive cleanup: drop anything after a stray end-of-turn marker.
            prompts.append(generated_text.split("<|im_end|>")[0].strip())

        # Release cached GPU memory every fifth batch; collect garbage first
        # so memory held by dead objects can be returned as well.
        if torch.cuda.is_available() and batch_number % 5 == 0:
            gc.collect()
            torch.cuda.empty_cache()

    sample_df['sd_prompt'] = prompts

    sample_df.to_csv(output_path, index=False)
    print(f"Processed {len(sample_df)} prompts in {mode} mode")

    return sample_df

def main():
    """
    Run the image-to-image prompt generation job end to end.

    Builds the model pipeline, generates prompts for a 30-row sample of
    ``dataset.csv`` into ``prompts_image_to_image.csv``, and always releases
    cached GPU memory afterwards, even if generation fails.
    """
    # The accelerator is returned by the setup helper but not needed here.
    text_pipeline, _accelerator = setup_mistral_pipeline()

    try:
        print("\n--- Processing Image-to-Image Prompts ---")
        process_batch(
            text_pipeline,
            csv_path="dataset.csv",
            output_path="prompts_image_to_image.csv",
            batch_size=30,
            mode="image-to-image",
        )
    finally:
        # GPU cleanup runs whether or not the job succeeded.
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            torch.cuda.empty_cache()
            gc.collect()

# Run the prompt-generation job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Using device: cuda
Available GPUs: 2
GPU 0: NVIDIA GeForce RTX 3090
GPU 1: NVIDIA GeForce RTX 3090
Loading tokenizer...
Loading model...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Device set to use cuda:0


Model device map: {'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10': 0, 'model.layers.11': 0, 'model.layers.12': 0, 'model.layers.13': 0, 'model.layers.14': 0, 'model.layers.15': 0, 'model.layers.16': 0, 'model.layers.17': 0, 'model.layers.18': 1, 'model.layers.19': 1, 'model.layers.20': 1, 'model.layers.21': 1, 'model.layers.22': 1, 'model.layers.23': 1, 'model.layers.24': 1, 'model.layers.25': 1, 'model.layers.26': 1, 'model.layers.27': 1, 'model.layers.28': 1, 'model.layers.29': 1, 'model.layers.30': 1, 'model.layers.31': 1, 'model.layers.32': 1, 'model.layers.33': 1, 'model.layers.34': 1, 'model.layers.35': 1, 'model.layers.36': 1, 'model.layers.37': 1, 'model.layers.38': 1, 'model.layers.39': 1, 'model.norm': 1, 'model.rotary_emb': 1, 'lm_head': 1}

--- Processing Image-to-Image P

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 1/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 2/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 3/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 4/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 5/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 6/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing batch 7/7


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed 27 prompts in image-to-image mode
