In [None]:
# Uninstall existing bitsandbytes and install the latest version
!pip uninstall -y bitsandbytes
!pip install -U bitsandbytes

# Install other required packages
!pip install -U transformers accelerate sentencepiece

Found existing installation: bitsandbytes 0.45.4
Uninstalling bitsandbytes-0.45.4:
  Successfully uninstalled bitsandbytes-0.45.4
Collecting bitsandbytes
  Using cached bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Using cached bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl (76.0 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.4


In [None]:
# Install necessary packages
!pip install pandas numpy torch transformers tqdm
!pip install codecarbon sentence-transformers datasets
!pip install peft bitsandbytes accelerate scikit-learn



In [None]:

import os
import pandas as pd
import numpy as np
import torch
import time
import gc
import json
import logging
from tqdm import tqdm
from datetime import datetime
from codecarbon import EmissionsTracker
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training
)
from sentence_transformers import SentenceTransformer, util
from datasets import Dataset as HFDataset
from google.colab import drive

In [None]:

# Mount Google Drive so the dataset under /content/drive (see DATA_PATH) is readable
# and results can be written back to OUTPUT_DIR.
# NOTE(review): `drive` is already imported in the imports cell above; the import is
# repeated here so this cell also works when run on its own.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# --- Pipeline configuration -------------------------------------------------
# CSV with the prompts, benchmark texts, model responses and energy measurements.
DATA_PATH = "/content/drive/MyDrive/DM_Green_AI/Mistral-7B_Energy_Consumption.csv"
# Directory that receives the JSON metrics files and the saved policy model.
OUTPUT_DIR = "/content/drive/MyDrive/rl_models"
# Default dataloader batch size; also the flush interval for prompt-optimization results.
BATCH_SIZE = 8
# Prompts are padded/truncated to this many tokens when the dataloader is built.
MAX_PROMPT_LENGTH = 512
# NOTE(review): not referenced by any function visible in this notebook section.
MAX_RESPONSE_LENGTH = 512
# Weights of the two reward components combined in calculate_reward().
ENERGY_WEIGHT = 0.3
QUALITY_WEIGHT = 0.7
# NOTE(review): run_energy_optimization() defaults to num_ppo_epochs=2 and does not
# read this constant.
NUM_PPO_EPOCHS = 4
# Fraction of rows (taken from the top of the frame) used for training.
TRAIN_TEST_SPLIT = 0.8
# Hugging Face model id loaded for both the policy and the reference model.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"


In [None]:

# === SETUP & LOGGING ===
def setup_logging():
    """Configure logging for the notebook and return this module's logger.

    The root logger is configured at INFO level with a
    "time - level - message" format, and the "codecarbon" logger is limited
    to ERROR so only its errors are emitted.

    Returns:
        logging.Logger: the logger named after ``__name__``.
    """
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=log_format, level=logging.INFO)

    # Keep codecarbon quiet unless something actually goes wrong.
    codecarbon_logger = logging.getLogger("codecarbon")
    codecarbon_logger.setLevel(logging.ERROR)

    return logging.getLogger(__name__)

# === BLOCK 1: DATA HANDLING ===
def load_data(data_path, logger=None):
    """Read the dataset CSV, drop incomplete rows and print a short preview.

    Args:
        data_path: Path of the CSV file to read.
        logger: Optional logger; when given, the load is announced at INFO level.

    Returns:
        pandas.DataFrame: rows that have a value in each of 'Prompt',
        'Mistral_Response' and 'Energy_Consumption_kWh'. The original index
        labels are kept.
    """
    required_columns = ['Prompt', 'Mistral_Response', 'Energy_Consumption_kWh']

    if logger:
        logger.info(f"Loading data from {data_path}")

    frame = pd.read_csv(data_path).dropna(subset=required_columns)

    # Debug preview, emitted one chunk per print() call.
    preview_chunks = (
        "\n=== DATA PREVIEW ===",
        f"Total samples: {len(frame)}",
        f"Columns: {frame.columns.tolist()}",
        "\nFirst 3 rows:",
        frame.head(3),
        "\nData statistics:",
        frame.describe(),
    )
    for chunk in preview_chunks:
        print(chunk)

    return frame

def prepare_data_splits(df, train_test_split=0.8, logger=None):
    """Split the data into train/test frames and compute summary statistics.

    The split is positional: the first ``int(len(df) * train_test_split)`` rows
    become the training set and the rest the test set (no shuffling). The
    statistics, and the z-score in the added 'Energy_Normalized' column, are
    computed over the whole frame before splitting.

    Args:
        df: Frame with at least an 'Energy_Consumption_kWh' column. It is not
            modified; the normalised column is added to an internal copy.
        train_test_split: Fraction of rows assigned to the training set.
        logger: Optional logger for the split sizes.

    Returns:
        tuple: ``(train_df, test_df, stats)`` where both frames have a fresh
        0-based index and ``stats`` holds 'mean_energy', 'std_energy',
        'mean_response_token_length' and 'mean_response_duration' (the last
        two are 0 when the corresponding column is absent).
    """
    energy = df['Energy_Consumption_kWh']

    # Calculate statistics
    stats = {
        'mean_energy': energy.mean(),
        'std_energy': energy.std(),
        'mean_response_token_length': df['Response_Token_length'].mean() if 'Response_Token_length' in df.columns else 0,
        'mean_response_duration': df['Response_Total_duration'].mean() if 'Response_Total_duration' in df.columns else 0
    }

    # Work on a copy so the caller's frame does not silently gain a column.
    df = df.copy()

    std_energy = stats['std_energy']
    if pd.isna(std_energy) or std_energy == 0:
        # Constant energy (or fewer than two rows): a z-score is undefined, so
        # use 0.0 rather than filling the column with NaN/inf.
        df['Energy_Normalized'] = 0.0
    else:
        df['Energy_Normalized'] = (energy - stats['mean_energy']) / std_energy

    # Split into train/test
    train_size = int(len(df) * train_test_split)
    train_df = df.iloc[:train_size].reset_index(drop=True)
    test_df = df.iloc[train_size:].reset_index(drop=True)

    if logger: logger.info(f"Training samples: {len(train_df)}, Test samples: {len(test_df)}")

    # Debug output
    print("\n=== DATA SPLITS ===")
    print(f"Training set: {len(train_df)} samples")
    print(f"Test set: {len(test_df)} samples")
    print(f"Energy consumption stats - Mean: {stats['mean_energy']:.6f}, Std: {stats['std_energy']:.6f}")

    return train_df, test_df, stats

def tokenize_sample(sample, tokenizer, max_prompt_length):
    """Wrap one sample's prompt in [INST] tags and tokenize it to a fixed length.

    Args:
        sample: Mapping-like row providing a 'Prompt' entry.
        tokenizer: Callable tokenizer returning 'input_ids' and
            'attention_mask' tensors.
        max_prompt_length: Length the prompt is padded or truncated to.

    Returns:
        tuple: ``(input_ids, attention_mask)`` with ``squeeze()`` applied to each.
    """
    encoded = tokenizer(
        "<s>[INST] {} [/INST]".format(sample['Prompt']),
        max_length=max_prompt_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )

    ids = encoded['input_ids'].squeeze()
    mask = encoded['attention_mask'].squeeze()
    return ids, mask
def prepare_dataloader(df, tokenizer, batch_size, max_prompt_length, logger=None):
    """Tokenize every row of `df` and wrap the result in a shuffling DataLoader.

    Each row is tokenized with `tokenize_sample` and stored in a Hugging Face
    `Dataset` together with its prompt, benchmark text, energy value and
    cosine similarity. Token ids are kept as plain lists on the host: the
    dataset stores plain values rather than tensors, and callers move each
    batch to the model's device themselves, so there is no reason to place
    individual rows on the GPU here.

    Args:
        df: Frame with 'Prompt' and 'Energy_Consumption_kWh' columns and,
            optionally, 'Benchmark_text' and 'cosine'.
        tokenizer: Tokenizer forwarded to `tokenize_sample`.
        batch_size: Number of samples per batch.
        max_prompt_length: Padded/truncated prompt length in tokens.
        logger: Optional logger.

    Returns:
        torch.utils.data.DataLoader: yields dicts with tensor entries
        'input_ids' and 'attention_mask' and list entries 'prompt',
        'benchmark_text', 'energy_consumption' and 'cosine_similarity'.
    """
    if logger: logger.info(f"Preparing dataloader with batch size {batch_size}")

    dataset_dict = {
        'input_ids': [],
        'attention_mask': [],
        'prompt': [],
        'benchmark_text': [],
        'energy_consumption': [],
        'cosine_similarity': []
    }

    # Process each sample
    for i in range(len(df)):
        sample = df.iloc[i]
        input_ids, attention_mask = tokenize_sample(sample, tokenizer, max_prompt_length)

        # A missing benchmark text (absent column or NaN cell) becomes "" so the
        # column stays purely textual.
        benchmark_text = sample['Benchmark_text'] if 'Benchmark_text' in sample else ""
        if pd.isna(benchmark_text):
            benchmark_text = ""

        cosine = sample['cosine'] if 'cosine' in sample and not pd.isna(sample['cosine']) else 0.0

        dataset_dict['input_ids'].append(input_ids.tolist())
        dataset_dict['attention_mask'].append(attention_mask.tolist())
        dataset_dict['prompt'].append(sample['Prompt'])
        dataset_dict['benchmark_text'].append(benchmark_text)
        dataset_dict['energy_consumption'].append(float(sample['Energy_Consumption_kWh']))
        dataset_dict['cosine_similarity'].append(float(cosine))

    # Create HuggingFace dataset
    dataset = HFDataset.from_dict(dataset_dict)

    def collate_fn(batch):
        """Rebuild id/mask tensors from the dataset rows and gather the rest as lists."""
        return {
            'input_ids': torch.tensor([sample['input_ids'] for sample in batch], dtype=torch.long),
            'attention_mask': torch.tensor([sample['attention_mask'] for sample in batch], dtype=torch.long),
            'prompt': [sample['prompt'] for sample in batch],
            'benchmark_text': [sample['benchmark_text'] for sample in batch],
            'energy_consumption': [sample['energy_consumption'] for sample in batch],
            'cosine_similarity': [sample['cosine_similarity'] for sample in batch]
        }

    # Create dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        collate_fn=collate_fn,
        shuffle=True
    )

    return dataloader


In [None]:

# === BLOCK 2: MODEL INITIALIZATION ===
def init_models(model_name, logger=None):
    """Load the tokenizer, a LoRA-wrapped quantized policy model and a reference model.

    The policy model is loaded with 4-bit NF4 quantization; if anything in that
    path raises, loading is retried with 8-bit quantization. The reference
    model is loaded with whichever quantization config ended up being used and
    gets no LoRA adapters.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.
        logger: Optional logger for progress messages.

    Returns:
        tuple: ``(policy_model, reference_model, tokenizer)``.
    """
    def _load_base(config):
        # The policy and reference loads differ only in the quantization config.
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=config,
            device_map="auto",
            low_cpu_mem_usage=True
        )

    if logger:
        logger.info(f"Initializing models from {model_name}")

    # Tokenizer: reuse EOS as the padding token when none is defined.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Free cached memory before loading the large checkpoints.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    if logger:
        logger.info("Loading policy model with quantization...")
    try:
        # Preferred path: 4-bit quantization.
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )
        policy_model = prepare_model_for_kbit_training(_load_base(quant_config))
    except Exception as e:
        if logger:
            logger.warning(f"4-bit loading failed: {e}")
        # Fallback path: 8-bit quantization.
        quant_config = BitsAndBytesConfig(load_in_8bit=True)
        policy_model = prepare_model_for_kbit_training(_load_base(quant_config))

    # Attach LoRA adapters to the attention projections.
    policy_model = get_peft_model(
        policy_model,
        LoraConfig(
            r=16,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            bias="none",
            task_type=TaskType.CAUSAL_LM
        )
    )

    # Reference model: same checkpoint and quantization, no adapters.
    if logger:
        logger.info("Loading reference model...")
    reference_model = _load_base(quant_config)

    # Debug output
    print("\n=== MODEL INITIALIZATION ===")
    print(f"Policy model type: {type(policy_model).__name__}")
    print(f"Model device: {next(policy_model.parameters()).device}")
    print(f"Tokenizer vocab size: {len(tokenizer)}")

    # Smoke-test the tokenizer on a short string.
    test_text = "Hello, world!"
    test_tokens = tokenizer(test_text, return_tensors="pt").to(policy_model.device)
    print(f"Test tokenization - Input: '{test_text}', Token count: {len(test_tokens.input_ids[0])}")

    return policy_model, reference_model, tokenizer


In [None]:

# === BLOCK 3: ENERGY TRACKING ===
def _tracked_energy_kwh(tracker, emissions):
    """Return the energy (kWh) recorded by a stopped codecarbon tracker.

    ``EmissionsTracker.stop()`` returns emissions in kg CO2eq, not energy. The
    energy figure is read from ``tracker.final_emissions_data.energy_consumed``
    when available. If that attribute is missing, the raw ``stop()`` value is
    used so the function degrades to its previous behaviour, and ``None``
    becomes 0.0.
    """
    emissions_data = getattr(tracker, "final_emissions_data", None)
    energy = getattr(emissions_data, "energy_consumed", None)
    if energy is None:
        energy = emissions
    return float(energy) if energy is not None else 0.0


def measure_generation_energy(model, tokenizer, prompt, max_new_tokens=128):
    """Generate a completion for `prompt` and measure the energy it took.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching `model`.
        prompt: Fully formatted prompt text (tokenized as given).
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        dict: 'response' (generated text only, special tokens removed),
        'energy_consumption' (kWh), 'duration' (seconds), 'input_tokens',
        'output_tokens' and 'tokens_per_kwh'.
    """
    # Format and tokenize prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_tokens = len(inputs.input_ids[0])

    # Start energy tracking
    tracker = EmissionsTracker(measure_power_secs=1, log_level="error")
    tracker.start()
    start_time = time.time()

    try:
        # Generate text
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_new_tokens=max_new_tokens,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
    finally:
        # Always stop the tracker, even when generation fails, so it does not
        # keep running in the background.
        emissions = tracker.stop()
        duration = time.time() - start_time

    energy_kwh = _tracked_energy_kwh(tracker, emissions)

    # Decode only the newly generated tokens. Slicing by token count avoids
    # splitting on the literal '[/INST]' text, and skipping special tokens
    # keeps markers such as '</s>' out of the response.
    generated_ids = outputs[0][input_tokens:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Calculate token statistics
    output_tokens = len(outputs[0]) - input_tokens
    tokens_per_kwh = output_tokens / (energy_kwh if energy_kwh > 0 else 1e-10)

    # Debug output
    print("\n=== GENERATION RESULTS ===")
    print(f"Input prompt: {prompt[:50]}...")
    print(f"Response preview: {response[:50]}...")
    print(f"Energy consumption: {energy_kwh:.6f} kWh")
    print(f"Generation time: {duration:.2f} seconds")
    print(f"Output tokens: {output_tokens}")
    print(f"Efficiency: {tokens_per_kwh:.2f} tokens/kWh")

    return {
        "response": response,
        "energy_consumption": energy_kwh,
        "duration": duration,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "tokens_per_kwh": tokens_per_kwh
    }


In [None]:

# === BLOCK 4: REWARD CALCULATION ===
def calculate_reward(generation_info, reference_text, energy_stats, sentence_model=None,
                     energy_weight=None, quality_weight=None):
    """Combine an energy score and a quality score into a single reward.

    Energy score: ``(max - energy) / (max - min)`` clamped to [0, 1], so lower
    consumption scores higher; 0.5 when ``max == min``.

    Quality score, in order of preference:
      1. ``generation_info["cosine_similarity"]`` when present and > 0;
      2. cosine similarity between the response and `reference_text` computed
         with `sentence_model`, when all three are available;
      3. 0.0 otherwise (also used when the similarity computation raises).

    Args:
        generation_info: Dict with 'energy_consumption', and optionally
            'response' and 'cosine_similarity'.
        reference_text: Text the response is compared against.
        energy_stats: Dict with 'min' and 'max' energy values.
        sentence_model: Optional embedding model exposing ``encode``.
        energy_weight: Weight of the energy score; defaults to ENERGY_WEIGHT.
        quality_weight: Weight of the quality score; defaults to QUALITY_WEIGHT.

    Returns:
        dict: 'total_reward', 'energy_score', 'quality_score' and
        'energy_consumption'.
    """
    # Fall back to the notebook-level configuration when no weights are given.
    if energy_weight is None:
        energy_weight = ENERGY_WEIGHT
    if quality_weight is None:
        quality_weight = QUALITY_WEIGHT

    # Energy component (normalized and inverted so lower consumption is better)
    energy_value = generation_info["energy_consumption"]
    energy_range = energy_stats['max'] - energy_stats['min']
    if energy_range == 0:
        energy_score = 0.5  # Default if no range
    else:
        energy_score = (energy_stats['max'] - energy_value) / energy_range
        energy_score = max(0, min(1, energy_score))  # Clamp to 0-1

    # Quality component - semantic similarity
    precomputed = generation_info.get("cosine_similarity")
    response_text = generation_info.get("response")
    if precomputed is not None and precomputed > 0:
        quality_score = precomputed
    elif sentence_model and reference_text and response_text:
        try:
            response_embedding = sentence_model.encode(response_text, convert_to_tensor=True)
            reference_embedding = sentence_model.encode(reference_text, convert_to_tensor=True)
            quality_score = float(util.pytorch_cos_sim(response_embedding, reference_embedding).item())
        except Exception as e:
            # Best effort: a failed similarity computation scores 0 instead of
            # aborting the whole run.
            print(f"Error calculating similarity: {e}")
            quality_score = 0.0
    else:
        quality_score = 0.0

    # Combined reward
    total_reward = (energy_weight * energy_score) + (quality_weight * quality_score)

    # Debug output
    print("\n=== REWARD CALCULATION ===")
    print(f"Energy score: {energy_score:.4f}")
    print(f"Quality score: {quality_score:.4f}")
    print(f"Combined reward: {total_reward:.4f}")

    return {
        "total_reward": total_reward,
        "energy_score": energy_score,
        "quality_score": quality_score,
        "energy_consumption": energy_value
    }

In [None]:

# === BLOCK 5: PROMPT OPTIMIZATION ===
def _clean_rewritten_prompt(raw_text, fallback):
    """Strip end-of-sequence markers and wrapping quotes from a model-rewritten prompt.

    Returns `fallback` when nothing usable is left.
    """
    cleaned = raw_text.replace("</s>", "").strip()
    # The instruction quotes the original prompt, so the model may echo the quotes.
    if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in "\"'":
        cleaned = cleaned[1:-1].strip()
    return cleaned or fallback


def test_prompt_optimization(original_prompt, reference_text, model, tokenizer, energy_stats, sentence_model, logger=None):
    """Ask the model to rewrite a prompt, then compare original vs rewritten prompt.

    The model is first asked to produce a more concise version of
    `original_prompt`. Both prompts are then run through
    `measure_generation_energy` and scored with `calculate_reward`.

    Args:
        original_prompt: Prompt text to optimise.
        reference_text: Benchmark text used for the quality score.
        model: Model used both for rewriting and for answering.
        tokenizer: Tokenizer matching `model`.
        energy_stats: Dict with 'min'/'max' energy used for reward normalisation.
        sentence_model: Embedding model used for the quality score.
        logger: Optional logger.

    Returns:
        dict: the two prompts, their generation results and rewards, the
        absolute and relative energy savings and the reward improvement.
    """
    if logger: logger.info("Testing prompt optimization")

    optimization_prompt = f"""
    <s>[INST] I'll provide you with an original prompt. Rewrite this prompt to make it more efficient,
    clear, and concise while preserving the core information request. The goal is to generate a prompt
    that will result in a more energy-efficient response.

    Original prompt: "{original_prompt}"

    Return only the rewritten prompt with no additional text. [/INST]
    """

    # Generate optimized prompt
    optimization_result = measure_generation_energy(
        model,
        tokenizer,
        optimization_prompt,
        max_new_tokens=128
    )

    # The raw generation can carry an end-of-sequence marker and wrapping
    # quotes. Neither belongs inside the next [INST] block, and both would skew
    # the character counts printed below. An empty rewrite falls back to the
    # original prompt.
    optimized_prompt = _clean_rewritten_prompt(optimization_result["response"], original_prompt)

    # Test original prompt
    print("\n=== TESTING ORIGINAL PROMPT ===")
    original_formatted = f"<s>[INST] {original_prompt} [/INST]"
    original_result = measure_generation_energy(
        model,
        tokenizer,
        original_formatted,
        max_new_tokens=256
    )

    # Test optimized prompt
    print("\n=== TESTING OPTIMIZED PROMPT ===")
    optimized_formatted = f"<s>[INST] {optimized_prompt} [/INST]"
    optimized_result = measure_generation_energy(
        model,
        tokenizer,
        optimized_formatted,
        max_new_tokens=256
    )

    # Calculate rewards
    original_reward = calculate_reward(
        original_result,
        reference_text,
        energy_stats,
        sentence_model
    )

    optimized_reward = calculate_reward(
        optimized_result,
        reference_text,
        energy_stats,
        sentence_model
    )

    # Calculate energy savings
    original_energy = original_result["energy_consumption"]
    optimized_energy = optimized_result["energy_consumption"]
    energy_savings = original_energy - optimized_energy
    savings_percent = (1 - (optimized_energy / original_energy)) * 100 if original_energy > 0 else 0
    reward_improvement = optimized_reward["total_reward"] - original_reward["total_reward"]

    # Debug output
    print("\n=== OPTIMIZATION RESULTS ===")
    print(f"Original prompt ({len(original_prompt)} chars): {original_prompt[:100]}...")
    print(f"Optimized prompt ({len(optimized_prompt)} chars): {optimized_prompt[:100]}...")
    print(f"Energy savings: {energy_savings:.6f} kWh ({savings_percent:.2f}%)")
    print(f"Original reward: {original_reward['total_reward']:.4f}")
    print(f"Optimized reward: {optimized_reward['total_reward']:.4f}")
    print(f"Reward improvement: {reward_improvement:.4f}")

    return {
        "original_prompt": original_prompt,
        "optimized_prompt": optimized_prompt,
        "original_result": original_result,
        "optimized_result": optimized_result,
        "original_reward": original_reward,
        "optimized_reward": optimized_reward,
        "energy_savings": energy_savings,
        "energy_savings_percent": savings_percent,
        "reward_improvement": reward_improvement
    }

# === BLOCK 6: PPO TRAINING LOOP ===
def run_ppo_epoch(policy_model, reference_model, dataloader, tokenizer, energy_stats, sentence_model, epoch, logger=None):
    """Roll out the policy over `dataloader` and score the generations.

    For every batch the policy and the reference model each sample a
    completion. Policy completions are scored with `calculate_reward`, and a
    length-based proxy for divergence from the reference is recorded.

    NOTE: this function only generates and scores. It performs no backward
    pass or optimizer step, so the policy weights are not changed here.

    Args:
        policy_model: Model whose generations are scored.
        reference_model: Model used for the divergence proxy.
        dataloader: Yields batches as produced by `prepare_dataloader`.
        tokenizer: Tokenizer shared by both models.
        energy_stats: Dict with 'min'/'max' energy for reward normalisation.
        sentence_model: Embedding model forwarded to `calculate_reward`.
        epoch: Epoch number, used for logging only.
        logger: Optional logger.

    Returns:
        dict: 'avg_reward', 'avg_energy_score', 'avg_quality_score' and
        'avg_kl_divergence' as plain floats (0 when nothing was processed).
    """
    if logger: logger.info(f"Running PPO epoch {epoch}")

    epoch_rewards = []
    epoch_energy = []
    epoch_quality = []
    epoch_kl = []

    # Prompts are padded, so tell generate() explicitly which id is padding.
    pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    generation_kwargs = dict(
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        return_dict_in_generate=True,
        pad_token_id=pad_token_id
    )

    for batch_idx, batch in enumerate(tqdm(dataloader, desc=f"PPO Epoch {epoch}")):
        # Move tensors to device
        input_ids = batch['input_ids'].to(policy_model.device)
        attention_mask = batch['attention_mask'].to(policy_model.device)
        prompt_length = input_ids.shape[1]

        # Generate responses with the policy model. The energy tracker wraps
        # the generation itself, because that is the work whose cost the
        # reward is meant to reflect.
        tracker = EmissionsTracker(measure_power_secs=1, log_level="error")
        tracker.start()
        try:
            with torch.no_grad():
                policy_outputs = policy_model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **generation_kwargs
                )
        finally:
            emissions = tracker.stop()

        # stop() reports kg CO2eq; the energy in kWh is read from the final
        # emissions record when it is available.
        emissions_data = getattr(tracker, "final_emissions_data", None)
        batch_energy_kwh = getattr(emissions_data, "energy_consumed", None)
        if batch_energy_kwh is None:
            batch_energy_kwh = emissions if emissions is not None else 0.0

        # Decode only the generated continuation, not the echoed prompt.
        policy_responses = tokenizer.batch_decode(
            policy_outputs.sequences[:, prompt_length:], skip_special_tokens=True
        )

        # Generate with reference model for the divergence proxy
        with torch.no_grad():
            ref_outputs = reference_model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                **generation_kwargs
            )

        ref_responses = tokenizer.batch_decode(
            ref_outputs.sequences[:, prompt_length:], skip_special_tokens=True
        )

        # The batch is generated in a single call, so its energy is split
        # evenly across the samples.
        per_sample_energy = float(batch_energy_kwh) / max(len(policy_responses), 1)

        # Calculate rewards
        batch_rewards = []
        batch_energy = []
        batch_quality = []

        for i in range(len(policy_responses)):
            # NOTE(review): a positive dataset 'cosine' value takes precedence
            # inside calculate_reward, so the quality score then reflects the
            # stored dataset response rather than this generation. Confirm
            # that this is intended.
            generation_info = {
                "response": policy_responses[i],
                "energy_consumption": per_sample_energy,
                "cosine_similarity": batch['cosine_similarity'][i] if batch['cosine_similarity'][i] > 0 else None
            }

            reward_info = calculate_reward(
                generation_info,
                batch['benchmark_text'][i],
                energy_stats,
                sentence_model
            )

            batch_rewards.append(reward_info["total_reward"])
            batch_energy.append(reward_info["energy_score"])
            batch_quality.append(reward_info["quality_score"])

        # Divergence proxy (simplified): relative difference in response length
        kl_divergence = 0.0
        for policy_text, ref_text in zip(policy_responses, ref_responses):
            longest = max(len(policy_text), len(ref_text))
            if longest:
                # Two empty responses are identical and contribute 0 instead
                # of dividing by zero.
                kl_divergence += abs(len(policy_text) - len(ref_text)) / longest

        avg_kl = kl_divergence / len(policy_responses) if policy_responses else 0
        epoch_kl.append(avg_kl)

        # Add batch metrics to epoch tracking
        epoch_rewards.extend(batch_rewards)
        epoch_energy.extend(batch_energy)
        epoch_quality.extend(batch_quality)

        # Debug output (every few batches)
        if batch_idx % 5 == 0 and batch_rewards:
            print(f"\n=== BATCH {batch_idx} METRICS ===")
            print(f"Avg Reward: {np.mean(batch_rewards):.4f}")
            print(f"Avg Energy Score: {np.mean(batch_energy):.4f}")
            print(f"Avg Quality Score: {np.mean(batch_quality):.4f}")
            print(f"KL Divergence: {avg_kl:.4f}")

        # Clear GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    # Compute epoch metrics (plain floats so they serialise cleanly to JSON)
    epoch_metrics = {
        "avg_reward": float(np.mean(epoch_rewards)) if epoch_rewards else 0,
        "avg_energy_score": float(np.mean(epoch_energy)) if epoch_energy else 0,
        "avg_quality_score": float(np.mean(epoch_quality)) if epoch_quality else 0,
        "avg_kl_divergence": float(np.mean(epoch_kl)) if epoch_kl else 0
    }

    # Debug output
    print("\n=== EPOCH SUMMARY ===")
    for key, value in epoch_metrics.items():
        print(f"{key}: {value:.4f}")

    return epoch_metrics


In [None]:

# === BLOCK 7: EVALUATION ===
def evaluate_samples(model, tokenizer, test_df, energy_stats, sentence_model, num_samples=10, logger=None):
    """Evaluate the model on a random subset of the test set.

    Args:
        model: Model to evaluate.
        tokenizer: Tokenizer matching `model`.
        test_df: Frame with a 'Prompt' column and optionally 'Benchmark_text'.
        energy_stats: Dict with 'min'/'max' energy for reward normalisation.
        sentence_model: Embedding model used for the quality score.
        num_samples: Maximum number of rows to evaluate (drawn without
            replacement).
        logger: Optional logger.

    Returns:
        tuple: ``(results, metrics)``. ``results`` is a list with one
        JSON-serialisable dict per evaluated sample, and ``metrics`` holds the
        averages over those samples (empty dict when nothing was evaluated).
    """
    if logger: logger.info(f"Evaluating model on {num_samples} test samples")

    results = []

    # Use a smaller subset for evaluation
    eval_samples = max(0, min(num_samples, len(test_df)))
    if eval_samples:
        sample_indices = np.random.choice(len(test_df), size=eval_samples, replace=False)
    else:
        sample_indices = []

    for idx in sample_indices:
        # NumPy integers are not JSON serialisable, and the results are dumped
        # to JSON later, so convert to a plain int up front.
        idx = int(idx)
        row = test_df.iloc[idx]
        prompt = row['Prompt']
        benchmark_text = row['Benchmark_text'] if 'Benchmark_text' in row else ""
        if pd.isna(benchmark_text):
            benchmark_text = ""

        # Format prompt
        formatted_prompt = f"<s>[INST] {prompt} [/INST]"

        print(f"\n=== EVALUATING SAMPLE {idx} ===")
        print(f"Prompt: {prompt[:100]}...")

        # Generate response and measure energy
        response_info = measure_generation_energy(
            model,
            tokenizer,
            formatted_prompt,
            max_new_tokens=128
        )

        # Calculate reward
        reward_info = calculate_reward(
            response_info,
            benchmark_text,
            energy_stats,
            sentence_model
        )

        results.append({
            "prompt_id": idx,
            "prompt": prompt,
            "response": response_info["response"],
            "energy_consumption": response_info["energy_consumption"],
            "duration": response_info["duration"],
            "tokens_generated": response_info["output_tokens"],
            "tokens_per_kwh": response_info["tokens_per_kwh"],
            "quality_score": reward_info["quality_score"],
            "energy_score": reward_info["energy_score"],
            "total_reward": reward_info["total_reward"]
        })

    # Compile metrics
    if results:
        metrics = {
            "avg_energy_consumption": float(np.mean([r["energy_consumption"] for r in results])),
            "avg_quality_score": float(np.mean([r["quality_score"] for r in results])),
            "avg_total_reward": float(np.mean([r["total_reward"] for r in results])),
            "avg_tokens_per_kwh": float(np.mean([r["tokens_per_kwh"] for r in results])),
            "avg_generation_time": float(np.mean([r["duration"] for r in results]))
        }
    else:
        metrics = {}

    # Debug output
    print("\n=== EVALUATION SUMMARY ===")
    for key, value in metrics.items():
        print(f"{key}: {value:.6f}")

    return results, metrics


In [None]:
# === MAIN EXECUTION FUNCTION ===
def _json_default(value):
    """Fallback serialiser for json.dump: unwrap NumPy/torch scalars, else stringify."""
    item = getattr(value, "item", None)
    if callable(item):
        try:
            return item()
        except (TypeError, ValueError, RuntimeError):
            # Not a single-element value; fall through to the string form.
            pass
    return str(value)


def run_energy_optimization(
    data_path=DATA_PATH,
    output_dir=OUTPUT_DIR,
    batch_size=BATCH_SIZE,
    num_ppo_epochs=2,  # Reduced from 4 for faster execution
    train_test_split=TRAIN_TEST_SPLIT
):
    """Run the energy optimization pipeline end to end.

    Steps: load and split the data, load the sentence model and the
    policy/reference models, build the training dataloader, run prompt
    optimization on every test row (results are written to JSON every
    `batch_size` rows), run `num_ppo_epochs` rollout epochs, save the policy
    model and tokenizer, and evaluate on a few test samples.

    Args:
        data_path: CSV file with the dataset.
        output_dir: Directory for all JSON outputs and the saved model
            (created if missing).
        batch_size: Dataloader batch size and flush interval for the
            prompt-optimization results.
        num_ppo_epochs: Number of rollout epochs to run.
        train_test_split: Fraction of rows used for training.

    Returns:
        dict: 'eval_metrics', 'saved_model_path', 'optimization_examples'
        (path of the last prompt-optimization file, or None if none was
        written) and 'ppo_metrics' (path of the last epoch's metrics file, or
        None if no epoch ran).
    """
    # Setup
    logger = setup_logging()
    os.makedirs(output_dir, exist_ok=True)

    # Paths reported in the return value. They stay None when the
    # corresponding stage produced no file (empty test split, zero epochs).
    optim_path = None
    ppo_metrics_path = None

    print("\n========== STARTING ENERGY OPTIMIZATION PIPELINE ==========")

    # BLOCK 1: Load and prepare data
    print("\n===== BLOCK 1: DATA PREPARATION =====")
    df = load_data(data_path, logger)
    train_df, test_df, _ = prepare_data_splits(df, train_test_split, logger)

    # Initialize sentence transformer
    print("\n===== INITIALIZING SENTENCE TRANSFORMER =====")
    sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
    print(f"Sentence model loaded: {type(sentence_model).__name__}")

    # BLOCK 2: Load models
    print("\n===== BLOCK 2: MODEL INITIALIZATION =====")
    policy_model, reference_model, tokenizer = init_models(MODEL_NAME, logger)

    # BLOCK 3: Prepare dataloaders
    print("\n===== BLOCK 3: DATALOADER PREPARATION =====")
    train_dataloader = prepare_dataloader(
        train_df,
        tokenizer,
        batch_size,
        MAX_PROMPT_LENGTH,
        logger
    )

    # Initialize energy stats for reward normalization
    energy_stats = {
        'min': train_df['Energy_Consumption_kWh'].min(),
        'max': train_df['Energy_Consumption_kWh'].max()
    }
    print(f"Energy stats - Min: {energy_stats['min']:.6f}, Max: {energy_stats['max']:.6f}")

    # BLOCK 4: Test prompt optimization with batch processing
    print("\n===== BLOCK 4: TESTING PROMPT OPTIMIZATION =====")
    optimization_results = []

    for idx in range(len(test_df)):
        sample = test_df.iloc[idx]
        print(f"\nOptimizing sample prompt {idx+1}...")

        # A missing benchmark text (absent column or NaN cell) becomes "".
        benchmark_text = sample['Benchmark_text'] if 'Benchmark_text' in sample else ""
        if pd.isna(benchmark_text):
            benchmark_text = ""

        result = test_prompt_optimization(
            sample['Prompt'],
            benchmark_text,
            policy_model,
            tokenizer,
            energy_stats,
            sentence_model,
            logger
        )
        optimization_results.append(result)

        # Flush accumulated results every `batch_size` rows and after the last row.
        if (idx + 1) % batch_size == 0 or idx == len(test_df) - 1:
            batch_number = (idx // batch_size) + 1
            optim_path = os.path.join(output_dir, f"prompt_optimization_batch_{batch_number}.json")
            with open(optim_path, "w") as f:
                json.dump(
                    [{k: v for k, v in r.items() if not isinstance(v, torch.Tensor)} for r in optimization_results],
                    f,
                    indent=2,
                    default=_json_default
                )
            print(f"Batch {batch_number} optimization results saved to {optim_path}")
            optimization_results = []  # Reset for next batch

    # BLOCK 5: Run simplified PPO training with batch processing
    print("\n===== BLOCK 5: PPO TRAINING =====")
    ppo_metrics = []

    for epoch in range(num_ppo_epochs):
        print(f"\nRunning PPO Epoch {epoch+1}/{num_ppo_epochs}")
        epoch_metrics = run_ppo_epoch(
            policy_model,
            reference_model,
            train_dataloader,
            tokenizer,
            energy_stats,
            sentence_model,
            epoch+1,
            logger
        )
        ppo_metrics.append(epoch_metrics)

        # Save PPO metrics for the epoch (each file holds all epochs so far)
        ppo_metrics_path = os.path.join(output_dir, f"ppo_training_metrics_epoch_{epoch+1}.json")
        with open(ppo_metrics_path, "w") as f:
            json.dump(ppo_metrics, f, indent=2, default=_json_default)
        print(f"PPO metrics for epoch {epoch+1} saved to {ppo_metrics_path}")

    # Save model
    model_path = os.path.join(output_dir, "policy_model")
    print(f"\nSaving policy model to {model_path}")
    policy_model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)

    # BLOCK 6: Evaluation
    print("\n===== BLOCK 6: MODEL EVALUATION =====")
    eval_results, eval_metrics = evaluate_samples(
        policy_model,
        tokenizer,
        test_df,
        energy_stats,
        sentence_model,
        num_samples=5,  # Evaluate on 5 samples for brevity
        logger=logger
    )

    # Save evaluation results. The default= hook keeps NumPy scalars in the
    # results from aborting the dump.
    eval_results_path = os.path.join(output_dir, "evaluation_results.json")
    with open(eval_results_path, "w") as f:
        json.dump(eval_results, f, indent=2, default=_json_default)

    eval_metrics_path = os.path.join(output_dir, "evaluation_metrics.json")
    with open(eval_metrics_path, "w") as f:
        json.dump(eval_metrics, f, indent=2, default=_json_default)

    print(f"Evaluation results saved to {eval_results_path}")
    print(f"Evaluation metrics saved to {eval_metrics_path}")

    print("\n========== ENERGY OPTIMIZATION PIPELINE COMPLETE ==========")

    return {
        "eval_metrics": eval_metrics,
        "saved_model_path": model_path,
        "optimization_examples": optim_path,
        "ppo_metrics": ppo_metrics_path
    }

# Execute the pipeline with the default configuration (DATA_PATH, OUTPUT_DIR,
# BATCH_SIZE, 2 PPO epochs, TRAIN_TEST_SPLIT). This loads the models and loops over
# the entire test split, so it is a long-running cell; the returned summary dict is
# shown as the cell output.
run_energy_optimization()




===== BLOCK 1: DATA PREPARATION =====

=== DATA PREVIEW ===
Total samples: 5000
Columns: ['Prompt', 'Benchmark_text', 'Mistral_Response', 'Response_Token_length', 'Bechmark_Token_length', 'Response_Total_duration', 'Rouge', 'cosine', 'Energy_Consumption_kWh']

First 3 rows:
                                              Prompt  \
0  Compose a short critique of the following artw...   
1  Given a description of a situation, provide an...   
2  Take the given number and divide it by three, ...   

                                      Benchmark_text  \
0  The artwork "No. 61 (Rust and Brown)" by Mark ...   
1  The chef's behavior towards the waiter was unp...   
2  The result of dividing 42 by 3 is 14. Rounded ...   

                                    Mistral_Response  Response_Token_length  \
0  Title: "Nude Descending a Staircase, No. 2" (1...                    554   
1  The chef's behavior in this situation can be c...                     89   
2  The number 42 divided by 3 is app

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
=== GENERATION RESULTS ===
Input prompt: <s>[INST] "Calculate the median of the following n...
Response preview: To calculate the median of a set of numbers, you f...
Energy consumption: 0.000186 kWh
Generation time: 16.48 seconds
Output tokens: 162
Efficiency: 868821.88 tokens/kWh

=== REWARD CALCULATION ===
Energy score: 0.8076
Quality score: 0.7389
Combined reward: 0.7595

=== REWARD CALCULATION ===
Energy score: 0.8108
Quality score: 0.7623
Combined reward: 0.7769

=== OPTIMIZATION RESULTS ===
Original prompt (62 chars): Find the median of these numbers: 5, 6, 7, 8, 9. 5, 6, 7, 8, 9...
Optimized prompt (81 chars): "Calculate the median of the following numbers: 5, 6, 7, 8, 9, 5, 6, 7, 8, 9"</s>...
Energy savings: 0.000003 kWh (1.66%)
Original reward: 0.7595
Optimized reward: 0.7769
Reward improvement: 0.0174

Optimizing sample prompt 70...

=== GENERATION RESULTS ===
Input prompt: 
    <s>[INST] I'll provide you with 

In [None]:
# === MAIN EXECUTION FUNCTION ===
def run_energy_optimization(
    data_path=DATA_PATH,
    output_dir=OUTPUT_DIR,
    batch_size=BATCH_SIZE,
    num_ppo_epochs=2,  # Reduced from 4 for faster execution
    train_test_split=TRAIN_TEST_SPLIT
):
    """Run the energy optimization pipeline with batch processing.

    Stages, in order:
      1. Load the dataset with ``load_data`` and split it with
         ``prepare_data_splits``.
      2. Load the ``all-MiniLM-L6-v2`` sentence transformer.
      3. Initialise policy/reference models and tokenizer via ``init_models``.
      4. Build the training dataloader and the min/max energy statistics of
         the training split.
      5. Call ``test_prompt_optimization`` on every row of the test split,
         flushing the collected results to a JSON file every ``batch_size``
         rows (and once more for a trailing partial batch).
      6. Call ``run_ppo_epoch`` ``num_ppo_epochs`` times, writing a metrics
         JSON file after each epoch.
      7. Save the policy model and tokenizer, then evaluate on 5 test samples
         with ``evaluate_samples`` and save the results/metrics as JSON.

    Args:
        data_path: Location of the dataset passed to ``load_data``.
        output_dir: Directory that receives every JSON file and the saved
            model; created if it does not exist.
        batch_size: Dataloader batch size, also used as the number of test
            rows per optimization-results file. Must be >= 1.
        num_ppo_epochs: Number of ``run_ppo_epoch`` iterations.
        train_test_split: Split value forwarded to ``prepare_data_splits``.

    Returns:
        dict with keys:
          * ``"eval_metrics"``: metrics returned by ``evaluate_samples``.
          * ``"saved_model_path"``: directory the policy model was saved to.
          * ``"optimization_examples"``: path of the last optimization batch
            file written, or ``None`` if the test split was empty.
          * ``"ppo_metrics"``: path of the last PPO metrics file written, or
            ``None`` if no PPO epoch was run.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Fail fast: batch_size is used as a divisor below, and discovering a bad
    # value only after the models are loaded would waste a lot of time.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Setup
    logger = setup_logging()
    os.makedirs(output_dir, exist_ok=True)

    print("\n========== STARTING ENERGY OPTIMIZATION PIPELINE ==========")

    # BLOCK 1: Load and prepare data
    print("\n===== BLOCK 1: DATA PREPARATION =====")
    df = load_data(data_path, logger)
    # The third return value (split statistics) is not used by this function.
    train_df, test_df, _ = prepare_data_splits(df, train_test_split, logger)

    # Initialize sentence transformer
    print("\n===== INITIALIZING SENTENCE TRANSFORMER =====")
    sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
    print(f"Sentence model loaded: {type(sentence_model).__name__}")

    # BLOCK 2: Load models
    print("\n===== BLOCK 2: MODEL INITIALIZATION =====")
    policy_model, reference_model, tokenizer = init_models(MODEL_NAME, logger)

    # BLOCK 3: Prepare dataloaders
    print("\n===== BLOCK 3: DATALOADER PREPARATION =====")
    train_dataloader = prepare_dataloader(
        train_df,
        tokenizer,
        batch_size,
        MAX_PROMPT_LENGTH,
        logger
    )

    # Initialize energy stats for reward normalization
    energy_stats = {
        'min': train_df['Energy_Consumption_kWh'].min(),
        'max': train_df['Energy_Consumption_kWh'].max()
    }
    print(f"Energy stats - Min: {energy_stats['min']:.6f}, Max: {energy_stats['max']:.6f}")

    # BLOCK 4: Test prompt optimization with batch processing
    print("\n===== BLOCK 4: TESTING PROMPT OPTIMIZATION =====")
    optimization_results = []
    # Pre-bind so the final return cannot hit an unbound local when the test
    # split is empty and the loop body never executes.
    optim_path = None
    num_test_samples = len(test_df)

    for idx in range(num_test_samples):
        sample = test_df.iloc[idx]
        print(f"\nOptimizing sample prompt {idx+1}...")

        result = test_prompt_optimization(
            sample['Prompt'],
            sample['Benchmark_text'] if 'Benchmark_text' in sample else "",
            policy_model,
            tokenizer,
            energy_stats,
            sentence_model,
            logger
        )
        optimization_results.append(result)

        # Flush after every full batch and once more for a trailing partial one.
        if (idx + 1) % batch_size == 0 or idx == num_test_samples - 1:
            batch_number = (idx // batch_size) + 1
            optim_path = os.path.join(output_dir, f"prompt_optimization_batch_{batch_number}.json")
            with open(optim_path, "w") as f:
                # Tensor-valued entries are dropped because json cannot encode them.
                json.dump([{k: v for k, v in r.items() if not isinstance(v, torch.Tensor)} for r in optimization_results], f, indent=2)
            print(f"Batch {batch_number} optimization results saved to {optim_path}")
            optimization_results = []  # Reset for next batch

    # BLOCK 5: Run simplified PPO training with batch processing
    print("\n===== BLOCK 5: PPO TRAINING =====")
    ppo_metrics = []
    # Pre-bind for the same reason as optim_path (num_ppo_epochs may be 0).
    ppo_metrics_path = None

    for epoch in range(num_ppo_epochs):
        print(f"\nRunning PPO Epoch {epoch+1}/{num_ppo_epochs}")
        epoch_metrics = run_ppo_epoch(
            policy_model,
            reference_model,
            train_dataloader,
            tokenizer,
            energy_stats,
            sentence_model,
            epoch+1,
            logger
        )
        ppo_metrics.append(epoch_metrics)

        # Save PPO metrics for the epoch. The file holds the metrics of every
        # epoch run so far (the whole accumulated list is dumped each time).
        ppo_metrics_path = os.path.join(output_dir, f"ppo_training_metrics_epoch_{epoch+1}.json")
        with open(ppo_metrics_path, "w") as f:
            json.dump(ppo_metrics, f, indent=2)
        print(f"PPO metrics for epoch {epoch+1} saved to {ppo_metrics_path}")

    # Save model
    model_path = os.path.join(output_dir, "policy_model")
    print(f"\nSaving policy model to {model_path}")
    policy_model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)

    # BLOCK 6: Evaluation
    print("\n===== BLOCK 6: MODEL EVALUATION =====")
    eval_results, eval_metrics = evaluate_samples(
        policy_model,
        tokenizer,
        test_df,
        energy_stats,
        sentence_model,
        num_samples=5,  # Evaluate on 5 samples for brevity
        logger=logger
    )

    # Save evaluation results
    eval_results_path = os.path.join(output_dir, "evaluation_results.json")
    with open(eval_results_path, "w") as f:
        json.dump(eval_results, f, indent=2)

    eval_metrics_path = os.path.join(output_dir, "evaluation_metrics.json")
    with open(eval_metrics_path, "w") as f:
        json.dump(eval_metrics, f, indent=2)

    print(f"Evaluation results saved to {eval_results_path}")
    print(f"Evaluation metrics saved to {eval_metrics_path}")

    print("\n========== ENERGY OPTIMIZATION PIPELINE COMPLETE ==========")

    return {
        "eval_metrics": eval_metrics,
        "saved_model_path": model_path,
        "optimization_examples": optim_path,
        "ppo_metrics": ppo_metrics_path
    }

# Execute the pipeline with the default arguments declared in the signature
# (DATA_PATH, OUTPUT_DIR, BATCH_SIZE, TRAIN_TEST_SPLIT and num_ppo_epochs=2).
run_energy_optimization()




===== BLOCK 1: DATA PREPARATION =====

=== DATA PREVIEW ===
Total samples: 5000
Columns: ['Prompt', 'Benchmark_text', 'Mistral_Response', 'Response_Token_length', 'Bechmark_Token_length', 'Response_Total_duration', 'Rouge', 'cosine', 'Energy_Consumption_kWh']

First 3 rows:
                                              Prompt  \
0  Compose a short critique of the following artw...   
1  Given a description of a situation, provide an...   
2  Take the given number and divide it by three, ...   

                                      Benchmark_text  \
0  The artwork "No. 61 (Rust and Brown)" by Mark ...   
1  The chef's behavior towards the waiter was unp...   
2  The result of dividing 42 by 3 is 14. Rounded ...   

                                    Mistral_Response  Response_Token_length  \
0  Title: "Nude Descending a Staircase, No. 2" (1...                    554   
1  The chef's behavior in this situation can be c...                     89   
2  The number 42 divided by 3 is app

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
=== GENERATION RESULTS ===
Input prompt: <s>[INST] "Calculate the median of the following n...
Response preview: To calculate the median of a set of numbers, you f...
Energy consumption: 0.000186 kWh
Generation time: 16.48 seconds
Output tokens: 162
Efficiency: 868821.88 tokens/kWh

=== REWARD CALCULATION ===
Energy score: 0.8076
Quality score: 0.7389
Combined reward: 0.7595

=== REWARD CALCULATION ===
Energy score: 0.8108
Quality score: 0.7623
Combined reward: 0.7769

=== OPTIMIZATION RESULTS ===
Original prompt (62 chars): Find the median of these numbers: 5, 6, 7, 8, 9. 5, 6, 7, 8, 9...
Optimized prompt (81 chars): "Calculate the median of the following numbers: 5, 6, 7, 8, 9, 5, 6, 7, 8, 9"</s>...
Energy savings: 0.000003 kWh (1.66%)
Original reward: 0.7595
Optimized reward: 0.7769
Reward improvement: 0.0174

Optimizing sample prompt 70...

=== GENERATION RESULTS ===
Input prompt: 
    <s>[INST] I'll provide you with 