In [None]:
!pip install -q -U transformers accelerate bitsandbytes codecarbon tqdm

In [None]:
# ==============================================================
#       AUTOMATED TEST GENERATION USING AP_V3 PROMPT
# ==============================================================

import os
import ast
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
from codecarbon import EmissionsTracker

# --- Configuration Section ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Prevent tokenizer deadlocks

# Input and output locations
CODE_DIR = "HumanEval_Integrated_Dataset"
OUTPUT_DIR = "APV3_Llama-3-8B-Instruct"  # Directory where the generated test scripts are saved
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"  # Change the model name here
EMISSIONS_FILE_PATH = "APV3_Llama-3-8B-Instruct.csv"  # CSV file written by CodeCarbon

# Ensure the output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Authenticate with Hugging Face.
# Prefer the HF_TOKEN environment variable so a real token never has to be
# pasted into (and accidentally committed with) this file. When the variable
# is unset, the placeholder below is used exactly as before.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_XXXXXXXXXXXXXXXXXXXXXX")
login(token=HF_TOKEN)

# --- Model Loading Section ---
# The model is loaded 4-bit quantized for efficiency on a T4 GPU.
# NOTE(review): `torch_dtype` is reported as deprecated in favour of `dtype`
# by recent transformers releases; it is kept for compatibility with older ones.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 4-bit weights (previously 8-bit)
    bnb_4bit_quant_type="nf4",              # standard 4-bit quantization type
    bnb_4bit_use_double_quant=True,         # recommended for stability
    bnb_4bit_compute_dtype=torch.float16,   # dtype used for inference compute
)

print(f"Loading 4-bit quantized model '{MODEL_ID}'...")

# Tokenizer first, then the quantized causal LM with automatic device placement.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

print("✅ Model loaded successfully!")

# --- Collect all code files ---
# Paths are built for task indices 0..163; they are NOT checked for existence
# here, so the count printed below is the number of expected files.
file_indices = range(164)
code_files = [
    os.path.join(CODE_DIR, f"HumanEval_{task_index}_code.py")
    for task_index in file_indices
]
print(f"Found {len(code_files)} code files to process.")

# --- Utility Function: Extract Main Function Name using AST ---
def extract_function_name(code_text):
    """
    Return the name of the last top-level function defined in ``code_text``.

    Only direct children of the module body are considered, so nested/inner
    functions and methods inside classes are ignored. Both ``def`` and
    ``async def`` definitions count.

    Args:
        code_text: Python source code as a string.

    Returns:
        The name of the last top-level function, or ``"unknown_function"``
        when the source defines none or cannot be parsed.
    """
    try:
        tree = ast.parse(code_text)
    except (SyntaxError, ValueError, TypeError, RecursionError):
        # Best-effort helper: unparsable input (invalid syntax, null bytes,
        # non-string input, pathological nesting) yields the sentinel name
        # instead of aborting the whole generation run.
        return "unknown_function"

    function_names = [
        node.name
        for node in tree.body
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
    return function_names[-1] if function_names else "unknown_function"

# --- Batch Processing Setup ---
BATCH_SIZE = 5
num_batches = (len(code_files) + BATCH_SIZE - 1) // BATCH_SIZE  # ceiling division

# CodeCarbon output location. dirname() is "" for a bare file name, so fall
# back to the current directory in that case.
emissions_output_dir = os.path.dirname(EMISSIONS_FILE_PATH) or "."
emissions_output_file = os.path.basename(EMISSIONS_FILE_PATH)


def _build_apv3_messages(module_name, function_name, code_content):
    """Build the AP_V3 (role-based) chat messages for one source file.

    Args:
        module_name: Module the generated test should import from.
        function_name: Function the generated test should import and exercise.
        code_content: Full source text of the file under test.

    Returns:
        A list with one system and one user message, in chat-template format.
    """
    # --- Prompt for Test Generation (AP_V3 - Role-based Entropy Cloud) --- #Change the prompt version here APV0, APV1 and APV2
    # Feature  ➜ Adds tone, detailed structure, and illustrative example
    return [
        {
            "role": "system",
            "content": f"""### Task Context
You are an expert Python programmer. Your only task is to write complete unittest test suites.

### Tone Context
Maintain a professional, precise, and methodical tone.

### Detailed Task Description & Rules
1. Analyze the provided Python function.
2. Generate a self-contained unittest test suite.
3. The output must:
   - Begin with import unittest
   - Include from {module_name} import {function_name}
   - Define a single unittest.TestCase class
   - Include multiple test_ methods for normal, edge, and invalid inputs
   - End with if __name__ == '__main__': unittest.main()
4. Use only unittest assertions.
5. Do not include markdown, prose, or explanations.
6. Output must be runnable Python code.

### Example
#### Function:
def sum_of_elements(numbers: list) -> int:
    \"\"\"Return the sum of all integers in a list.\"\"\"
    return sum(numbers)

#### Test Script:
import unittest

class TestSumOfElements(unittest.TestCase):
    def test_positive_numbers(self):
        self.assertEqual(sum_of_elements([1, 2, 3, 4]), 10)

    def test_negative_numbers(self):
        self.assertEqual(sum_of_elements([-1, -2, -3]), -6)

    def test_empty_list(self):
        self.assertEqual(sum_of_elements([]), 0)

if __name__ == '__main__':
    unittest.main()
"""
        },
        {
            "role": "user",
            "content": f"""### Immediate Task
Write the complete unittest test suite for the following Python function.

### Output Formatting
1. Start with: import unittest
2. Include: from {module_name} import {function_name}
3. End with:
if __name__ == '__main__':
    unittest.main()

Function:
{code_content}
"""
        }
    ]


def _generate_completion(messages):
    """Run the model on ``messages`` and return only the newly generated text.

    The sequence returned by ``generate`` starts with the prompt tokens, so
    those are sliced off before decoding; otherwise the saved file would begin
    with the system/user prompt instead of Python code.
    """
    # return_dict=True yields input_ids together with the attention mask.
    model_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.2,
    )
    prompt_length = model_inputs["input_ids"].shape[-1]
    completion_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)


# --- Main Generation Loop ---
total_emissions = 0.0  # kg CO2eq summed over all batches

for batch_index in tqdm(range(num_batches), desc="Processing batches"):
    # One CodeCarbon tracker per batch, all writing to the same CSV file
    tracker = EmissionsTracker(
        project_name=f"{MODEL_ID.replace('/', '_')}_batch_{batch_index}_Test_Generation",
        output_dir=emissions_output_dir,
        output_file=emissions_output_file
    )
    tracker.start()

    start_index = batch_index * BATCH_SIZE
    end_index = min(start_index + BATCH_SIZE, len(code_files))
    batch_files = code_files[start_index:end_index]

    try:
        # Process each file in current batch
        for file_path in batch_files:
            if not os.path.exists(file_path):
                print(f"⚠️ Skipping missing file: {file_path}")
                continue

            try:
                # --- Step 1: Read the source code ---
                with open(file_path, "r", encoding="utf-8") as f:
                    code_content = f.read()

                # --- Step 2: Extract module and function names ---
                # splitext strips only the trailing extension (str.replace
                # would also remove a ".py" occurring mid-name).
                module_name = os.path.splitext(os.path.basename(file_path))[0]
                function_name = extract_function_name(code_content)

                # --- Step 3: Build the AP_V3 prompt ---
                messages = _build_apv3_messages(module_name, function_name, code_content)

                # --- Step 4: Tokenize and Generate Test Script ---
                generated_text = _generate_completion(messages)

                # --- Step 5: Clean and Save Output ---
                # Drop markdown code fences the model may emit despite the instructions.
                generated_test = generated_text.strip().replace("```python", "").replace("```", "").strip()
                task_id = os.path.basename(file_path).replace("_code.py", "")
                output_filename = f"test_{task_id}_test.py"
                output_path = os.path.join(OUTPUT_DIR, output_filename)

                with open(output_path, "w", encoding="utf-8") as test_file:
                    test_file.write(generated_test)

            except Exception as e:
                # Best effort: one failing file must not abort the whole run.
                print(f"❌ Error processing {file_path}: {e}")
                continue
    finally:
        # Always stop the tracker, even on an interrupt, so the batch is recorded.
        emissions = tracker.stop()

    # stop() may return None when tracking failed; skip it in the total then.
    if emissions is not None:
        total_emissions += emissions

print("\n✅ Test generation complete!")
# Report the sum over all batches rather than only the final batch.
print(f"Emissions (kg CO2eq): {total_emissions}")


Loading 4-bit quantized model 'meta-llama/Meta-Llama-3-8B-Instruct'...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

✅ Model loaded successfully!
Found 164 code files to process.


[codecarbon INFO @ 21:12:33] [setup] RAM Tracking...
[codecarbon INFO @ 21:12:33] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 21:12:34] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 21:12:34] [setup] GPU Tracking...
[codecarbon INFO @ 21:12:34] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 21:12:34] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 21:12:34] >>> Tracker's metadata:
[codecarbon INFO @ 21:12:34]   Platform system: Linux-6.6.105+-x86_64-with-glibc2.35
[codecarbon INFO @ 21:12:34]   Python version: 3.12.12
[codecarbon INFO @ 21:12:34]   CodeCarbon version: 3.0.7
[codecarbon INFO @ 21:12:34]   Available RAM : 12.671 GB
[codecarbon INF


✅ Test generation complete!
Emissions (kg CO2eq): 0.00040129675501716466



