## Setup

In [1]:
from transformers import pipeline
import torch
import pandas as pd
from tqdm.auto import tqdm
import json
import re
import os
import gc
import ast
import signal
import sys
from io import StringIO
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def clear_memory():
    """Run Python garbage collection, then empty the CUDA cache when CUDA is available."""
    gc.collect()
    # Nothing GPU-side to release on a CPU-only machine.
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()

# Start from a clean slate before reporting on the device.
clear_memory()

# Report which device generation will run on.
if not torch.cuda.is_available():
    print("No GPU detected - using CPU (will be very slow)")
else:
    # total_memory is reported in bytes; convert to GiB for display.
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {gpu_memory:.1f} GB")

GPU: NVIDIA GeForce RTX 3090 Ti
GPU Memory: 22.0 GB


## Data Loading, Model Loading, and Prompt Preparation

In [3]:
# Path to the development-set CSV; replace the placeholder before running.
dev_data_path = 'PATH_TO_ORIGINAL_DEV_DATASET'
dev_df = pd.read_csv(dev_data_path)

# Fail fast with a readable message: later cells index these columns by name,
# and a missing one would otherwise surface as a KeyError deep inside a loop.
_required_columns = {'id', 'instruction', 'test_list'}
_missing_columns = _required_columns - set(dev_df.columns)
assert not _missing_columns, (
    f"Dev dataset is missing required columns: {sorted(_missing_columns)}"
)

In [4]:
# Decide once whether a GPU is available; both device placement and dtype follow from it.
has_cuda = torch.cuda.is_available()

# Text-generation pipeline shared by the prompt-formatting and generation cells.
pipe = pipeline(
    "text-generation",
    model="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    trust_remote_code=False,  # Llama doesn't need trust_remote_code
    device_map="auto" if has_cuda else None,
    torch_dtype=torch.float16 if has_cuda else torch.float32,
)

Device set to use cuda:0


In [5]:
def format_prompt(example, tokenizer=None):
    """Format a single example into the chat prompt used for code generation.

    Args:
        example: Mapping with an 'instruction' entry (the task description) and
            a 'test_list' entry (the test cases; interpolated into the prompt
            verbatim).
        tokenizer: Optional object exposing ``apply_chat_template``. When
            omitted, the tokenizer of the notebook-level ``pipe`` is used, so
            existing single-argument calls behave as before.

    Returns:
        The result of ``apply_chat_template`` for the system + user messages,
        called with ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    import builtins  # local import: only needed for the fallback name filter

    instruction = example['instruction']
    test_list = example['test_list']

    # First choice: the signature that follows "Example:" in the instruction.
    function_name = None
    if "Example:" in instruction:
        example_part = instruction.split("Example:")[1].strip()
        func_match = re.search(r'(\w+)\s*\(', example_part)
        if func_match:
            function_name = func_match.group(1)

    # Fallback: the first non-builtin name called directly after `assert` in
    # the test cases, so the prompt does not advertise "unknown_function" when
    # the tests themselves reveal the name. Builtins are skipped so that
    # wrappers such as `assert len(f(x)) == 1` are not mistaken for the target.
    if function_name is None:
        for candidate in re.findall(r'assert\s+(\w+)\s*\(', str(test_list)):
            if not hasattr(builtins, candidate):
                function_name = candidate
                break
    if function_name is None:
        function_name = "unknown_function"

    system_message = """You are an expert Python programmer. Your task is to generate clean, efficient, and correct Python functions that pass all given test cases.

CRITICAL RULES:
1. ALWAYS wrap your code in ```python ``` blocks
2. Write ONLY the function implementation, no extra explanations
3. Use the EXACT function name from the example
4. Ensure the function passes ALL test cases
5. Handle edge cases and invalid inputs appropriately
6. Use appropriate data types based on test case patterns

Here are examples of how to solve different types of problems:

EXAMPLE 1 - String Processing:
Task: ‡¶è‡¶ï‡¶ü‡¶ø ‡¶™‡ßç‡¶∞‡¶¶‡¶§‡ßç‡¶§ ‡¶∏‡ßç‡¶ü‡ßç‡¶∞‡¶ø‡¶Ç-‡¶è ‡¶™‡ßç‡¶∞‡¶•‡¶Æ ‡¶™‡ßÅ‡¶®‡¶∞‡¶æ‡¶¨‡ßÉ‡¶§‡ßç‡¶§ ‡¶Ö‡¶ï‡ßç‡¶∑‡¶∞ ‡¶ñ‡ßÅ‡¶Å‡¶ú‡ßá ‡¶™‡ßá‡¶§‡ßá ‡¶è‡¶ï‡¶ü‡¶ø ‡¶™‡¶æ‡¶á‡¶•‡¶® ‡¶´‡¶æ‡¶Ç‡¶∂‡¶® ‡¶≤‡¶ø‡¶ñ‡ßÅ‡¶®‡•§
Test Cases:
assert first_repeated_char("abcabc") == "a"
assert first_repeated_char("abc") == "None"  
assert first_repeated_char("123123") == "1"

Expected Solution:
```python
def first_repeated_char(s):
    seen = set()
    for char in s:
        if char in seen:
            return char
        seen.add(char)
    return "None"
```

EXAMPLE 2 - Mathematical Function:
Task: ‡¶™‡ßç‡¶∞‡¶¶‡¶§‡ßç‡¶§ ‡¶™‡ßÇ‡¶∞‡ßç‡¶£‡¶∏‡¶Ç‡¶ñ‡ßç‡¶Ø‡¶æ‡¶ü‡¶ø ‡¶è‡¶ï‡¶ü‡¶ø ‡¶Æ‡ßå‡¶≤‡¶ø‡¶ï ‡¶∏‡¶Ç‡¶ñ‡ßç‡¶Ø‡¶æ ‡¶ï‡¶ø‡¶®‡¶æ ‡¶§‡¶æ ‡¶™‡¶∞‡ßÄ‡¶ï‡ßç‡¶∑‡¶æ ‡¶ï‡¶∞‡¶æ‡¶∞ ‡¶ú‡¶®‡ßç‡¶Ø ‡¶è‡¶ï‡¶ü‡¶ø ‡¶´‡¶æ‡¶Ç‡¶∂‡¶® ‡¶≤‡¶ø‡¶ñ‡ßÅ‡¶®‡•§
Test Cases:
assert prime_num(13) == True
assert prime_num(7) == True
assert prime_num(-1010) == False

Expected Solution:
```python
def prime_num(n):
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    for i in range(3, int(n**0.5) + 1, 2):
        if n % i == 0:
            return False
    return True
```


Code Quality Standards:
- Write code with proper indentation
- Optimize for correctness first, then efficiency
- Handle common edge cases (empty inputs, None values, negative numbers, etc.)
- Return the exact data type shown in test cases"""

    user_prompt = f"""Generate a Python function for this problem:

**Task**: {instruction}

**Test Cases**:
{test_list}

**Expected Function Name**: {function_name}

Requirements:
- Follow the examples shown in the system message
- Analyze the test cases carefully to understand input/output patterns
- Implement the function to pass ALL test cases exactly
- Return the appropriate data type as shown in test cases
- Handle edge cases gracefully (empty inputs, invalid values, etc.)
- Use efficient algorithms where applicable

Generate ONLY the Python function wrapped in ```python ``` blocks. No explanations needed."""

    # Format for Llama 3.1 using chat template
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    # Resolve the tokenizer lazily so the global pipeline is only touched when
    # the caller did not supply one.
    if tokenizer is None:
        tokenizer = pipe.tokenizer

    # Apply chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    return formatted_prompt

In [6]:
# Pull the three fields each prompt needs out of every dev-set row.
instructions_data = []
for _, row in tqdm(dev_df.iterrows(), desc="Preparing data", unit="row", total=len(dev_df)):
    record = {field: row[field] for field in ('instruction', 'test_list', 'id')}
    instructions_data.append(record)

# Build the prompts and keep the matching ids in the same order.
formatted_prompts, ids_list = [], []
for item in tqdm(instructions_data, desc="Formatting prompts", unit="prompt"):
    formatted_prompts.append(format_prompt(item))
    ids_list.append(item['id'])

print(f"Formatted {len(formatted_prompts)} prompts")
print("Creating dataset from formatted prompts...")

# Column-oriented layout expected by Dataset.from_dict: one list per column.
dataset_dict = {'prompt': formatted_prompts, 'id': ids_list}
dataset = Dataset.from_dict(dataset_dict)

print(f"Dataset created with {len(dataset)} samples")

Preparing data:   0%|          | 0/400 [00:00<?, ?row/s]

Preparing data: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 400/400 [00:00<00:00, 76114.76row/s]
Formatting prompts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 400/400 [00:00<00:00, 27068.76prompt/s]

Formatted 400 prompts
Creating dataset from formatted prompts...
Dataset created with 400 samples





In [7]:
def get_function_name_from_test(test_cases_str):
    """Extract function name from test cases for better error reporting.

    Accepts the test cases as a doubly string-encoded list (a string literal
    whose value is itself the repr of a list), as a singly encoded list
    string, or as an actual list/tuple of test-case strings.

    Args:
        test_cases_str: The test cases in any of the forms described above.

    Returns:
        The first name found directly after ``assert`` and before ``(`` in the
        test cases, or the literal "function" when no such name can be found.
    """
    parsed = test_cases_str
    # Peel off at most two layers of string encoding. Stop as soon as the value
    # is no longer a string or cannot be parsed as a Python literal.
    for _ in range(2):
        if not isinstance(parsed, str):
            break
        try:
            parsed = ast.literal_eval(parsed)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            break

    if isinstance(parsed, str):
        # Whatever is left is treated as a single test-case string.
        test_cases = [parsed]
    elif isinstance(parsed, (list, tuple)):
        test_cases = parsed
    else:
        return "function"

    for test_case in test_cases:
        if not isinstance(test_case, str):
            continue
        func_match = re.search(r'assert\s+(\w+)\s*\(', test_case)
        if func_match:
            return func_match.group(1)
    return "function"

## Simple Code Generation

In [8]:
def generate_code(prompt):
    """
    Run the notebook-level text-generation pipeline on one prompt.

    Args:
        prompt: The fully formatted prompt string passed to the pipeline.

    Returns:
        The 'generated_text' of the first pipeline result, stripped of
        surrounding whitespace.
    """
    tokenizer = pipe.tokenizer

    generation_kwargs = dict(
        max_new_tokens=768,  # Llama 3.1 works well with moderate length
        temperature=0.1,     # Conservative temperature for code generation
        top_p=0.95,          # Standard top_p for Llama
        do_sample=True,
        return_full_text=False,
        # Pad with the EOS token id when the tokenizer exposes one.
        pad_token_id=getattr(tokenizer, 'eos_token_id', None),
    )

    outputs = pipe(prompt, **generation_kwargs)
    return outputs[0]['generated_text'].strip()

In [9]:
# One generated response per dataset row, in dataset order.
responses = []

for idx in tqdm(range(len(dataset)), desc="ü¶ô Llama code generation"):
    # Bound before the try so the failure message never reads an unbound or
    # stale ID left over from a previous iteration.
    sample_id = None
    try:
        sample = dataset[idx]
        sample_id = sample['id']
        prompt = sample['prompt']

        # Generate code
        generated_code = generate_code(prompt)
    except Exception as e:
        print(f"Complete failure for ID {sample_id}: {e}")
        generated_code = "def placeholder(): pass"

    # Append exactly once per sample. Housekeeping below stays outside the try
    # so a failure there can never add a second entry for the same sample and
    # shift every later response out of alignment.
    responses.append(generated_code)

    # Memory management - clear every 20 samples (also after a failed sample)
    if (idx + 1) % 20 == 0:
        clear_memory()
        print(f"\nüß† Memory cleared after {idx + 1} samples")

    # Progress update every 50 samples
    if (idx + 1) % 50 == 0:
        print(f"\nüìä Progress Update after {idx + 1} samples:") 

# The submission step pairs responses with rows by position.
assert len(responses) == len(dataset), "expected one response per dataset row"

print("LLAMA CODE GENERATION COMPLETED!")

ü¶ô Llama code generation:   2%|‚ñé         | 10/400 [00:19<10:58,  1.69s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
ü¶ô Llama code generation:   5%|‚ñå         | 20/400 [00:35<11:02,  1.74s/it]


üß† Memory cleared after 20 samples


ü¶ô Llama code generation:  10%|‚ñà         | 40/400 [01:08<11:41,  1.95s/it]


üß† Memory cleared after 40 samples


ü¶ô Llama code generation:  12%|‚ñà‚ñé        | 50/400 [01:22<08:27,  1.45s/it]


üìä Progress Update after 50 samples:


ü¶ô Llama code generation:  15%|‚ñà‚ñå        | 60/400 [01:40<12:17,  2.17s/it]


üß† Memory cleared after 60 samples


ü¶ô Llama code generation:  20%|‚ñà‚ñà        | 80/400 [02:10<07:21,  1.38s/it]


üß† Memory cleared after 80 samples


ü¶ô Llama code generation:  25%|‚ñà‚ñà‚ñå       | 100/400 [02:45<07:30,  1.50s/it]


üß† Memory cleared after 100 samples

üìä Progress Update after 100 samples:


ü¶ô Llama code generation:  30%|‚ñà‚ñà‚ñà       | 120/400 [03:12<05:56,  1.27s/it]


üß† Memory cleared after 120 samples


ü¶ô Llama code generation:  35%|‚ñà‚ñà‚ñà‚ñå      | 140/400 [03:51<08:49,  2.04s/it]


üß† Memory cleared after 140 samples


ü¶ô Llama code generation:  38%|‚ñà‚ñà‚ñà‚ñä      | 150/400 [04:08<06:42,  1.61s/it]


üìä Progress Update after 150 samples:


ü¶ô Llama code generation:  40%|‚ñà‚ñà‚ñà‚ñà      | 160/400 [04:33<12:47,  3.20s/it]


üß† Memory cleared after 160 samples


ü¶ô Llama code generation:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 180/400 [05:07<06:25,  1.75s/it]


üß† Memory cleared after 180 samples


ü¶ô Llama code generation:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 200/400 [05:40<04:21,  1.31s/it]


üß† Memory cleared after 200 samples

üìä Progress Update after 200 samples:


ü¶ô Llama code generation:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 220/400 [06:09<05:04,  1.69s/it]


üß† Memory cleared after 220 samples


ü¶ô Llama code generation:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 240/400 [06:52<07:38,  2.86s/it]


üß† Memory cleared after 240 samples


ü¶ô Llama code generation:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 250/400 [07:14<04:25,  1.77s/it]


üìä Progress Update after 250 samples:


ü¶ô Llama code generation:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 260/400 [07:28<03:02,  1.30s/it]


üß† Memory cleared after 260 samples


ü¶ô Llama code generation:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 280/400 [08:01<03:41,  1.85s/it]


üß† Memory cleared after 280 samples


ü¶ô Llama code generation:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 300/400 [08:28<02:13,  1.33s/it]


üß† Memory cleared after 300 samples

üìä Progress Update after 300 samples:


ü¶ô Llama code generation:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 320/400 [09:05<02:17,  1.72s/it]


üß† Memory cleared after 320 samples


ü¶ô Llama code generation:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 340/400 [09:49<02:01,  2.03s/it]


üß† Memory cleared after 340 samples


ü¶ô Llama code generation:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 350/400 [10:03<01:18,  1.57s/it]


üìä Progress Update after 350 samples:


ü¶ô Llama code generation:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 360/400 [10:20<01:11,  1.80s/it]


üß† Memory cleared after 360 samples


ü¶ô Llama code generation:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 380/400 [10:49<00:36,  1.81s/it]


üß† Memory cleared after 380 samples


ü¶ô Llama code generation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 400/400 [11:21<00:00,  1.70s/it]


üß† Memory cleared after 400 samples

üìä Progress Update after 400 samples:
LLAMA CODE GENERATION COMPLETED!





In [10]:
# Responses are matched to dev_df rows purely by position, so a length mismatch
# would silently attach responses to the wrong ids. Refuse to write the file.
if len(responses) != len(dev_df):
    raise ValueError(
        f"Expected {len(dev_df)} responses (one per dev_df row), got {len(responses)}"
    )

# One {"id", "response"} record per row, in dev_df order.
submission_data = [
    {"id": int(sample_id), "response": response}
    for sample_id, response in zip(dev_df['id'], responses)
]

submission_file = "submission.json"
# ensure_ascii=False keeps non-ASCII characters in the responses readable.
with open(submission_file, 'w', encoding='utf-8') as f:
    json.dump(submission_data, f, ensure_ascii=False, indent=2)


print("SUBMISSION SAVED!")
print(f"File: {submission_file}")
print(f"Total samples: {len(submission_data)}")

SUBMISSION SAVED!
File: submission.json
Total samples: 400
