## Setup and Model Loading

In [1]:
# Import required libraries
from transformers import pipeline
import torch
import pandas as pd
from tqdm.auto import tqdm
import json
import re
import os
import gc
import ast
import signal
import sys
from io import StringIO
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Memory management and system check
def clear_memory():
    """Run Python's garbage collector and, when CUDA is available, empty PyTorch's CUDA cache."""
    gc.collect()

    # Nothing GPU-side to release on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()

clear_memory()

# Report which device the notebook will run on, and how much memory it has
if not torch.cuda.is_available():
    print("No GPU detected - using CPU (will be very slow)")
else:
    # total_memory is reported in bytes; convert to GiB for display
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {gpu_memory:.1f} GB")

GPU: NVIDIA GeForce RTX 3090 Ti
GPU Memory: 22.0 GB


## Data Loading

In [3]:
# Location of the translated dev split. Set the DEV_DATA_PATH environment
# variable to run the notebook on another machine; the original absolute
# path is kept as the default so existing setups behave exactly as before.
dev_data_path = os.environ.get(
    'DEV_DATA_PATH',
    '/home/nafi/dev/shared-task/task2/prompt/qwen-25-coder-14b-1/dev-translated.csv',
)
dev_df = pd.read_csv(dev_data_path)

# Fail early, with a clear message, if a column read by later cells is absent.
_missing_columns = {'id', 'instruction', 'test_list'} - set(dev_df.columns)
if _missing_columns:
    raise ValueError(
        f"{dev_data_path} is missing required columns: {sorted(_missing_columns)}"
    )

In [4]:
# Decide placement and precision once, based on whether CUDA is usable
has_cuda = torch.cuda.is_available()
pipe_device_map = "auto" if has_cuda else None
pipe_dtype = torch.float16 if has_cuda else torch.float32

# Text-generation pipeline used by every later cell (tokenizer + generation).
# NOTE(review): trust_remote_code=True permits code shipped in the model
# repository to execute locally — confirm this checkpoint actually needs it.
pipe = pipeline(
    "text-generation",
    model="unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit",
    trust_remote_code=True,
    device_map=pipe_device_map,
    torch_dtype=pipe_dtype,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.33s/it]
Device set to use cuda:0


In [5]:
def _extract_function_name(instruction, test_list, default="unknown_function"):
    """Best-effort guess of the function name the generated code must define.

    Args:
        instruction: Task description text; may contain an "Example:" section.
        test_list: Test cases for the task (any object; it is stringified).
        default: Name returned when no candidate can be found.

    Returns:
        The first call-like identifier after "Example:" in the instruction,
        otherwise the first called name in the test cases that is neither a
        Python keyword, a builtin, nor an attribute access, otherwise
        ``default``.
    """
    import builtins
    import keyword

    # Preferred source: the first `name(` token after "Example:".
    if "Example:" in instruction:
        example_part = instruction.split("Example:")[1].strip()
        func_match = re.search(r'(\w+)\s*\(', example_part)
        if func_match:
            return func_match.group(1)

    # Fallback: scan the test cases. The lookbehind rejects attribute calls
    # such as `math.isclose(`; keywords (`assert (`, `not (`) and builtins
    # (`set(`, `sorted(`) are skipped because they wrap the real call.
    for candidate in re.findall(r'(?<![\w.])([A-Za-z_]\w*)\s*\(', str(test_list)):
        if keyword.iskeyword(candidate) or hasattr(builtins, candidate):
            continue
        return candidate

    return default


def format_prompt(example):
    """Format a single example into the required prompt format.

    Args:
        example: Mapping with an 'instruction' entry (task text) and a
            'test_list' entry (test cases, inserted into the prompt as-is).

    Returns:
        The prompt string produced by ``pipe.tokenizer.apply_chat_template``
        for a system message (rules plus two worked examples) and a user
        message (task, test cases, expected function name), with the
        generation prompt appended.

    Relies on the module-level ``pipe`` for its tokenizer.
    """
    instruction = example['instruction']
    test_list = example['test_list']

    # Name the model is told to use; taken from the instruction's example
    # when present, otherwise from the test cases.
    function_name = _extract_function_name(instruction, test_list)

    system_message = """You are an expert Python programmer. Your task is to generate clean, efficient, and correct Python functions that pass all given test cases.

CRITICAL RULES:
1. ALWAYS wrap your code in ```python ``` blocks
2. Write ONLY the function implementation, no extra explanations
3. Use the EXACT function name from the example
4. Ensure the function passes ALL test cases
5. Handle edge cases and invalid inputs appropriately
6. Use appropriate data types based on test case patterns

Here are examples of how to solve different types of problems:

EXAMPLE 1 - String Processing:
Task: Write a Python function to find the first repeated character in a given string.
Test Cases:
assert first_repeated_char("abcabc") == "a"
assert first_repeated_char("abc") == "None"  
assert first_repeated_char("123123") == "1"

Expected Solution:
```python
def first_repeated_char(s):
    seen = set()
    for char in s:
        if char in seen:
            return char
        seen.add(char)
    return "None"
```

EXAMPLE 2 - Mathematical Function:
Task: Write a function to check if a given integer is a prime number.
Test Cases:
assert prime_num(13) == True
assert prime_num(7) == True
assert prime_num(-1010) == False

Expected Solution:
```python
def prime_num(n):
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    for i in range(3, int(n**0.5) + 1, 2):
        if n % i == 0:
            return False
    return True
```


Code Quality Standards:
- Write code with proper indentation
- Optimize for correctness first, then efficiency
- Handle common edge cases (empty inputs, None values, negative numbers, etc.)
- Return the exact data type shown in test cases"""

    user_prompt = f"""Generate a Python function for this problem:

**Task**: {instruction}

**Test Cases**:
{test_list}

**Expected Function Name**: {function_name}

Requirements:
- Follow the examples shown in the system message
- Analyze the test cases carefully to understand input/output patterns
- Implement the function to pass ALL test cases exactly
- Return the appropriate data type as shown in test cases
- Handle edge cases gracefully (empty inputs, invalid values, etc.)
- Use efficient algorithms where applicable

Generate ONLY the Python function wrapped in ```python ``` blocks. No explanations needed."""

    # Format for chat template
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    # Apply chat template; add_generation_prompt=True asks the tokenizer to
    # append the assistant-turn prefix after the user message.
    formatted_prompt = pipe.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    return formatted_prompt

In [6]:
# Keep only the fields each prompt needs from the dev DataFrame, one dict per row
instructions_data = [
    {
        'instruction': row['instruction'],
        'test_list': row['test_list'],
        'id': row['id'],
    }
    for _, row in tqdm(dev_df.iterrows(), desc="Preparing data", unit="row", total=len(dev_df))
]

# Build the prompt and id lists in lockstep so position i in both refers to the same sample
formatted_prompts = []
ids_list = []
for item in tqdm(instructions_data, desc="Formatting prompts", unit="prompt"):
    formatted_prompts.append(format_prompt(item))
    ids_list.append(item['id'])

print(f"Formatted {len(formatted_prompts)} prompts")
print("Creating dataset from formatted prompts...")

dataset_dict = dict(prompt=formatted_prompts, id=ids_list)
dataset = Dataset.from_dict(dataset_dict)
print(f"Dataset created with {len(dataset)} samples")

Preparing data: 100%|██████████| 400/400 [00:00<00:00, 68484.02row/s]
Formatting prompts: 100%|██████████| 400/400 [00:00<00:00, 28509.89prompt/s]

Formatted 400 prompts
Creating dataset from formatted prompts...
Dataset created with 400 samples





## Simple Code Generation

In [7]:
def generate_code(prompt, max_new_tokens=768, temperature=0.1, top_p=0.95, do_sample=True):
    """
    Generate code from the given prompt using the language model pipeline.

    Args:
        prompt: Fully formatted prompt string passed to the pipeline.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature; only sent when ``do_sample`` is True.
        top_p: Nucleus-sampling threshold; only sent when ``do_sample`` is True.
        do_sample: Whether to sample (True) instead of decoding greedily.

    Returns:
        The generated text of the first returned sequence, stripped of
        leading/trailing whitespace. The prompt itself is not included
        (``return_full_text=False``).

    Relies on the module-level ``pipe``. The defaults reproduce the
    previously hard-coded settings.
    """
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "return_full_text": False,
        # Use EOS as the padding token id for generation.
        "pad_token_id": pipe.tokenizer.eos_token_id,
    }
    if do_sample:
        # Sampling-only knobs are omitted for greedy decoding, where they
        # have no effect.
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    result = pipe(prompt, **generation_kwargs)

    generated_code = result[0]['generated_text'].strip()

    return generated_code

In [8]:
# One generated response per dataset sample, in dataset order.
responses = []

for idx in tqdm(range(len(dataset)), desc="Code generation"):
    # Bound before the try block so the failure message below can never
    # report an undefined id or one left over from the previous iteration.
    sample_id = None
    try:
        sample = dataset[idx]
        sample_id = sample['id']
        prompt = sample['prompt']

        # Generate code
        generated_code = generate_code(prompt)
    except Exception as e:
        # Best-effort: record a placeholder so `responses` keeps exactly one
        # entry per sample and stays aligned with the dataset order.
        print(f"Complete failure for ID {sample_id}: {e}")
        responses.append("def placeholder(): pass")
        continue

    responses.append(generated_code)

    # The housekeeping below is deliberately outside the try block: a failure
    # here must not append a second entry for a sample that was already stored.

    # Memory management - clear every 20 samples
    if (idx + 1) % 20 == 0:
        clear_memory()
        print(f"\nMemory cleared after {idx + 1} samples")

    # Progress update every 50 samples
    if (idx + 1) % 50 == 0:
        print(f"\nProgress Update after {idx + 1} samples:")


print("CODE GENERATION COMPLETED!")


Code generation:   2%|▎         | 10/400 [00:30<15:55,  2.45s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Code generation:   5%|▌         | 20/400 [00:55<17:29,  2.76s/it]


Memory cleared after 20 samples


Code generation:  10%|█         | 40/400 [01:47<17:45,  2.96s/it]


Memory cleared after 40 samples


Code generation:  12%|█▎        | 50/400 [02:10<14:03,  2.41s/it]


Progress Update after 50 samples:


Code generation:  15%|█▌        | 60/400 [02:34<14:30,  2.56s/it]


Memory cleared after 60 samples


Code generation:  20%|██        | 80/400 [03:17<09:10,  1.72s/it]


Memory cleared after 80 samples


Code generation:  25%|██▌       | 100/400 [04:38<13:27,  2.69s/it]


Memory cleared after 100 samples

Progress Update after 100 samples:


Code generation:  30%|███       | 120/400 [05:25<09:02,  1.94s/it]


Memory cleared after 120 samples


Code generation:  35%|███▌      | 140/400 [06:16<11:20,  2.62s/it]


Memory cleared after 140 samples


Code generation:  38%|███▊      | 150/400 [06:46<11:11,  2.69s/it]


Progress Update after 150 samples:


Code generation:  40%|████      | 160/400 [07:12<08:53,  2.22s/it]


Memory cleared after 160 samples


Code generation:  45%|████▌     | 180/400 [08:10<12:47,  3.49s/it]


Memory cleared after 180 samples


Code generation:  50%|█████     | 200/400 [09:03<05:45,  1.73s/it]


Memory cleared after 200 samples

Progress Update after 200 samples:


Code generation:  55%|█████▌    | 220/400 [09:46<07:07,  2.37s/it]


Memory cleared after 220 samples


Code generation:  60%|██████    | 240/400 [10:53<10:44,  4.03s/it]


Memory cleared after 240 samples


Code generation:  62%|██████▎   | 250/400 [11:33<07:00,  2.80s/it]


Progress Update after 250 samples:


Code generation:  65%|██████▌   | 260/400 [12:00<05:43,  2.45s/it]


Memory cleared after 260 samples


Code generation:  70%|███████   | 280/400 [12:46<04:34,  2.29s/it]


Memory cleared after 280 samples


Code generation:  75%|███████▌  | 300/400 [13:31<03:29,  2.09s/it]


Memory cleared after 300 samples

Progress Update after 300 samples:


Code generation:  80%|████████  | 320/400 [14:30<03:57,  2.97s/it]


Memory cleared after 320 samples


Code generation:  85%|████████▌ | 340/400 [15:37<02:22,  2.38s/it]


Memory cleared after 340 samples


Code generation:  88%|████████▊ | 350/400 [16:09<03:18,  3.96s/it]


Progress Update after 350 samples:


Code generation:  90%|█████████ | 360/400 [16:33<01:45,  2.63s/it]


Memory cleared after 360 samples


Code generation:  95%|█████████▌| 380/400 [17:16<00:53,  2.66s/it]


Memory cleared after 380 samples


Code generation: 100%|██████████| 400/400 [18:15<00:00,  2.74s/it]


Memory cleared after 400 samples

Progress Update after 400 samples:
CODE GENERATION COMPLETED!





In [9]:
# Pair each dev-set id with the response generated at the same position
submission_data = [
    {"id": int(row_id), "response": responses[position]}
    for position, row_id in enumerate(dev_df['id'])
]

submission_file = "submission.json"
with open(submission_file, 'w', encoding='utf-8') as out_file:
    # ensure_ascii=False keeps non-ASCII characters readable in the file
    json.dump(submission_data, out_file, ensure_ascii=False, indent=2)


print("SUBMISSION SAVED!")
print(f"File: {submission_file}")
print(f"Total samples: {len(submission_data)}")

SUBMISSION SAVED!
File: submission.json
Total samples: 400
