In [None]:
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate

# For caching models and logging
!pip install huggingface_hub

In [None]:
import torch

# Check GPU status. Guarded so the cell prints a clear message instead of
# raising when no CUDA device is visible.
if torch.cuda.is_available():
    # mem_get_info returns (free, total) bytes for the device, so memory held
    # outside this process's live tensors is reflected in "Available Memory".
    # memory_allocated() only counts tensors allocated by this process.
    free_bytes, _ = torch.cuda.mem_get_info(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory

    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Total Memory: {total_bytes / 1e9:.2f} GB")
    print(f"Available Memory: {free_bytes / 1e9:.2f} GB")

    # Clear cache
    torch.cuda.empty_cache()
else:
    print("No CUDA device available.")

In [None]:
# The engine flag is set ahead of the vLLM import, as in the original cell --
# presumably it must be in the environment before the library loads (confirm
# against the installed vLLM version).
import os
os.environ['VLLM_USE_V1'] = '0'  # Disable V1 engine

import gc

import pandas as pd
import torch
from tqdm import tqdm
from vllm import LLM, SamplingParams

# Release GPU memory left over from earlier runs in this kernel. Python garbage
# is collected first so tensors that just became unreachable are handed back to
# the caching allocator before the cache itself is emptied.
gc.collect()
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        with torch.cuda.device(i):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

# Read the training split from the Kaggle input mount
train_csv_path = '/kaggle/input/banglachq/train.csv'
df = pd.read_csv(train_csv_path)

# Report row count and column names as a quick sanity check
row_count = len(df)
column_names = df.columns.tolist()
print(f"Loaded {row_count} rows from train.csv")
print(f"Columns: {column_names}")

# Engine configuration for loading the model on a single GPU
model_name = "Qwen/Qwen2.5-3B-Instruct"
engine_options = dict(
    model=model_name,
    trust_remote_code=True,
    dtype="float16",
    gpu_memory_utilization=0.65,  # fraction of GPU memory the engine may use
    max_model_len=2048,           # maximum sequence length handled by the engine
    max_num_seqs=24,              # maximum number of sequences per iteration
)

print(f"\nLoading {model_name} with vLLM...")
llm = LLM(**engine_options)

# The tokenizer is used later to render prompts with the chat template
tokenizer = llm.get_tokenizer()

print("Model loaded successfully!\n")

# Prompt builder for the Bangla -> Chittagonian translation task
def translate_to_chittagonian(bangla_text):
    """Build the chat-formatted translation prompt for ``bangla_text``.

    Despite the name, this does not run the model. It fills the instruction
    template with ``bangla_text``, wraps the result as a single user message
    and renders it through the module-level ``tokenizer``'s chat template.

    Args:
        bangla_text: Source text inserted into the prompt as-is.

    Returns:
        The value returned by ``tokenizer.apply_chat_template`` for that
        message, called with ``tokenize=False`` and
        ``add_generation_prompt=True``.
    """
    # One entry per prompt line; "{source}" marks where the input text goes.
    instruction_lines = (
        "You are a precise translation tool. Your only task is to translate the given Bangla text to Chittagonian (Chatgaiyan) dialect using Bengali script.",
        "",
        "INSTRUCTIONS:",
        "- Translate the Bangla text below to Chittagonian (Chatgaiyan) dialect",
        "- Use only Bengali script (not Latin script or IPA)",
        "- Return ONLY the translated text with no additional commentary, explanations, or notes",
        '- Do not include phrases like "Here is the translation:" or "The Chittagonian translation is:"',
        "- Do not add any metadata, formatting, or extra information",
        "- If you cannot translate a specific word, keep it as is in the original form",
        "",
        "Bangla text to translate:",
        "{source}",
        "",
        "Chittagonian translation:",
    )
    user_content = "\n".join(instruction_lines).format(source=bangla_text)

    conversation = [{"role": "user", "content": user_content}]

    # Render as text (not token ids) and append the assistant turn opener
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

# Set sampling parameters. Decoding is stochastic (temperature > 0), so a fixed
# seed is supplied to make repeated runs of the notebook comparable.
SEED = 42
sampling_params = SamplingParams(
    temperature=0.3,
    top_p=0.9,
    max_tokens=200,  # cap on generated tokens per translation
    seed=SEED,
)

# Process in smaller batches to avoid OOM.
# The batch size is kept equal to the engine's max_num_seqs (24 in the LLM(...)
# call); the previous value of 32 contradicted the "match max_num_seqs" intent.
BATCH_SIZE = 24
all_translations = []

print(f"Processing {len(df)} sentences in batches of {BATCH_SIZE}...")

for batch_start in tqdm(range(0, len(df), BATCH_SIZE), desc="Processing batches"):
    # Positional slicing stops at the end of the frame, so the final batch may
    # simply be shorter than BATCH_SIZE.
    batch_questions = df['question'].iloc[batch_start:batch_start + BATCH_SIZE]

    # Prepare prompts for this batch
    batch_prompts = [translate_to_chittagonian(text) for text in batch_questions]

    # Generate translations for this batch
    outputs = llm.generate(batch_prompts, sampling_params)

    # Keep the first completion of each request, stripped of surrounding whitespace
    all_translations.extend(output.outputs[0].text.strip() for output in outputs)

    # Clear memory after each batch
    del outputs, batch_prompts
    gc.collect()
    torch.cuda.empty_cache()

# Print first few examples
print("\nFirst 3 examples:")
for idx in range(min(3, len(df))):
    print(f"\nBangla: {df.iloc[idx]['question']}")
    print(f"Chittagonian: {all_translations[idx]}")

# Add translations to dataframe
df['chittagonian_translation'] = all_translations

# Save results. The path lives in one variable so the confirmation message
# always names the file that was actually written (the message previously
# reported a different filename than the one passed to to_csv).
output_path = 'qwen_5_train_with_translations.csv'
df.to_csv(output_path, index=False)
print(f"\n✅ Translation complete! Saved to '{output_path}'")
print(f"Total sentences translated: {len(df)}")