In [1]:
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate

# For caching models and logging
!pip install huggingface_hub

Collecting vllm
  Downloading vllm-0.10.2-cp38-abi3-manylinux1_x86_64.whl.metadata (16 kB)
Collecting blake3 (from vllm)
  Downloading blake3-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (217 bytes)
Collecting transformers>=4.55.2 (from vllm)
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting openai>=1.99.1 (from vllm)
  Downloading openai-1.109.1-py3-none-any.whl.metadata (29 kB)
Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
  Downloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)
Collecting lm-format-enforcer==0.11.3 (from vllm)
  Downloading lm_format_enforcer-0.11.3-py3-none-any.whl.metadata (17 kB)
Collecting llguidance<0.8.0,>=0.7.11 (from vllm)
  Downloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecti

In [2]:
import torch

# Check GPU status
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"Available Memory: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1e9:.2f} GB")

# Clear cache
torch.cuda.empty_cache()

GPU: Tesla T4
Total Memory: 15.83 GB
Available Memory: 15.83 GB


In [3]:
import os
os.environ['VLLM_USE_V1'] = '0'  # Disable V1 engine

# Clear any existing GPU memory
import torch
import gc

# Clear CUDA cache
torch.cuda.empty_cache()
gc.collect()

# Try to free all CUDA memory
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        with torch.cuda.device(i):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
            
import pandas as pd
from vllm import LLM, SamplingParams
from tqdm import tqdm
import gc
import torch

# Load the CSV
df = pd.read_csv('/kaggle/input/banglachq/train.csv')
print(f"Loaded {len(df)} rows from train.csv")
print(f"Columns: {df.columns.tolist()}")

# Load model with vLLM on single GPU with optimized settings
model_name = "Qwen/Qwen2.5-3B-Instruct"
print(f"\nLoading {model_name} with vLLM...")

llm = LLM(
    model=model_name,
    trust_remote_code=True,
    dtype="float16",
    gpu_memory_utilization=0.65,  # Reduced slightly for larger context
    max_model_len=2048,  # Increased to 2048
    max_num_seqs=24,  # Reduced batch size slightly
)

# Get tokenizer for chat template
tokenizer = llm.get_tokenizer()

print("Model loaded successfully!\n")

# Translation function (kept prompt intact)
def translate_to_sylheti(bangla_text):
    prompt = f"""You are a precise translation tool. Your only task is to translate the given Bangla text to Sylheti dialect using Bengali script.

    INSTRUCTIONS:
    - Translate the Bangla text below to Sylheti dialect
    - Use only Bengali script (not Latin script or IPA)
    - Return ONLY the translated text with no additional commentary, explanations, or notes
    - Do not include phrases like "Here is the translation:" or "The Sylheti translation is:"
    - Do not add any metadata, formatting, or extra information
    - If you cannot translate a specific word, keep it as is in the original form

    Here are some examples of Bangla to Sylheti translations:
    Bangla: আপনি কি নিয়মিত স্ক্যান করেন?
    Sylheti: আফনে কিতা রেগুলার স্ক্যান খরইননি?

    Bangla: কোথায় কোচিং করেন?
    Sylheti: কোনানো কোচিং খরইন?

    Bangla: ডাল সিদ্ধ হয়েছে?
    Sylheti: ডাইল সিদ্ধ অইছেনি?

    Bangla: এই যে মিয়া, কি নিয়ে যাও?
    Sylheti: ওবা, কিতা লইয়া যাইরায়?

    Bangla: আপনি কি নিয়মিত স্ক্যান করেন?
    Sylheti: আফনে কিতা রেগুলার স্ক্যান খরইননি?

    Bangla text to translate:
    {bangla_text}

Sylheti translation:"""

    messages = [{"role": "user", "content": prompt}]

    # Apply chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    return formatted_prompt

# Set sampling parameters
sampling_params = SamplingParams(
    temperature=0.3,
    top_p=0.9,
    max_tokens=200
)

# Process in smaller batches to avoid OOM
BATCH_SIZE = 32  # Match max_num_seqs
all_translations = []

print(f"Processing {len(df)} sentences in batches of {BATCH_SIZE}...")

for batch_start in tqdm(range(0, len(df), BATCH_SIZE), desc="Processing batches"):
    batch_end = min(batch_start + BATCH_SIZE, len(df))
    batch_df = df.iloc[batch_start:batch_end]
    
    # Prepare prompts for this batch
    batch_prompts = []
    for idx, row in batch_df.iterrows():
        bangla_text = row['question']
        prompt = translate_to_sylheti(bangla_text)
        batch_prompts.append(prompt)
    
    # Generate translations for this batch
    outputs = llm.generate(batch_prompts, sampling_params)
    
    # Extract translations
    batch_translations = [output.outputs[0].text.strip() for output in outputs]
    all_translations.extend(batch_translations)
    
    # Clear memory after each batch
    del outputs, batch_prompts
    gc.collect()
    torch.cuda.empty_cache()

# Print first few examples
print("\nFirst 3 examples:")
for idx in range(min(3, len(df))):
    print(f"\nBangla: {df.iloc[idx]['question']}")
    print(f"Sylheti: {all_translations[idx]}")

# Add translations to dataframe
df['sylheti_translation'] = all_translations

# Save results
df.to_csv('qwen_5_train_with_sylheti_translations.csv', index=False)
print(f"\n✅ Translation complete! Saved to 'qwen_10_train_with_sylheti_translations.csv'")
print(f"Total sentences translated: {len(df)}")





2025-09-27 21:36:34.119394: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759008994.483061      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759008994.593440      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 09-27 21:36:53 [__init__.py:216] Automatically detected platform cuda.
Loaded 1880 rows from train.csv
Columns: ['id', 'question', 'indices', 'summary']

Loading Qwen/Qwen2.5-3B-Instruct with vLLM...
INFO 09-27 21:36:55 [utils.py:328] non-default args: {'trust_remote_code': True, 'dtype': 'float16', 'max_model_len': 2048, 'gpu_memory_utilization': 0.65, 'max_num_seqs': 24, 'disable_log_stats': True, 'model': 'Qwen/Qwen2.5-3B-Instruct'}


The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

INFO 09-27 21:37:10 [__init__.py:742] Resolved architecture: Qwen2ForCausalLM


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 09-27 21:37:10 [__init__.py:1815] Using max model len 2048
INFO 09-27 21:37:11 [llm_engine.py:221] Initializing a V0 LLM engine (v0.10.2) with config: model='Qwen/Qwen2.5-3B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=None, served_model_name=Qwen/Qwen2.5-3B-Instruct, enable_prefix_cachin

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

INFO 09-27 21:37:13 [cuda.py:408] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 09-27 21:37:13 [cuda.py:453] Using XFormers backend.


[W927 21:37:24.915060361 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
INFO 09-27 21:37:34 [parallel_state.py:1165] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
INFO 09-27 21:37:34 [model_runner.py:1051] Starting to load model Qwen/Qwen2.5-3B-Instruct...


[W927 21:37:34.925790188 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 09-27 21:37:35 [weight_utils.py:348] Using model weights format ['*.safetensors']


model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

INFO 09-27 21:38:13 [weight_utils.py:369] Time spent downloading weights for Qwen/Qwen2.5-3B-Instruct: 37.717127 seconds


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


INFO 09-27 21:38:20 [default_loader.py:268] Loading weights took 6.77 seconds
INFO 09-27 21:38:21 [model_runner.py:1083] Model loading took 5.7916 GiB and 45.067306 seconds
INFO 09-27 21:38:23 [worker.py:290] Memory profiling takes 1.91 seconds
INFO 09-27 21:38:23 [worker.py:290] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.65) = 9.58GiB
INFO 09-27 21:38:23 [worker.py:290] model weights take 5.79GiB; non_torch_memory takes 0.05GiB; PyTorch activation peak memory takes 0.19GiB; the rest of the memory reserved for KV Cache is 3.56GiB.
INFO 09-27 21:38:24 [executor_base.py:114] # cuda blocks: 6472, # CPU blocks: 7281
INFO 09-27 21:38:24 [executor_base.py:119] Maximum concurrency for 2048 tokens per request: 50.56x
INFO 09-27 21:38:28 [model_runner.py:1355] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in th

Capturing CUDA graph shapes:   0%|          | 0/6 [00:00<?, ?it/s]

INFO 09-27 21:38:35 [model_runner.py:1507] Graph capturing finished in 7 secs, took 0.07 GiB
INFO 09-27 21:38:35 [worker.py:467] Free memory on device (14.64/14.74 GiB) on startup. Desired GPU memory utilization is (0.65, 9.58 GiB). Actual usage is 5.79 GiB for weight, 0.19 GiB for peak activation, 0.05 GiB for non-torch memory, and 0.07 GiB for CUDAGraph memory. Replace gpu_memory_utilization config with `--kv-cache-memory=3584957030` to fit into requested memory, or `--kv-cache-memory=9016931328` to fully utilize gpu memory. Current kv cache memory in use is 3817740902 bytes.
INFO 09-27 21:38:35 [llm_engine.py:420] init engine (profile, create kv cache, warmup model) took 14.15 seconds
INFO 09-27 21:38:35 [llm.py:295] Supported_tasks: ['generate']
INFO 09-27 21:38:35 [__init__.py:36] No IOProcessor plugins requested by the model
Model loaded successfully!

Processing 1880 sentences in batches of 32...


Processing batches:   0%|          | 0/59 [00:00<?, ?it/s]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:   2%|▏         | 1/59 [00:26<25:30, 26.38s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:   3%|▎         | 2/59 [00:54<25:48, 27.17s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:   5%|▌         | 3/59 [01:24<26:48, 28.73s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:   7%|▋         | 4/59 [01:54<26:45, 29.19s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:   8%|▊         | 5/59 [02:23<26:13, 29.14s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  10%|█         | 6/59 [02:53<25:52, 29.29s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  12%|█▏        | 7/59 [03:22<25:30, 29.43s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  14%|█▎        | 8/59 [03:52<25:00, 29.43s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  15%|█▌        | 9/59 [04:21<24:34, 29.49s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  17%|█▋        | 10/59 [04:51<24:08, 29.57s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  19%|█▊        | 11/59 [05:21<23:36, 29.51s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  20%|██        | 12/59 [05:50<23:08, 29.54s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  22%|██▏       | 13/59 [06:20<22:36, 29.49s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  24%|██▎       | 14/59 [06:49<22:09, 29.55s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  25%|██▌       | 15/59 [07:19<21:45, 29.67s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  27%|██▋       | 16/59 [07:49<21:18, 29.74s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  29%|██▉       | 17/59 [08:19<20:45, 29.65s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  31%|███       | 18/59 [08:48<20:16, 29.66s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  32%|███▏      | 19/59 [09:18<19:50, 29.76s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  34%|███▍      | 20/59 [09:48<19:19, 29.73s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  36%|███▌      | 21/59 [10:18<18:52, 29.80s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  37%|███▋      | 22/59 [10:48<18:24, 29.85s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  39%|███▉      | 23/59 [11:18<17:52, 29.79s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  41%|████      | 24/59 [11:47<17:19, 29.70s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  42%|████▏     | 25/59 [12:17<16:48, 29.65s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  44%|████▍     | 26/59 [12:46<16:19, 29.69s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  46%|████▌     | 27/59 [13:16<15:49, 29.66s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  47%|████▋     | 28/59 [13:45<15:15, 29.53s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  49%|████▉     | 29/59 [14:15<14:45, 29.51s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  51%|█████     | 30/59 [14:45<14:19, 29.63s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  53%|█████▎    | 31/59 [15:14<13:48, 29.58s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  54%|█████▍    | 32/59 [15:44<13:18, 29.58s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  56%|█████▌    | 33/59 [16:13<12:51, 29.66s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  58%|█████▊    | 34/59 [16:44<12:25, 29.81s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  59%|█████▉    | 35/59 [17:13<11:55, 29.81s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  61%|██████    | 36/59 [17:43<11:26, 29.86s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  63%|██████▎   | 37/59 [18:13<10:55, 29.80s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  64%|██████▍   | 38/59 [18:43<10:27, 29.89s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  66%|██████▌   | 39/59 [19:13<09:56, 29.84s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  68%|██████▊   | 40/59 [19:42<09:26, 29.79s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  69%|██████▉   | 41/59 [20:12<08:53, 29.65s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  71%|███████   | 42/59 [20:42<08:25, 29.76s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  73%|███████▎  | 43/59 [21:12<07:56, 29.77s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  75%|███████▍  | 44/59 [21:41<07:26, 29.74s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  76%|███████▋  | 45/59 [22:11<06:56, 29.77s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  78%|███████▊  | 46/59 [22:41<06:26, 29.73s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  80%|███████▉  | 47/59 [23:10<05:56, 29.73s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  81%|████████▏ | 48/59 [23:40<05:27, 29.79s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  83%|████████▎ | 49/59 [24:10<04:57, 29.79s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  85%|████████▍ | 50/59 [24:40<04:26, 29.65s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  86%|████████▋ | 51/59 [25:09<03:57, 29.66s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  88%|████████▊ | 52/59 [25:39<03:27, 29.62s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  90%|████████▉ | 53/59 [26:09<02:58, 29.73s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  92%|█████████▏| 54/59 [26:39<02:29, 29.86s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  93%|█████████▎| 55/59 [27:09<01:59, 29.92s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  95%|█████████▍| 56/59 [27:38<01:29, 29.74s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  97%|█████████▋| 57/59 [28:08<00:59, 29.60s/it]

Adding requests:   0%|          | 0/32 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches:  98%|█████████▊| 58/59 [28:37<00:29, 29.65s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processing batches: 100%|██████████| 59/59 [28:57<00:00, 29.44s/it]


First 3 examples:

Bangla: আমার বয়স ২৫ বছর । আজ কিছুদিন যাবত আমার ঘন ঘন টয়লেট হচ্ছে । হটাৎ পেটের মধ্যে ব্যাথা অনুভূত হয়ে টয়লেটে চাপ দেয় এবং টয়লেটে যাওয়ার পরে ও টয়লেট ক্লিয়ারলি হচ্ছে না । এই সমস্যাটার সমাধান জানাবেন । আর একটি সমস্যা আছে , সেটি হল আজ দুই দিন যাবত হটাৎ করে আমার ঠান্ডা লেগে সর্দি + অনেক হাচি হচ্ছে । 
Sylheti: আফনে বয়স রেগুলার ২৫ বছর । আজ কিছুদিন যাবত আমার ঘন ঘন টয়লেট হচ্ছে । হটাৎ পেটের মধ্যে ব্যাথা অনুভূত হয়ে টয়লেটে চাপ দেয় এবং টয়লেটে যাওয়ার পরে ও টয়লেট ক্লিয়ারলি হচ্ছে না । এই সমস

Bangla: বাচ্চার বয়স ১০ মাস চলছে , ওজন ৮ . ৩ কেজি । দাঁত উঠতেছে , হালকা ঘন সর্দি আজ ৪ দিন যাবত , হাছি কম । মায়ের বুকের দুধ ছাড়া অন্য কোন খাবার ১০ দিন ধরে খায় না , এমন কি পানিও কম খায় । সর্দি ভাল করার জন্য উপায় কী এবং খাবার খাওয়ানোর ব্যাপারে কী করব ? 
Sylheti: বাচ্চার বয়স ১০ মাস চলইনি, ওজন ৮ . ৩ কেজি । দাঁত উঠইনি, ঘন সর্দি আজ ৪ দিন যাইনি, হাছি কমই। মায়ের বুকের দুধ ছাড়া অন্য কোন খাবার ১০ দিন ধরে খাইনি, এমন কি পানি খাইনি কমই। সর্দি ভাল করাইনি

Bangla: আমার সব সময় চুল পড়ে । প্রায় প্রতিদ


