In [1]:
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate

# For caching models and logging
!pip install huggingface_hub

Collecting vllm
  Downloading vllm-0.10.2-cp38-abi3-manylinux1_x86_64.whl.metadata (16 kB)
Collecting blake3 (from vllm)
  Downloading blake3-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (217 bytes)
Collecting transformers>=4.55.2 (from vllm)
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting openai>=1.99.1 (from vllm)
  Downloading openai-1.109.1-py3-none-any.whl.metadata (29 kB)
Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
  Downloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)
Collecting lm-format-enforcer==0.11.3 (from vllm)
  Downloading lm_format_enforcer-0.11.3-py3-none-any.whl.metadata (17 kB)
Collecting llguidance<0.8.0,>=0.7.11 (from vllm)
  Downloading llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecti

In [2]:
import torch

# Check GPU status
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"Available Memory: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1e9:.2f} GB")

# Clear cache
torch.cuda.empty_cache()

GPU: Tesla T4
Total Memory: 15.83 GB
Available Memory: 15.83 GB


In [None]:
import os
from huggingface_hub import login

# Load token from environment variable
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

In [4]:
import os
os.environ['VLLM_USE_V1'] = '0'

import torch
import gc

torch.cuda.empty_cache()
gc.collect()

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        with torch.cuda.device(i):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

import pandas as pd
from vllm import LLM, SamplingParams
from tqdm import tqdm

df = pd.read_csv('/kaggle/input/banglachq/train.csv')
df = df[['id', 'question']].copy()
print(f"Loaded {len(df)} rows")

model_name = "google/gemma-3-1b-it"
print(f"\nLoading {model_name}...")

llm = LLM(
    model=model_name,
    trust_remote_code=True,
    dtype="float32",  # 1B should work with float16 on T4
    gpu_memory_utilization=0.7,
    max_model_len=2048,
    max_num_seqs=24,
)

tokenizer = llm.get_tokenizer()
print("Model loaded!\n")

# keep prompt intact
def translate_to_sylheti(bangla_text):
    prompt = f"""You are a precise translation tool. Your only task is to translate the given Bangla text to Sylheti dialect using Bengali script.

    INSTRUCTIONS:
    - Translate the Bangla text below to Sylheti dialect
    - Use only Bengali script (not Latin script or IPA)
    - Return ONLY the translated text with no additional commentary, explanations, or notes
    - Do not include phrases like "Here is the translation:" or "The Sylheti translation is:"
    - Do not add any metadata, formatting, or extra information
    - If you cannot translate a specific word, keep it as is in the original form

    Here are some examples of Bangla to Sylheti translations:
    Bangla: আপনি কি নিয়মিত স্ক্যান করেন?
    Sylheti: আফনে কিতা রেগুলার স্ক্যান খরইননি?

    Bangla: কোথায় কোচিং করেন?
    Sylheti: কোনানো কোচিং খরইন?

    Bangla: ডাল সিদ্ধ হয়েছে?
    Sylheti: ডাইল সিদ্ধ অইছেনি?

    Bangla: এই যে মিয়া, কি নিয়ে যাও?
    Sylheti: ওবা, কিতা লইয়া যাইরায়?

    Bangla: আপনি কি নিয়মিত স্ক্যান করেন?
    Sylheti: আফনে কিতা রেগুলার স্ক্যান খরইননি?

    Bangla: হ্যাঁ, প্রতিদিন করি
    Sylheti: অয়, ডেইলি খরি

    Bangla: কি করছেন?
    Sylheti: কিতা খররা?

    Bangla: জামাই কবে আসবে?
    Sylheti: জামাই কোনদিন আইবো?

    Bangla: হলুদ আর মরিচ দেন।
    Sylheti: অলইদ আর মরিচ দেইন।

    Bangla: দুটো দিলেই হবে।
    Sylheti: দুইটা দিলে ওউ অইবো।

    Bangla text to translate:
    {bangla_text}

    Sylheti translation:"""
    
    messages = [{"role": "user", "content": prompt}]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

sampling_params = SamplingParams(
    temperature=0.3,
    top_p=0.9,
    max_tokens=512,
    stop=["Bangla:"]
)

BATCH_SIZE = 24
all_translations = []

for batch_start in tqdm(range(0, len(df), BATCH_SIZE), desc="Translating"):
    batch_end = min(batch_start + BATCH_SIZE, len(df))
    batch_prompts = [translate_to_sylheti(row['question']) for _, row in df.iloc[batch_start:batch_end].iterrows()]
    
    outputs = llm.generate(batch_prompts, sampling_params)
    all_translations.extend([o.outputs[0].text.strip() for o in outputs])
    
    del outputs, batch_prompts
    gc.collect()
    torch.cuda.empty_cache()

df['sylheti_translation'] = all_translations
df.to_csv('gemma_10_shot_sylheti.csv', index=False)
print("Done!")


2025-09-27 22:40:53.900775: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759012854.280057      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759012854.387117      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 09-27 22:41:12 [__init__.py:216] Automatically detected platform cuda.
Loaded 1880 rows

Loading google/gemma-3-1b-it...
INFO 09-27 22:41:15 [utils.py:328] non-default args: {'trust_remote_code': True, 'dtype': 'float32', 'max_model_len': 2048, 'gpu_memory_utilization': 0.7, 'max_num_seqs': 24, 'disable_log_stats': True, 'model': 'google/gemma-3-1b-it'}


The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


config.json:   0%|          | 0.00/899 [00:00<?, ?B/s]

INFO 09-27 22:41:31 [__init__.py:742] Resolved architecture: Gemma3ForCausalLM


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 09-27 22:41:31 [__init__.py:2761] Upcasting torch.bfloat16 to torch.float32.
INFO 09-27 22:41:31 [__init__.py:1815] Using max model len 2048
INFO 09-27 22:41:32 [llm_engine.py:221] Initializing a V0 LLM engine (v0.10.2) with config: model='google/gemma-3-1b-it', speculative_config=None, tokenizer='google/gemma-3-1b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float32, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), s

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

INFO 09-27 22:41:39 [cuda.py:408] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 09-27 22:41:39 [cuda.py:453] Using XFormers backend.


[W927 22:41:49.384082228 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
INFO 09-27 22:41:59 [parallel_state.py:1165] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
INFO 09-27 22:41:59 [model_runner.py:1051] Starting to load model google/gemma-3-1b-it...


[W927 22:41:59.394651480 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 09-27 22:42:00 [weight_utils.py:348] Using model weights format ['*.safetensors']


model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

INFO 09-27 22:42:11 [weight_utils.py:369] Time spent downloading weights for google/gemma-3-1b-it: 10.243656 seconds
INFO 09-27 22:42:11 [weight_utils.py:406] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 09-27 22:42:13 [default_loader.py:268] Loading weights took 2.24 seconds
INFO 09-27 22:42:14 [model_runner.py:1083] Model loading took 3.7884 GiB and 12.928683 seconds
INFO 09-27 22:42:26 [worker.py:290] Memory profiling takes 11.30 seconds
INFO 09-27 22:42:26 [worker.py:290] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.70) = 10.32GiB
INFO 09-27 22:42:26 [worker.py:290] model weights take 3.79GiB; non_torch_memory takes 0.03GiB; PyTorch activation peak memory takes 0.30GiB; the rest of the memory reserved for KV Cache is 6.20GiB.
INFO 09-27 22:42:26 [executor_base.py:114] # cuda blocks: 7820, # CPU blocks: 5041
INFO 09-27 22:42:26 [executor_base.py:119] Maximum concurrency for 2048 tokens per request: 61.09x
INFO 09-27 22:42:32 [model_runner.py:1355] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in 

Capturing CUDA graph shapes:   0%|          | 0/6 [00:00<?, ?it/s]

INFO 09-27 22:42:43 [model_runner.py:1507] Graph capturing finished in 11 secs, took 0.09 GiB
INFO 09-27 22:42:43 [worker.py:467] Free memory on device (14.64/14.74 GiB) on startup. Desired GPU memory utilization is (0.7, 10.32 GiB). Actual usage is 3.79 GiB for weight, 0.3 GiB for peak activation, 0.03 GiB for non-torch memory, and 0.09 GiB for CUDAGraph memory. Replace gpu_memory_utilization config with `--kv-cache-memory=6408710451` to fit into requested memory, or `--kv-cache-memory=11049268736` to fully utilize gpu memory. Current kv cache memory in use is 6662465843 bytes.
INFO 09-27 22:42:43 [llm_engine.py:420] init engine (profile, create kv cache, warmup model) took 28.98 seconds
INFO 09-27 22:42:43 [llm.py:295] Supported_tasks: ['generate']
INFO 09-27 22:42:43 [__init__.py:36] No IOProcessor plugins requested by the model
Model loaded!



Translating:   0%|          | 0/79 [00:00<?, ?it/s]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   1%|▏         | 1/79 [00:11<15:01, 11.55s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   3%|▎         | 2/79 [00:29<19:21, 15.09s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   4%|▍         | 3/79 [00:47<20:57, 16.55s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   5%|▌         | 4/79 [00:58<17:56, 14.35s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   6%|▋         | 5/79 [01:08<15:52, 12.87s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   8%|▊         | 6/79 [01:19<14:47, 12.16s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   9%|▉         | 7/79 [01:29<13:56, 11.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  10%|█         | 8/79 [01:41<13:34, 11.47s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  11%|█▏        | 9/79 [01:52<13:16, 11.38s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  13%|█▎        | 10/79 [02:03<12:52, 11.19s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  14%|█▍        | 11/79 [02:13<12:32, 11.07s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  15%|█▌        | 12/79 [02:25<12:27, 11.16s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  16%|█▋        | 13/79 [02:44<14:57, 13.60s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  18%|█▊        | 14/79 [02:55<13:48, 12.75s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  19%|█▉        | 15/79 [03:05<12:47, 12.00s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  20%|██        | 16/79 [03:16<12:14, 11.66s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  22%|██▏       | 17/79 [03:26<11:41, 11.31s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  23%|██▎       | 18/79 [03:47<14:30, 14.27s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  24%|██▍       | 19/79 [03:58<13:10, 13.18s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  25%|██▌       | 20/79 [04:10<12:28, 12.68s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  27%|██▋       | 21/79 [04:20<11:37, 12.02s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  28%|██▊       | 22/79 [04:31<10:57, 11.54s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  29%|██▉       | 23/79 [04:41<10:34, 11.33s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  30%|███       | 24/79 [04:52<10:14, 11.18s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  32%|███▏      | 25/79 [05:03<09:54, 11.00s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  33%|███▎      | 26/79 [05:14<09:46, 11.07s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  34%|███▍      | 27/79 [05:27<10:02, 11.59s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  35%|███▌      | 28/79 [05:39<10:01, 11.79s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  37%|███▋      | 29/79 [05:50<09:31, 11.43s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  38%|███▊      | 30/79 [06:09<11:11, 13.70s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  39%|███▉      | 31/79 [06:22<10:50, 13.54s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  41%|████      | 32/79 [06:32<09:52, 12.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  42%|████▏     | 33/79 [06:44<09:26, 12.31s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  43%|████▎     | 34/79 [06:55<08:57, 11.96s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  44%|████▍     | 35/79 [07:06<08:27, 11.54s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  46%|████▌     | 36/79 [07:17<08:11, 11.44s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  47%|████▋     | 37/79 [07:35<09:30, 13.59s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  48%|████▊     | 38/79 [07:47<08:51, 12.96s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  49%|████▉     | 39/79 [07:58<08:11, 12.29s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  51%|█████     | 40/79 [08:08<07:40, 11.82s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  52%|█████▏    | 41/79 [08:27<08:44, 13.81s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  53%|█████▎    | 42/79 [08:36<07:45, 12.57s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  54%|█████▍    | 43/79 [08:46<07:02, 11.72s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  56%|█████▌    | 44/79 [08:57<06:40, 11.43s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  57%|█████▋    | 45/79 [09:08<06:29, 11.45s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  58%|█████▊    | 46/79 [09:19<06:11, 11.27s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  59%|█████▉    | 47/79 [09:38<07:16, 13.65s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  61%|██████    | 48/79 [09:50<06:41, 12.94s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  62%|██████▏   | 49/79 [10:01<06:11, 12.37s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  63%|██████▎   | 50/79 [10:14<06:08, 12.69s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  65%|██████▍   | 51/79 [10:25<05:39, 12.13s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  66%|██████▌   | 52/79 [10:36<05:13, 11.63s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  67%|██████▋   | 53/79 [10:46<04:54, 11.33s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  68%|██████▊   | 54/79 [10:57<04:40, 11.24s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  70%|██████▉   | 55/79 [11:09<04:35, 11.48s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  71%|███████   | 56/79 [11:21<04:24, 11.48s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  72%|███████▏  | 57/79 [11:32<04:12, 11.50s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  73%|███████▎  | 58/79 [11:43<03:58, 11.35s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  75%|███████▍  | 59/79 [11:54<03:41, 11.09s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  76%|███████▌  | 60/79 [12:05<03:31, 11.14s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  77%|███████▋  | 61/79 [12:15<03:12, 10.67s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  78%|███████▊  | 62/79 [12:26<03:03, 10.77s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  80%|███████▉  | 63/79 [12:44<03:30, 13.15s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  81%|████████  | 64/79 [12:55<03:06, 12.45s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  82%|████████▏ | 65/79 [13:06<02:48, 12.05s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  84%|████████▎ | 66/79 [13:17<02:33, 11.80s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  85%|████████▍ | 67/79 [13:29<02:20, 11.73s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  86%|████████▌ | 68/79 [13:40<02:07, 11.59s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  87%|████████▋ | 69/79 [13:59<02:16, 13.68s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  89%|████████▊ | 70/79 [14:10<01:56, 12.95s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  90%|████████▉ | 71/79 [14:21<01:39, 12.43s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  91%|█████████ | 72/79 [14:33<01:25, 12.24s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  92%|█████████▏| 73/79 [14:52<01:25, 14.31s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  94%|█████████▎| 74/79 [15:03<01:06, 13.22s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  95%|█████████▍| 75/79 [15:13<00:49, 12.32s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  96%|█████████▌| 76/79 [15:23<00:35, 11.67s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  97%|█████████▋| 77/79 [15:35<00:23, 11.71s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  99%|█████████▊| 78/79 [15:54<00:13, 13.82s/it]

Adding requests:   0%|          | 0/8 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating: 100%|██████████| 79/79 [15:59<00:00, 12.15s/it]

Done!



