In [1]:
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate
# vLLM itself
!pip install vllm

# Hugging Face transformers for model handling
!pip install transformers

# BitsAndBytes (for quantization support like 4-bit/8-bit)
!pip install bitsandbytes

# Accelerate (required for model device mapping)
!pip install accelerate

# For caching models and logging
!pip install huggingface_hub



In [2]:
import torch

# Check GPU status
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"Available Memory: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1e9:.2f} GB")

# Clear cache
torch.cuda.empty_cache()

GPU: Tesla T4
Total Memory: 15.83 GB
Available Memory: 15.83 GB


In [None]:
import os
from huggingface_hub import login

# Load token from environment variable
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

In [4]:
import torch
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())


PyTorch: 2.8.0+cu128
CUDA available: True
CUDA version: 12.8
cuDNN version: 91002


In [6]:
import os
os.environ['VLLM_USE_V1'] = '0'

import torch
import gc

torch.cuda.empty_cache()
gc.collect()

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        with torch.cuda.device(i):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

import pandas as pd
from vllm import LLM, SamplingParams
from tqdm import tqdm

df = pd.read_csv('/kaggle/input/banglachq/train.csv')
df = df[['id', 'question']].copy()
print(f"Loaded {len(df)} rows")

model_name = "google/gemma-3-1b-it"
print(f"\nLoading {model_name}...")

llm = LLM(
    model=model_name,
    trust_remote_code=True,
    dtype="float32",  # 1B should work with float16 on T4
    gpu_memory_utilization=0.7,
    max_model_len=2048,
    max_num_seqs=24,
)

tokenizer = llm.get_tokenizer()
print("Model loaded!\n")

# keep prompt intact
def translate_to_sylheti(bangla_text):
    prompt = f"""You are a precise translation tool. Your task is to translate the given Bangla text to Sylheti dialect using Bengali script.

        INSTRUCTIONS:
        - Analyze the text step-by-step before translating
        - Use only Bengali script in your final translation
        - After your reasoning, provide ONLY the final translation with no additional commentary

        Here are examples showing the translation process:

        Example 1:
        Bangla: আপনি কি নিয়মিত স্ক্যান করেন?
        Reasoning: "আপনি" becomes "আফনে" (phonetic shift প→ফ, ই→ে), "কি নিয়মিত" becomes "কিতা রেগুলার" (কি→কিতা with emphasis particle, নিয়মিত→রেগুলার lexical change), "স্ক্যান করেন" becomes "স্ক্যান খরইননি?" (করেন→খরইননি with খ sound and -ননি ending for question)
        Sylheti: আফনে কিতা রেগুলার স্ক্যান খরইননি?

        Example 2:
        Bangla: কোথায় কোচিং করেন?
        Reasoning: "কোথায়" becomes "কোনানো" (locative form কোথায়→কোনানো with extended vowel), "কোচিং" stays as "কোচিং" (unchanged), "করেন" becomes "খরইন?" (verb করেন→খরইন with খ sound and -ইন ending)
        Sylheti: কোনানো কোচিং খরইন?

        Example 3:
        Bangla: ডাল সিদ্ধ হয়েছে?
        Reasoning: "ডাল" becomes "ডাইল" (vowel shift আ→আই for diphthong), "সিদ্ধ" stays as "সিদ্ধ" (unchanged), "হয়েছে" becomes "অইছেনি?" (হয়েছে→অইছেনি with অ prefix and -নি question marker)
        Sylheti: ডাইল সিদ্ধ অইছেনি?

        Example 4:
        Bangla: এই যে মিয়া, কি নিয়ে যাও?
        Reasoning: "এই যে মিয়া" becomes "ওবা" (colloquial address form, complete lexical replacement for calling attention), "কি নিয়ে" becomes "কিতা লইয়া" (কি→কিতা, নিয়ে→লইয়া with ল sound), "যাও" becomes "যাইরায়?" (যাও→যাইরায় with extended verb form and য় ending)
        Sylheti: ওবা, কিতা লইয়া যাইরায়?

        Example 5:
        Bangla: জুম্মার পর যাব।
        Reasoning: "জুম্মার" becomes "জুম্মার" (unchanged as Arabic loanword), "পর" stays as "পর" (unchanged), "যাব" becomes "যামু" (first person future -ব→-মু typical Sylheti verb ending)
        Sylheti: জুম্মার পর যামু।

        Now translate this text:
        Bangla: {bangla_text}

        Think through the translation:
        - Identify key vocabulary transformations
        - Note grammatical changes (verb forms, pronouns, question markers)
        - Apply Sylheti phonetic patterns (খ for ক in verbs, diphthongs, vowel shifts)

        Provide your final translation below:
        Sylheti:""" 
    
    messages = [{"role": "user", "content": prompt}]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

sampling_params = SamplingParams(
    temperature=0.3,
    top_p=0.9,
    max_tokens=512,
    stop=["Bangla:"]
)

BATCH_SIZE = 24
all_translations = []

for batch_start in tqdm(range(0, len(df), BATCH_SIZE), desc="Translating"):
    batch_end = min(batch_start + BATCH_SIZE, len(df))
    batch_prompts = [translate_to_sylheti(row['question']) for _, row in df.iloc[batch_start:batch_end].iterrows()]
    
    outputs = llm.generate(batch_prompts, sampling_params)
    all_translations.extend([o.outputs[0].text.strip() for o in outputs])
    
    del outputs, batch_prompts
    gc.collect()
    torch.cuda.empty_cache()

df['sylheti_translation'] = all_translations
df.to_csv('gemma_5_shot_sylheti_CoT.csv', index=False)
print("Done!")


2025-09-28 17:38:01.028999: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759081081.054519     488 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759081081.062854     488 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 09-28 17:38:06 [__init__.py:216] Automatically detected platform cuda.
Loaded 1880 rows

Loading google/gemma-3-1b-it...
INFO 09-28 17:38:07 [utils.py:328] non-default args: {'trust_remote_code': True, 'dtype': 'float32', 'max_model_len': 2048, 'gpu_memory_utilization': 0.7, 'max_num_seqs': 24, 'disable_log_stats': True, 'model': 'google/gemma-3-1b-it'}


The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


INFO 09-28 17:38:26 [__init__.py:742] Resolved architecture: Gemma3ForCausalLM


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 09-28 17:38:26 [__init__.py:2761] Upcasting torch.bfloat16 to torch.float32.
INFO 09-28 17:38:26 [__init__.py:1815] Using max model len 2048
INFO 09-28 17:38:27 [llm_engine.py:221] Initializing a V0 LLM engine (v0.10.2) with config: model='google/gemma-3-1b-it', speculative_config=None, tokenizer='google/gemma-3-1b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float32, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), s

[W928 17:38:42.182955439 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
INFO 09-28 17:38:52 [parallel_state.py:1165] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
INFO 09-28 17:38:52 [model_runner.py:1051] Starting to load model google/gemma-3-1b-it...


[W928 17:38:52.191580279 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 09-28 17:38:53 [weight_utils.py:348] Using model weights format ['*.safetensors']
INFO 09-28 17:38:53 [weight_utils.py:406] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 09-28 17:38:55 [default_loader.py:268] Loading weights took 2.17 seconds
INFO 09-28 17:38:56 [model_runner.py:1083] Model loading took 3.7884 GiB and 2.677821 seconds
INFO 09-28 17:39:01 [worker.py:290] Memory profiling takes 4.41 seconds
INFO 09-28 17:39:01 [worker.py:290] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.70) = 10.32GiB
INFO 09-28 17:39:01 [worker.py:290] model weights take 3.79GiB; non_torch_memory takes 0.03GiB; PyTorch activation peak memory takes 0.30GiB; the rest of the memory reserved for KV Cache is 6.20GiB.
INFO 09-28 17:39:02 [executor_base.py:114] # cuda blocks: 7820, # CPU blocks: 5041
INFO 09-28 17:39:02 [executor_base.py:119] Maximum concurrency for 2048 tokens per request: 61.09x
INFO 09-28 17:39:07 [model_runner.py:1355] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in th

Capturing CUDA graph shapes:   0%|          | 0/6 [00:00<?, ?it/s]

INFO 09-28 17:39:16 [model_runner.py:1507] Graph capturing finished in 9 secs, took 0.09 GiB
INFO 09-28 17:39:16 [worker.py:467] Free memory on device (14.64/14.74 GiB) on startup. Desired GPU memory utilization is (0.7, 10.32 GiB). Actual usage is 3.79 GiB for weight, 0.3 GiB for peak activation, 0.03 GiB for non-torch memory, and 0.09 GiB for CUDAGraph memory. Replace gpu_memory_utilization config with `--kv-cache-memory=6408710451` to fit into requested memory, or `--kv-cache-memory=11049268736` to fully utilize gpu memory. Current kv cache memory in use is 6662465843 bytes.
INFO 09-28 17:39:16 [llm_engine.py:420] init engine (profile, create kv cache, warmup model) took 19.57 seconds
INFO 09-28 17:39:16 [llm.py:295] Supported_tasks: ['generate']
INFO 09-28 17:39:16 [__init__.py:36] No IOProcessor plugins requested by the model
Model loaded!



Translating:   0%|          | 0/79 [00:00<?, ?it/s]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   1%|▏         | 1/79 [00:13<17:44, 13.65s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   3%|▎         | 2/79 [00:36<24:40, 19.22s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   4%|▍         | 3/79 [01:00<26:52, 21.21s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   5%|▌         | 4/79 [01:14<23:05, 18.47s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   6%|▋         | 5/79 [01:38<25:03, 20.32s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   8%|▊         | 6/79 [02:01<26:07, 21.48s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:   9%|▉         | 7/79 [02:23<25:52, 21.57s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  10%|█         | 8/79 [02:47<26:23, 22.30s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  11%|█▏        | 9/79 [03:11<26:40, 22.87s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  13%|█▎        | 10/79 [03:33<25:51, 22.49s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  14%|█▍        | 11/79 [03:56<25:51, 22.81s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  15%|█▌        | 12/79 [04:18<25:01, 22.41s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  16%|█▋        | 13/79 [04:42<25:10, 22.89s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  18%|█▊        | 14/79 [05:04<24:23, 22.52s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  19%|█▉        | 15/79 [05:25<23:45, 22.28s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  20%|██        | 16/79 [05:39<20:45, 19.77s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  22%|██▏       | 17/79 [06:03<21:44, 21.05s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  23%|██▎       | 18/79 [06:25<21:33, 21.21s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  24%|██▍       | 19/79 [06:46<21:17, 21.29s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  25%|██▌       | 20/79 [07:01<19:06, 19.43s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  27%|██▋       | 21/79 [07:22<19:14, 19.90s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  28%|██▊       | 22/79 [07:37<17:25, 18.34s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  29%|██▉       | 23/79 [07:59<18:01, 19.31s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  30%|███       | 24/79 [08:21<18:36, 20.29s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  32%|███▏      | 25/79 [08:35<16:29, 18.32s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  33%|███▎      | 26/79 [08:58<17:20, 19.63s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  34%|███▍      | 27/79 [09:11<15:24, 17.79s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  35%|███▌      | 28/79 [09:25<14:05, 16.58s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  37%|███▋      | 29/79 [09:47<15:10, 18.21s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  38%|███▊      | 30/79 [10:10<16:08, 19.77s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  39%|███▉      | 31/79 [10:33<16:29, 20.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  41%|████      | 32/79 [10:57<16:51, 21.53s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  42%|████▏     | 33/79 [11:21<17:05, 22.30s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  43%|████▎     | 34/79 [11:45<17:07, 22.83s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  44%|████▍     | 35/79 [12:07<16:32, 22.56s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  46%|████▌     | 36/79 [12:26<15:30, 21.63s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  47%|████▋     | 37/79 [12:50<15:36, 22.31s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  48%|████▊     | 38/79 [13:13<15:26, 22.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  49%|████▉     | 39/79 [13:35<14:54, 22.36s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  51%|█████     | 40/79 [13:59<14:50, 22.83s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  52%|█████▏    | 41/79 [14:22<14:33, 23.00s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  53%|█████▎    | 42/79 [14:46<14:15, 23.13s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  54%|█████▍    | 43/79 [14:59<12:01, 20.05s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  56%|█████▌    | 44/79 [15:23<12:28, 21.38s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  57%|█████▋    | 45/79 [15:46<12:26, 21.95s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  58%|█████▊    | 46/79 [16:11<12:25, 22.59s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  59%|█████▉    | 47/79 [16:33<12:00, 22.51s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  61%|██████    | 48/79 [16:57<11:55, 23.08s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  62%|██████▏   | 49/79 [17:21<11:37, 23.24s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  63%|██████▎   | 50/79 [17:45<11:21, 23.51s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  65%|██████▍   | 51/79 [18:07<10:41, 22.91s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  66%|██████▌   | 52/79 [18:26<09:52, 21.95s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  67%|██████▋   | 53/79 [18:50<09:47, 22.60s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  68%|██████▊   | 54/79 [19:08<08:44, 21.00s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  70%|██████▉   | 55/79 [19:22<07:37, 19.05s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  71%|███████   | 56/79 [19:46<07:54, 20.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  72%|███████▏  | 57/79 [20:08<07:41, 20.99s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  73%|███████▎  | 58/79 [20:32<07:35, 21.69s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  75%|███████▍  | 59/79 [20:52<07:07, 21.40s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  76%|███████▌  | 60/79 [21:10<06:25, 20.28s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  77%|███████▋  | 61/79 [21:35<06:28, 21.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  78%|███████▊  | 62/79 [21:49<05:30, 19.44s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  80%|███████▉  | 63/79 [22:11<05:21, 20.10s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  81%|████████  | 64/79 [22:35<05:19, 21.30s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  82%|████████▏ | 65/79 [22:57<05:00, 21.44s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  84%|████████▎ | 66/79 [23:21<04:49, 22.23s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  85%|████████▍ | 67/79 [23:45<04:33, 22.80s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  86%|████████▌ | 68/79 [24:08<04:13, 23.07s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  87%|████████▋ | 69/79 [24:21<03:19, 19.95s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  89%|████████▊ | 70/79 [24:38<02:51, 19.04s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  90%|████████▉ | 71/79 [24:53<02:22, 17.82s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  91%|█████████ | 72/79 [25:18<02:19, 19.95s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  92%|█████████▏| 73/79 [25:40<02:03, 20.59s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  94%|█████████▎| 74/79 [26:03<01:46, 21.29s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  95%|█████████▍| 75/79 [26:16<01:14, 18.71s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  96%|█████████▌| 76/79 [26:37<00:58, 19.61s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  97%|█████████▋| 77/79 [27:02<00:41, 20.98s/it]

Adding requests:   0%|          | 0/24 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/24 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating:  99%|█████████▊| 78/79 [27:26<00:22, 22.00s/it]

Adding requests:   0%|          | 0/8 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Translating: 100%|██████████| 79/79 [27:41<00:00, 21.03s/it]

Done!



