In [2]:
import os
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import time
import csv


TXTS_PATH = "transcripts"
OUT_PATH = "summaries_russian"
os.makedirs(OUT_PATH, exist_ok=True)

CHUNK_TOKENS = 1024
SUMMARY_TOKENS = 150

DEVICE = 0 if torch.cuda.is_available() else -1
LANG_CODE = "ru_RU"

def chunk_text(text, tokenizer, max_tokens):
    import nltk
    nltk.download('punkt', quiet=True)
    from nltk.tokenize import sent_tokenize
    sentences = sent_tokenize(text, language='russian')
    chunks, current = [], ""
    for sent in sentences:
        if len(tokenizer.encode(current + sent)) < max_tokens:
            current += " " + sent
        else:
            if current: chunks.append(current)
            current = sent
    if current:
        chunks.append(current)
    return chunks

def summarize_with_mbart50(text, model, tokenizer):
    chunks = chunk_text(text, tokenizer, CHUNK_TOKENS)
    summaries = []
    for chunk in chunks:
        inputs = tokenizer(
            chunk, return_tensors="pt", max_length=CHUNK_TOKENS, truncation=True
        ).to(model.device)
        with torch.no_grad():
            summary_ids = model.generate(
                **inputs,
                max_length=SUMMARY_TOKENS, 
                num_beams=4, 
                forced_bos_token_id=tokenizer.lang_code_to_id[LANG_CODE]
            )
        out = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        summaries.append(out)
    merged_summary = " ".join(summaries)
    return merged_summary

def summarize_with_rut5(text, pipe):
    chunks = chunk_text(text, pipe.tokenizer, CHUNK_TOKENS)
    # ruT5 is T5, so use native summarization pipeline
    summaries = pipe(chunks, max_length=SUMMARY_TOKENS, min_length=20, truncation=True, batch_size=4)
    merged = " ".join([out['summary_text'] for out in summaries])
    return merged

# LOAD MODELS ONCE
mbart_tokenizer = MBart50TokenizerFast.from_pretrained('facebook/mbart-large-50-many-to-many-mmt')
mbart_tokenizer.src_lang = LANG_CODE
mbart_model = MBartForConditionalGeneration.from_pretrained('facebook/mbart-large-50-many-to-many-mmt').to(DEVICE)

ruT5_model_name = "ai-forever/ruT5-base"
ruT5_pipe = pipeline("summarization", model=ruT5_model_name, tokenizer=ruT5_model_name, device=DEVICE)

timing_report = []  # List of dicts: one row per run

for fname in os.listdir(TXTS_PATH):
    if not fname.endswith(".txt"): continue
    fpath = os.path.join(TXTS_PATH, fname)
    with open(fpath, "r", encoding="utf-8") as f:
        text = f.read()
    
    # MBART-50
    outname_mbart = os.path.join(OUT_PATH, f"{fname}.mbart.summary.txt")
    if not os.path.exists(outname_mbart):
        t0 = time.time()
        summary = summarize_with_mbart50(text, mbart_model, mbart_tokenizer)
        elapsed = time.time() - t0
        with open(outname_mbart, "w", encoding="utf-8") as fout:
            fout.write(summary)
        print(f"  Wrote {outname_mbart}, time: {elapsed:.1f}s")
        timing_report.append({'filename': fname, 'model': 'mbart50', 'seconds': elapsed})
        
    # ruT5
    outname_rut5 = os.path.join(OUT_PATH, f"{fname}.rut5.summary.txt")
    if not os.path.exists(outname_rut5):
        t0 = time.time()
        summary = summarize_with_rut5(text, ruT5_pipe)
        elapsed = time.time() - t0
        with open(outname_rut5, "w", encoding="utf-8") as fout:
            fout.write(summary)
        print(f"  Wrote {outname_rut5}, time: {elapsed:.1f}s")
        timing_report.append({'filename': fname, 'model': 'ruT5', 'seconds': elapsed})

    # TextRank
    try:
        from summa.summarizer import summarize as textrank_summarize
        outname_textrank = os.path.join(OUT_PATH, f"{fname}.textrank.summary.txt")
        if not os.path.exists(outname_textrank):
            t0 = time.time()
            summary = textrank_summarize(text, language="russian", ratio=0.07)
            elapsed = time.time() - t0
            with open(outname_textrank, "w", encoding="utf-8") as fout:
                fout.write(summary)
            print(f"  Wrote {outname_textrank}, time: {elapsed:.1f}s")
            timing_report.append({'filename': fname, 'model': 'textrank', 'seconds': elapsed})
    except ImportError:
        print("summa not installed, skipping textrank.")


print("DONE.")


  Wrote summaries_russian/recognized_small_20m20s.txt.mbart.summary.txt, time: 105.8s


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  Wrote summaries_russian/recognized_small_20m20s.txt.rut5.summary.txt, time: 33.6s
summa not installed, skipping textrank.
  Wrote summaries_russian/recognized_medium_24m15s.txt.mbart.summary.txt, time: 77.4s
  Wrote summaries_russian/recognized_medium_24m15s.txt.rut5.summary.txt, time: 22.0s
summa not installed, skipping textrank.
  Wrote summaries_russian/recognized_large_33m5s.txt.mbart.summary.txt, time: 81.9s
  Wrote summaries_russian/recognized_large_33m5s.txt.rut5.summary.txt, time: 31.8s
summa not installed, skipping textrank.
  Wrote summaries_russian/recognized_tiny_14m19s.txt.mbart.summary.txt, time: 174.1s
  Wrote summaries_russian/recognized_tiny_14m19s.txt.rut5.summary.txt, time: 74.8s
summa not installed, skipping textrank.
DONE.


In [3]:
# Save timing results for analysis
with open(os.path.join(OUT_PATH, 'timing_report.csv'), "w", encoding="utf-8", newline='') as f_csv:
    writer = csv.DictWriter(f_csv, fieldnames=['filename', 'model', 'seconds'])
    writer.writeheader()
    writer.writerows(timing_report)
print("Timing CSV report saved:", os.path.join(OUT_PATH, 'timing_report.csv'))

Timing CSV report saved: summaries_russian/timing_report.csv


# LLM chats

In [13]:
!pip install transformers accelerate torch sentencepiece tqdm --extra-index-url https://pypi.org/simple/

Looking in indexes: https://pypi.yandex-team.ru/simple/, https://pypi.org/simple/


In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import time

os.environ["HF_TOKEN"] = "removed"

TRANSCRIPTS_PATH = "transcripts"
OUTPUT_PATH = "summaries_russian_llm"
os.makedirs(OUTPUT_PATH, exist_ok=True)

MODELS = {
    "mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "qwen": "Qwen/Qwen1.5-7B-Chat",
    "llama": "meta-llama/Llama-2-7b-chat-hf",
}

PROMPT_TEMPLATES = {
    "mistral": "Сделай краткое и информативное резюме следующего текста собрания на русском языке, выдели принятые решения и action-items.:\n\n{}",
    "llama": "Сделай краткое и информативное резюме следующего текста собрания на русском языке, выдели принятые решения и action-items.:\n\n{}",
    "qwen": "Сделай краткое и информативное резюме следующего текста собрания на русском языке, выдели принятые решения и action-items.:\n\n{}",
}

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

CHUNK_TOKENS = 2048
GENERATE_TOKENS = 512

def chunk_text(text, tokenizer, max_tokens):
    """Splits Russian text into token-length chunks at sentence boundaries."""
    import nltk
    nltk.download('punkt', quiet=True)
    from nltk.tokenize import sent_tokenize

    sentences = sent_tokenize(text, language='russian')
    chunks = []
    current_chunk = ""
    for sent in sentences:
        if len(tokenizer.encode(current_chunk + " " + sent)) < max_tokens:
            current_chunk += " " + sent
        else:
            if current_chunk: chunks.append(current_chunk.strip())
            current_chunk = sent
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

def generate_summary(
    model, tokenizer, prompt, device, chunk_tokens=2048, gen_tokens=512
):
    tokenizer.pad_token = tokenizer.eos_token
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        truncation=True,
        max_length=chunk_tokens,
        padding="max_length"
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    # print("Model device:", next(model.parameters()).device)
    # print("input_ids:", input_ids.device)
    # print("attention_mask:", attention_mask.device)
    
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=chunk_tokens+gen_tokens,
            temperature=0.3,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    result = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:],
        skip_special_tokens=True
    )
    return result.strip()


timing_report = []
for model_key, model_name in MODELS.items():
    print(f"> Loading {model_key} ({model_name}) ...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map=None,
        trust_remote_code=True,        # for Qwen especially
    )
    model = model.to(DEVICE)
    model.eval()
    prompt_template = PROMPT_TEMPLATES[model_key]
    for fname in os.listdir(TRANSCRIPTS_PATH):
        if not fname.endswith('.txt'):
            continue
        transcript_path = os.path.join(TRANSCRIPTS_PATH, fname)
        outname = os.path.join(OUTPUT_PATH, f"{fname}.{model_key}.summary.txt")
        if os.path.exists(outname):
            print(f"Skip {outname}, exists.")
            continue
        with open(transcript_path, "r", encoding="utf-8") as fin:
            text = fin.read()
        chunks = chunk_text(text, tokenizer, CHUNK_TOKENS)
        summaries = []
        t0 = time.time()
        for chunk in tqdm(chunks, desc=f"{fname} ({model_key})"):
            prompt = prompt_template.format(chunk)
            summary = generate_summary(model, tokenizer, prompt, DEVICE)
            summaries.append(summary)
        final_summary = "\n".join(summaries)
        elapsed = time.time() - t0
        with open(outname, "w", encoding="utf-8") as fout:
            fout.write(final_summary)
        print(f">>> {fname} ({model_key}) done in {elapsed:.1f} sec ({len(chunks)} chunks)")
        timing_report.append({'filename': fname, 'model': model_key, 'seconds': elapsed})

import csv
with open(os.path.join(OUTPUT_PATH, 'timing_report.csv'), "w", encoding="utf-8", newline='') as f_csv:
    writer = csv.DictWriter(f_csv, fieldnames=['filename', 'model', 'seconds'])
    writer.writeheader()
    writer.writerows(timing_report)
print("Timing report written:", os.path.join(OUTPUT_PATH, 'timing_report.csv'))


> Loading mistral (mistralai/Mistral-7B-Instruct-v0.2) ...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

recognized_small_20m20s.txt (mistral):   0%|                                                                                    | 0/27 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):   4%|██▊                                                                         | 1/27 [00:35<15:21, 35.45s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):   7%|█████▋                                                                      | 2/27 [01:22<17:41, 42.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  11%|████████▍                                                                   | 3/27 [01:58<15:47, 39.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  15%|███████████▎                                                                | 4/27 [02:37<15:03, 39.30s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  19%|██████████████                                                              | 5/27 [03:18<14:34, 39.77s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  22%|████████████████▉                                                           | 6/27 [03:58<13:56, 39.83s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  26%|███████████████████▋                                                        | 7/27 [04:46<14:09, 42.50s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  30%|██████████████████████▌                                                     | 8/27 [05:32<13:47, 43.55s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  33%|█████████████████████████▎                                                  | 9/27 [06:14<12:56, 43.15s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  37%|███████████████████████████▊                                               | 10/27 [06:50<11:37, 41.05s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  41%|██████████████████████████████▌                                            | 11/27 [07:30<10:48, 40.54s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  44%|█████████████████████████████████▎                                         | 12/27 [08:09<10:03, 40.22s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  48%|████████████████████████████████████                                       | 13/27 [08:30<08:00, 34.35s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  52%|██████████████████████████████████████▉                                    | 14/27 [09:14<08:04, 37.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  56%|█████████████████████████████████████████▋                                 | 15/27 [10:01<08:01, 40.13s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  59%|████████████████████████████████████████████▍                              | 16/27 [10:40<07:19, 40.00s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  63%|███████████████████████████████████████████████▏                           | 17/27 [11:19<06:34, 39.50s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  67%|██████████████████████████████████████████████████                         | 18/27 [11:57<05:51, 39.03s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  70%|████████████████████████████████████████████████████▊                      | 19/27 [12:35<05:10, 38.85s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  74%|███████████████████████████████████████████████████████▌                   | 20/27 [13:15<04:34, 39.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  78%|██████████████████████████████████████████████████████████▎                | 21/27 [13:54<03:53, 38.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  81%|█████████████████████████████████████████████████████████████              | 22/27 [14:33<03:15, 39.10s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  85%|███████████████████████████████████████████████████████████████▉           | 23/27 [15:10<02:33, 38.46s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  89%|██████████████████████████████████████████████████████████████████▋        | 24/27 [15:46<01:53, 37.69s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  93%|█████████████████████████████████████████████████████████████████████▍     | 25/27 [16:23<01:15, 37.67s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral):  96%|████████████████████████████████████████████████████████████████████████▏  | 26/27 [17:02<00:37, 37.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (mistral): 100%|███████████████████████████████████████████████████████████████████████████| 27/27 [17:26<00:00, 38.75s/it]


>>> recognized_small_20m20s.txt (mistral) done in 1046.3 sec (27 chunks)


recognized_medium_24m15s.txt (mistral):   0%|                                                                                   | 0/23 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):   4%|███▎                                                                       | 1/23 [00:13<04:58, 13.57s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):   9%|██████▌                                                                    | 2/23 [00:54<10:21, 29.60s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  13%|█████████▊                                                                 | 3/23 [01:34<11:24, 34.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  17%|█████████████                                                              | 4/23 [02:10<11:09, 35.23s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  22%|████████████████▎                                                          | 5/23 [03:00<12:05, 40.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  26%|███████████████████▌                                                       | 6/23 [03:49<12:15, 43.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  30%|██████████████████████▊                                                    | 7/23 [04:33<11:39, 43.74s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  35%|██████████████████████████                                                 | 8/23 [05:16<10:48, 43.24s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  39%|█████████████████████████████▎                                             | 9/23 [05:53<09:40, 41.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  43%|████████████████████████████████▏                                         | 10/23 [06:30<08:42, 40.16s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  48%|███████████████████████████████████▍                                      | 11/23 [07:08<07:51, 39.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  52%|██████████████████████████████████████▌                                   | 12/23 [07:36<06:36, 36.06s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  57%|█████████████████████████████████████████▊                                | 13/23 [08:17<06:14, 37.49s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  61%|█████████████████████████████████████████████                             | 14/23 [08:56<05:41, 37.98s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  65%|████████████████████████████████████████████████▎                         | 15/23 [09:36<05:07, 38.45s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  70%|███████████████████████████████████████████████████▍                      | 16/23 [09:59<03:57, 33.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  74%|██████████████████████████████████████████████████████▋                   | 17/23 [10:38<03:32, 35.45s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  78%|█████████████████████████████████████████████████████████▉                | 18/23 [11:03<02:41, 32.31s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  83%|█████████████████████████████████████████████████████████████▏            | 19/23 [11:34<02:06, 31.72s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  87%|████████████████████████████████████████████████████████████████▎         | 20/23 [12:13<01:41, 33.85s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  91%|███████████████████████████████████████████████████████████████████▌      | 21/23 [12:51<01:10, 35.34s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral):  96%|██████████████████████████████████████████████████████████████████████▊   | 22/23 [13:30<00:36, 36.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (mistral): 100%|██████████████████████████████████████████████████████████████████████████| 23/23 [14:11<00:00, 37.01s/it]


>>> recognized_medium_24m15s.txt (mistral) done in 851.3 sec (23 chunks)


recognized_large_33m5s.txt (mistral):   0%|                                                                                     | 0/23 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):   4%|███▎                                                                         | 1/23 [00:38<14:12, 38.73s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):   9%|██████▋                                                                      | 2/23 [01:17<13:33, 38.72s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  13%|██████████                                                                   | 3/23 [01:55<12:50, 38.54s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  17%|█████████████▍                                                               | 4/23 [02:29<11:39, 36.82s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  22%|████████████████▋                                                            | 5/23 [03:08<11:16, 37.60s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  26%|████████████████████                                                         | 6/23 [03:48<10:48, 38.13s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  30%|███████████████████████▍                                                     | 7/23 [04:27<10:15, 38.46s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  35%|██████████████████████████▊                                                  | 8/23 [05:06<09:41, 38.74s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  39%|██████████████████████████████▏                                              | 9/23 [05:45<09:04, 38.88s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  43%|█████████████████████████████████                                           | 10/23 [06:25<08:27, 39.05s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  48%|████████████████████████████████████▎                                       | 11/23 [06:45<06:39, 33.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  52%|███████████████████████████████████████▋                                    | 12/23 [07:28<06:39, 36.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  57%|██████████████████████████████████████████▉                                 | 13/23 [08:09<06:15, 37.59s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  61%|██████████████████████████████████████████████▎                             | 14/23 [08:49<05:44, 38.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  65%|█████████████████████████████████████████████████▌                          | 15/23 [09:29<05:10, 38.81s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  70%|████████████████████████████████████████████████████▊                       | 16/23 [10:00<04:15, 36.55s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  74%|████████████████████████████████████████████████████████▏                   | 17/23 [10:39<03:44, 37.35s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  78%|███████████████████████████████████████████████████████████▍                | 18/23 [11:01<02:43, 32.77s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  83%|██████████████████████████████████████████████████████████████▊             | 19/23 [11:41<02:19, 34.81s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  87%|██████████████████████████████████████████████████████████████████          | 20/23 [12:20<01:48, 36.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  91%|█████████████████████████████████████████████████████████████████████▍      | 21/23 [12:59<01:13, 36.82s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral):  96%|████████████████████████████████████████████████████████████████████████▋   | 22/23 [13:38<00:37, 37.60s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (mistral): 100%|████████████████████████████████████████████████████████████████████████████| 23/23 [13:48<00:00, 36.04s/it]


>>> recognized_large_33m5s.txt (mistral) done in 828.8 sec (23 chunks)


recognized_tiny_14m19s.txt (mistral):   0%|                                                                                     | 0/42 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):   2%|█▊                                                                           | 1/42 [00:16<11:32, 16.90s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):   5%|███▋                                                                         | 2/42 [00:58<20:53, 31.34s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):   7%|█████▌                                                                       | 3/42 [01:39<23:21, 35.93s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  10%|███████▎                                                                     | 4/42 [02:19<23:48, 37.59s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  12%|█████████▏                                                                   | 5/42 [03:01<24:01, 38.95s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  14%|███████████                                                                  | 6/42 [03:42<23:44, 39.58s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  17%|████████████▊                                                                | 7/42 [04:20<22:58, 39.37s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  19%|██████████████▋                                                              | 8/42 [05:00<22:15, 39.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  21%|████████████████▌                                                            | 9/42 [05:39<21:32, 39.18s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  24%|██████████████████                                                          | 10/42 [06:18<20:56, 39.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  26%|███████████████████▉                                                        | 11/42 [06:57<20:17, 39.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  29%|█████████████████████▋                                                      | 12/42 [07:19<16:57, 33.90s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  31%|███████████████████████▌                                                    | 13/42 [07:58<17:09, 35.49s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  33%|█████████████████████████▎                                                  | 14/42 [08:38<17:15, 36.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  36%|███████████████████████████▏                                                | 15/42 [08:59<14:24, 32.03s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  38%|████████████████████████████▉                                               | 16/42 [09:43<15:24, 35.56s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  40%|██████████████████████████████▊                                             | 17/42 [10:24<15:29, 37.19s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  43%|████████████████████████████████▌                                           | 18/42 [10:52<13:45, 34.40s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  45%|██████████████████████████████████▍                                         | 19/42 [11:33<13:58, 36.46s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  48%|████████████████████████████████████▏                                       | 20/42 [12:18<14:18, 39.04s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  50%|██████████████████████████████████████                                      | 21/42 [13:09<14:54, 42.60s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  52%|███████████████████████████████████████▊                                    | 22/42 [13:27<11:43, 35.18s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  55%|█████████████████████████████████████████▌                                  | 23/42 [14:05<11:24, 36.05s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  57%|███████████████████████████████████████████▍                                | 24/42 [14:30<09:47, 32.65s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  60%|█████████████████████████████████████████████▏                              | 25/42 [14:55<08:40, 30.59s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  62%|███████████████████████████████████████████████                             | 26/42 [15:35<08:50, 33.16s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  64%|████████████████████████████████████████████████▊                           | 27/42 [16:11<08:32, 34.19s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  67%|██████████████████████████████████████████████████▋                         | 28/42 [16:51<08:22, 35.87s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  69%|████████████████████████████████████████████████████▍                       | 29/42 [17:28<07:51, 36.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  71%|██████████████████████████████████████████████████████▎                     | 30/42 [18:06<07:19, 36.67s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  74%|████████████████████████████████████████████████████████                    | 31/42 [18:47<06:59, 38.15s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  76%|█████████████████████████████████████████████████████████▉                  | 32/42 [19:27<06:26, 38.69s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  79%|███████████████████████████████████████████████████████████▋                | 33/42 [20:06<05:47, 38.56s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  81%|█████████████████████████████████████████████████████████████▌              | 34/42 [20:45<05:11, 38.98s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  83%|███████████████████████████████████████████████████████████████▎            | 35/42 [21:26<04:35, 39.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  86%|█████████████████████████████████████████████████████████████████▏          | 36/42 [22:05<03:56, 39.35s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  88%|██████████████████████████████████████████████████████████████████▉         | 37/42 [22:44<03:15, 39.16s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  90%|████████████████████████████████████████████████████████████████████▊       | 38/42 [23:23<02:36, 39.23s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  93%|██████████████████████████████████████████████████████████████████████▌     | 39/42 [23:51<01:47, 35.90s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  95%|████████████████████████████████████████████████████████████████████████▍   | 40/42 [24:29<01:12, 36.40s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral):  98%|██████████████████████████████████████████████████████████████████████████▏ | 41/42 [24:45<00:30, 30.44s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (mistral): 100%|████████████████████████████████████████████████████████████████████████████| 42/42 [24:52<00:00, 35.55s/it]


>>> recognized_tiny_14m19s.txt (mistral) done in 1493.0 sec (42 chunks)
> Loading qwen (Qwen/Qwen1.5-7B-Chat) ...


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/31.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

recognized_small_20m20s.txt (qwen):   0%|                                                                                       | 0/21 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):   5%|███▊                                                                           | 1/21 [00:00<00:17,  1.15it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  10%|███████▌                                                                       | 2/21 [00:01<00:17,  1.11it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  14%|███████████▎                                                                   | 3/21 [00:02<00:14,  1.24it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  19%|███████████████                                                                | 4/21 [00:03<00:16,  1.05it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  24%|██████████████████▊                                                            | 5/21 [00:32<02:57, 11.08s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  29%|██████████████████████▌                                                        | 6/21 [00:50<03:18, 13.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  33%|██████████████████████████▎                                                    | 7/21 [00:50<02:08,  9.14s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  38%|██████████████████████████████                                                 | 8/21 [00:51<01:24,  6.48s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  43%|█████████████████████████████████▊                                             | 9/21 [01:02<01:35,  7.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  48%|█████████████████████████████████████▏                                        | 10/21 [01:03<01:03,  5.76s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  52%|████████████████████████████████████████▊                                     | 11/21 [01:04<00:42,  4.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  57%|████████████████████████████████████████████▌                                 | 12/21 [01:05<00:29,  3.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  62%|████████████████████████████████████████████████▎                             | 13/21 [01:29<01:15,  9.43s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  67%|████████████████████████████████████████████████████                          | 14/21 [01:32<00:52,  7.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  71%|███████████████████████████████████████████████████████▋                      | 15/21 [01:43<00:52,  8.72s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  76%|███████████████████████████████████████████████████████████▍                  | 16/21 [01:45<00:32,  6.52s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  81%|███████████████████████████████████████████████████████████████▏              | 17/21 [02:00<00:36,  9.24s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  86%|██████████████████████████████████████████████████████████████████▊           | 18/21 [02:02<00:20,  6.95s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  90%|██████████████████████████████████████████████████████████████████████▌       | 19/21 [02:02<00:09,  4.99s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen):  95%|██████████████████████████████████████████████████████████████████████████▎   | 20/21 [02:03<00:03,  3.65s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_small_20m20s.txt (qwen): 100%|██████████████████████████████████████████████████████████████████████████████| 21/21 [02:04<00:00,  5.91s/it]


>>> recognized_small_20m20s.txt (qwen) done in 124.0 sec (21 chunks)


recognized_medium_24m15s.txt (qwen):   0%|                                                                                      | 0/18 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):   6%|████▎                                                                         | 1/18 [00:16<04:43, 16.68s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  11%|████████▋                                                                     | 2/18 [00:42<05:57, 22.34s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  17%|█████████████                                                                 | 3/18 [00:43<03:05, 12.40s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  22%|█████████████████▎                                                            | 4/18 [00:45<01:54,  8.18s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  28%|█████████████████████▋                                                        | 5/18 [00:58<02:11, 10.14s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  33%|██████████████████████████                                                    | 6/18 [01:17<02:36, 13.03s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  39%|██████████████████████████████▎                                               | 7/18 [01:34<02:37, 14.34s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  44%|██████████████████████████████████▋                                           | 8/18 [01:35<01:39,  9.93s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  50%|███████████████████████████████████████                                       | 9/18 [01:54<01:54, 12.77s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  56%|██████████████████████████████████████████▊                                  | 10/18 [01:56<01:16,  9.54s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  61%|███████████████████████████████████████████████                              | 11/18 [01:58<00:51,  7.36s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  67%|███████████████████████████████████████████████████▎                         | 12/18 [02:00<00:33,  5.62s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  72%|███████████████████████████████████████████████████████▌                     | 13/18 [02:29<01:03, 12.65s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  78%|███████████████████████████████████████████████████████████▉                 | 14/18 [02:29<00:36,  9.03s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  83%|████████████████████████████████████████████████████████████████▏            | 15/18 [02:33<00:22,  7.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  89%|████████████████████████████████████████████████████████████████████▍        | 16/18 [02:35<00:11,  5.74s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen):  94%|████████████████████████████████████████████████████████████████████████▋    | 17/18 [02:35<00:04,  4.15s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_medium_24m15s.txt (qwen): 100%|█████████████████████████████████████████████████████████████████████████████| 18/18 [02:37<00:00,  8.76s/it]


>>> recognized_medium_24m15s.txt (qwen) done in 157.6 sec (18 chunks)


recognized_large_33m5s.txt (qwen):   0%|                                                                                        | 0/18 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):   6%|████▍                                                                           | 1/18 [00:01<00:31,  1.83s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  11%|████████▉                                                                       | 2/18 [00:03<00:29,  1.86s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  17%|█████████████▎                                                                  | 3/18 [00:05<00:24,  1.63s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  22%|█████████████████▊                                                              | 4/18 [00:05<00:16,  1.16s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  28%|██████████████████████▏                                                         | 5/18 [00:08<00:24,  1.86s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  33%|██████████████████████████▋                                                     | 6/18 [00:09<00:18,  1.53s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  39%|███████████████████████████████                                                 | 7/18 [00:12<00:20,  1.85s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  44%|███████████████████████████████████▌                                            | 8/18 [00:13<00:17,  1.74s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  50%|████████████████████████████████████████                                        | 9/18 [00:14<00:13,  1.50s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  56%|███████████████████████████████████████████▉                                   | 10/18 [00:15<00:10,  1.36s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  61%|████████████████████████████████████████████████▎                              | 11/18 [00:16<00:08,  1.22s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  67%|████████████████████████████████████████████████████▋                          | 12/18 [00:18<00:09,  1.52s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  72%|█████████████████████████████████████████████████████████                      | 13/18 [00:19<00:07,  1.46s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  78%|█████████████████████████████████████████████████████████████▍                 | 14/18 [00:20<00:04,  1.20s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  83%|█████████████████████████████████████████████████████████████████▊             | 15/18 [00:23<00:04,  1.62s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  89%|██████████████████████████████████████████████████████████████████████▏        | 16/18 [00:23<00:02,  1.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen):  94%|██████████████████████████████████████████████████████████████████████████▌    | 17/18 [00:24<00:01,  1.12s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_large_33m5s.txt (qwen): 100%|███████████████████████████████████████████████████████████████████████████████| 18/18 [00:37<00:00,  2.06s/it]


>>> recognized_large_33m5s.txt (qwen) done in 37.1 sec (18 chunks)


recognized_tiny_14m19s.txt (qwen):   0%|                                                                                        | 0/35 [00:00<?, ?it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):   3%|██▎                                                                             | 1/35 [00:00<00:24,  1.40it/s]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):   6%|████▌                                                                           | 2/35 [00:03<00:58,  1.78s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):   9%|██████▊                                                                         | 3/35 [00:22<05:06,  9.57s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  11%|█████████▏                                                                      | 4/35 [00:23<03:18,  6.40s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  14%|███████████▍                                                                    | 5/35 [00:24<02:09,  4.31s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  17%|█████████████▋                                                                  | 6/35 [00:54<06:25, 13.30s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  20%|████████████████                                                                | 7/35 [00:55<04:16,  9.15s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  23%|██████████████████▎                                                             | 8/35 [00:56<02:56,  6.54s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  26%|████████████████████▌                                                           | 9/35 [01:25<05:49, 13.43s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  29%|██████████████████████▌                                                        | 10/35 [01:39<05:41, 13.65s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  31%|████████████████████████▊                                                      | 11/35 [01:49<04:59, 12.48s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  34%|███████████████████████████                                                    | 12/35 [01:49<03:23,  8.84s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  37%|█████████████████████████████▎                                                 | 13/35 [02:21<05:46, 15.73s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  40%|███████████████████████████████▌                                               | 14/35 [02:21<03:54, 11.15s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  43%|█████████████████████████████████▊                                             | 15/35 [02:52<05:43, 17.19s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  46%|████████████████████████████████████                                           | 16/35 [02:53<03:52, 12.25s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  49%|██████████████████████████████████████▎                                        | 17/35 [03:22<05:10, 17.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  51%|████████████████████████████████████████▋                                      | 18/35 [03:23<03:28, 12.27s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  54%|██████████████████████████████████████████▉                                    | 19/35 [03:29<02:48, 10.50s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  57%|█████████████████████████████████████████████▏                                 | 20/35 [03:30<01:54,  7.62s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  60%|███████████████████████████████████████████████▍                               | 21/35 [03:47<02:24, 10.34s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  63%|█████████████████████████████████████████████████▋                             | 22/35 [04:16<03:28, 16.06s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  66%|███████████████████████████████████████████████████▉                           | 23/35 [04:32<03:10, 15.91s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  69%|██████████████████████████████████████████████████████▏                        | 24/35 [04:32<02:04, 11.32s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  71%|████████████████████████████████████████████████████████▍                      | 25/35 [05:02<02:49, 16.90s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  74%|██████████████████████████████████████████████████████████▋                    | 26/35 [05:03<01:48, 12.01s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  77%|████████████████████████████████████████████████████████████▉                  | 27/35 [05:21<01:51, 13.89s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  80%|███████████████████████████████████████████████████████████████▏               | 28/35 [05:51<02:11, 18.73s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  83%|█████████████████████████████████████████████████████████████████▍             | 29/35 [05:52<01:19, 13.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  86%|███████████████████████████████████████████████████████████████████▋           | 30/35 [05:52<00:47,  9.47s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  89%|█████████████████████████████████████████████████████████████████████▉         | 31/35 [06:23<01:03, 15.76s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  91%|████████████████████████████████████████████████████████████████████████▏      | 32/35 [06:25<00:34, 11.62s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  94%|██████████████████████████████████████████████████████████████████████████▍    | 33/35 [06:25<00:16,  8.29s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen):  97%|████████████████████████████████████████████████████████████████████████████▋  | 34/35 [06:26<00:05,  5.97s/it]

Model device: cuda:0
input_ids: cuda:0
attention_mask: cuda:0


recognized_tiny_14m19s.txt (qwen): 100%|███████████████████████████████████████████████████████████████████████████████| 35/35 [06:26<00:00, 11.05s/it]


>>> recognized_tiny_14m19s.txt (qwen) done in 386.9 sec (35 chunks)
> Loading llama (meta-llama/Llama-2-7b-chat-hf) ...


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf.
403 Client Error. (Request ID: Root=1-6816bc29-54285b27730255e442d2a8b7;61ed4734-8984-403f-b44c-5ef614a86b2b)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-chat-hf is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-2-7b-chat-hf to ask for access.

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)

with open("transcripts/recognized_large_33m5s.txt", "r", encoding="utf-8") as f:
    text = f.read()


text = text[:5000]

# prepare the model input
prompt = "Сделай краткое и информативное резюме следующего текста собрания на русском языке, выдели принятые решения и action-items.:\n\n"
messages = [
    {"role": "user", "content": prompt + text}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# conduct text completion
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=2048 #32768
)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() 

# parsing thinking content
try:
    # rindex finding 151668 (</think>)
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    index = 0

thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

thinking content: <think>
Хорошо, мне нужно сделать краткое и информативное резюме собрания на русском языке, выделив принятые решения и action-items. Начну с того, что внимательно прочитаю текст, чтобы понять основные моменты.

Сначала текст говорит о важности искусственного интеллекта (ИИ) как ключевого направления для будущего продуктов. Важно отметить, что некоторые коллеги поднимают вопросы, связанные с внедрением ИИ, и нужно совместно их решать. Также упоминается возможное создание нового управления для ускорения внедрения ИИ. 

Далее, говорится о том, что ИИ может ускорить развитие, как это происходило в истории человечества через появление письменности. Примеры: чтение большого количества текстов и обучение на них. ИИ способен обрабатывать огромные объемы данных быстрее человека, что может привести к трансформации подходов к жизни и бизнесу. 

Важно подчеркнуть, что компания должна активно внедрять ИИ, чтобы не отставать от конкурентов. Также упоминается, что нужно сформировать

In [5]:
import torch
torch.cuda.empty_cache()


In [2]:
import os
import nltk
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from tqdm import tqdm

nltk.download('punkt', quiet=True)
from nltk.tokenize import sent_tokenize

model_name = "Qwen/Qwen3-8B"       # << DO NOT CHANGE THIS
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token

# INPUT_PATH = "transcripts/recognized_large_33m5s.txt"

INPUT_PATH = "summaries_russian_llm/qwen3_summary_summary.txt"
OUTPUT_PATH = "summaries_russian_llm/qwen3_summary_summary_global.txt"
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

PROMPT_CHUNK = (
    "Сделай краткое и информативное резюме следующего текста собрания на русском языке, "
    "выдели принятые решения и action-items:\n\n"
)

PROMPT_GLOBAL = (
    "Вот несколько кратких резюме разных частей собрания. На их основе создай обобщённое, "
    "сжатое итоговое резюме всего собрания, выдели только основные решения и важнейшие action items:\n\n"
)

def chunk_text(text, tokenizer, max_tokens=2000):
    sentences = sent_tokenize(text, language='russian')
    chunks = []
    current = ""
    for sent in sentences:
        test = (current + " " + sent).strip()
        token_count = len(tokenizer.encode(test))
        if token_count < max_tokens:
            current = test
        else:
            if current:
                chunks.append(current)
            current = sent
    if current:
        chunks.append(current)
    return chunks

with open(INPUT_PATH, "r", encoding="utf-8") as f:
    text = f.read()

chunks = chunk_text(text, tokenizer, max_tokens=2000)
print(f"Chunked transcript into {len(chunks)} segments.")

summaries = []
for idx, chunk in enumerate(tqdm(chunks, desc="Summarizing chunks")):
    messages = [{"role": "user", "content": PROMPT_GLOBAL + chunk}]
    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        enable_thinking=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        out_ids = model.generate(
            **model_inputs,
            max_new_tokens=2048,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    summary_chunk = tokenizer.decode(
        out_ids[0][model_inputs.input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()
    summaries.append(summary_chunk)

# ---- Second pass: summarize the summaries ----
global_summary_prompt = PROMPT_GLOBAL + "\n\n".join(summaries)
# messages = [{"role": "user", "content": global_summary_prompt}]
# chat_text = tokenizer.apply_chat_template(
#     messages,
#     tokenize=False,
#     add_generation_prompt=True
# )
# model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)
# with torch.no_grad():
#     out_ids = model.generate(
#         **model_inputs,
#         max_new_tokens=700,
#         temperature=0.1,
#         do_sample=False,
#         pad_token_id=tokenizer.eos_token_id,
#         eos_token_id=tokenizer.eos_token_id
#     )
# final_summary = tokenizer.decode(
#     out_ids[0][model_inputs.input_ids.shape[-1]:], skip_special_tokens=True
# ).strip()

final_summary = global_summary_prompt

with open(OUTPUT_PATH, "w", encoding="utf-8") as fout:
    fout.write(final_summary)

print(f"\nFinal summary written to {OUTPUT_PATH}\n")


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Chunked transcript into 3 segments.


Summarizing chunks: 100%|█████████████████████████████████████████████████████████████████████| 3/3 [08:33<00:00, 171.03s/it]


Final summary written to summaries_russian_llm/qwen3_summary_summary_global.txt




