In [1]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset


# --- Configuration -----------------------------------------------------------
# Preferred GPU for this notebook. The index is validated below so the cell
# also runs on machines that expose fewer GPUs.
PREFERRED_GPU_INDEX = 2
AUDIO_PATH = "data/02_Južne_vesti/160121/160121.wav"
LANGUAGE = "serbian"

model_id = "openai/whisper-large-v3"

# --- Device / dtype selection ------------------------------------------------
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU_INDEX
    if gpu_index >= torch.cuda.device_count():
        # The preferred GPU does not exist on this host; use the first one
        # instead of failing later with an invalid device ordinal.
        print(f"cuda:{gpu_index} is not available, falling back to cuda:0")
        gpu_index = 0
    device = f"cuda:{gpu_index}"
    # Half precision is only selected when running on a GPU.
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# --- Model and processor -----------------------------------------------------
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor that the
# pipeline is given below.
processor = AutoProcessor.from_pretrained(model_id)

# --- ASR pipeline ------------------------------------------------------------
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    # NOTE(review): presumably this caps the tokens generated per chunk; dense
    # speech with timestamps may need a higher limit -- confirm against the
    # transcripts before relying on them.
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)

# --- Transcription -----------------------------------------------------------
# Transcribe a single recording, passing the language explicitly to generate().
result = pipe(AUDIO_PATH, generate_kwargs={"language": LANGUAGE})


# files_to_process = ['data/02_Južne_vesti_asr/161226/segments/T129__T130.wav', 'data/02_Južne_vesti_asr/161226/segments/T120__T121.wav', 'data/02_Južne_vesti_asr/161226/segments/T109__T110.wav', 'data/02_Južne_vesti_asr/161226/segments/T60__T61.wav', 'data/02_Južne_vesti_asr/161226/segments/T15__T16.wav', 'data/02_Južne_vesti_asr/161226/segments/T135__T136.wav', 'data/02_Južne_vesti_asr/161226/segments/T32__T33.wav', 'data/02_Južne_vesti_asr/161226/segments/T9__T10.wav', 'data/02_Južne_vesti_asr/161226/segments/T54__T55.wav', 'data/02_Južne_vesti_asr/161226/segments/T100__T101.wav', 'data/02_Južne_vesti_asr/161226/segments/T49__T50.wav', 'data/02_Južne_vesti_asr/161226/segments/T55__T56.wav', 'data/02_Južne_vesti_asr/161226/segments/T124__T125.wav', 'data/02_Južne_vesti_asr/161226/segments/T131__T132.wav', 'data/02_Južne_vesti_asr/161226/segments/T125__T126.wav', 'data/02_Južne_vesti_asr/161226/segments/T127__T128.wav', 'data/02_Južne_vesti_asr/161226/segments/T2__T0.wav', 'data/02_Južne_vesti_asr/161226/segments/T90__T91.wav', 'data/02_Južne_vesti_asr/161226/segments/T70__T71.wav', 'data/02_Južne_vesti_asr/161226/segments/T12__T13.wav', 'data/02_Južne_vesti_asr/161226/segments/T56__T57.wav', 'data/02_Južne_vesti_asr/161226/segments/T61__T62.wav', 'data/02_Južne_vesti_asr/161226/segments/T48__T49.wav', 'data/02_Južne_vesti_asr/161226/segments/T42__T43.wav', 'data/02_Južne_vesti_asr/161226/segments/T119__T120.wav', 'data/02_Južne_vesti_asr/161226/segments/T45__T46.wav', 'data/02_Južne_vesti_asr/161226/segments/T5__T6.wav', 'data/02_Južne_vesti_asr/161226/segments/T23__T24.wav', 'data/02_Južne_vesti_asr/161226/segments/T69__T70.wav', 'data/02_Južne_vesti_asr/161226/segments/T80__T81.wav', 'data/02_Južne_vesti_asr/161226/segments/T88__T89.wav', 'data/02_Južne_vesti_asr/161226/segments/T53__T54.wav', 'data/02_Južne_vesti_asr/161226/segments/T51__T52.wav', 'data/02_Južne_vesti_asr/161226/segments/T104__T105.wav', 'data/02_Južne_vesti_asr/161226/segments/T87__T88.wav', 
'data/02_Južne_vesti_asr/161226/segments/T21__T22.wav', 'data/02_Južne_vesti_asr/161226/segments/T128__T129.wav', 'data/02_Južne_vesti_asr/161226/segments/T30__T31.wav', 'data/02_Južne_vesti_asr/161226/segments/T36__T37.wav', 'data/02_Južne_vesti_asr/161226/segments/T139__T140.wav', 'data/02_Južne_vesti_asr/161226/segments/T107__T108.wav', 'data/02_Južne_vesti_asr/161226/segments/T65__T66.wav', 'data/02_Južne_vesti_asr/161226/segments/T133__T134.wav', 'data/02_Južne_vesti_asr/161226/segments/T122__T123.wav', 'data/02_Južne_vesti_asr/161226/segments/T66__T67.wav', 'data/02_Južne_vesti_asr/161226/segments/T108__T109.wav', 'data/02_Južne_vesti_asr/161226/segments/T40__T41.wav', 'data/02_Južne_vesti_asr/161226/segments/T123__T124.wav', 'data/02_Južne_vesti_asr/161226/segments/T110__T111.wav', 'data/02_Južne_vesti_asr/161226/segments/T73__T74.wav', 'data/02_Južne_vesti_asr/161226/segments/T84__T85.wav', 'data/02_Južne_vesti_asr/161226/segments/T121__T122.wav', 'data/02_Južne_vesti_asr/161226/segments/T19__T20.wav', 'data/02_Južne_vesti_asr/161226/segments/T11__T12.wav', 'data/02_Južne_vesti_asr/161226/segments/T118__T119.wav', 'data/02_Južne_vesti_asr/161226/segments/T115__T116.wav', 'data/02_Južne_vesti_asr/161226/segments/T4__T5.wav', 'data/02_Južne_vesti_asr/161226/segments/T34__T35.wav', 'data/02_Južne_vesti_asr/161226/segments/T140__T1.wav', 'data/02_Južne_vesti_asr/161226/segments/T82__T83.wav', 'data/02_Južne_vesti_asr/161226/segments/T3__T4.wav', 'data/02_Južne_vesti_asr/161226/segments/T58__T59.wav', 'data/02_Južne_vesti_asr/161226/segments/T47__T48.wav', 'data/02_Južne_vesti_asr/161226/segments/T68__T69.wav', 'data/02_Južne_vesti_asr/161226/segments/T76__T77.wav', 'data/02_Južne_vesti_asr/161226/segments/T27__T28.wav', 'data/02_Južne_vesti_asr/161226/segments/T81__T82.wav', 'data/02_Južne_vesti_asr/161226/segments/T114__T115.wav', 'data/02_Južne_vesti_asr/161226/segments/T17__T18.wav', 'data/02_Južne_vesti_asr/161226/segments/T59__T60.wav', 
'data/02_Južne_vesti_asr/161226/segments/T130__T131.wav', 'data/02_Južne_vesti_asr/161226/segments/T22__T23.wav', 'data/02_Južne_vesti_asr/161226/segments/T64__T65.wav', 'data/02_Južne_vesti_asr/161226/segments/T91__T92.wav', 'data/02_Južne_vesti_asr/161226/segments/T103__T104.wav', 'data/02_Južne_vesti_asr/161226/segments/T10__T11.wav', 'data/02_Južne_vesti_asr/161226/segments/T97__T98.wav', 'data/02_Južne_vesti_asr/161226/segments/T14__T15.wav', 'data/02_Južne_vesti_asr/161226/segments/T31__T32.wav', 'data/02_Južne_vesti_asr/161226/segments/T71__T72.wav', 'data/02_Južne_vesti_asr/161226/segments/T111__T112.wav', 'data/02_Južne_vesti_asr/161226/segments/T25__T26.wav', 'data/02_Južne_vesti_asr/161226/segments/T7__T8.wav', 'data/02_Južne_vesti_asr/161226/segments/T101__T102.wav', 'data/02_Južne_vesti_asr/161226/segments/T85__T86.wav', 'data/02_Južne_vesti_asr/161226/segments/T20__T21.wav', 'data/02_Južne_vesti_asr/161226/segments/T105__T106.wav', 'data/02_Južne_vesti_asr/161226/segments/T95__T96.wav', 'data/02_Južne_vesti_asr/161226/segments/T24__T25.wav', 'data/02_Južne_vesti_asr/161226/segments/T52__T53.wav', 'data/02_Južne_vesti_asr/161226/segments/T89__T90.wav', 'data/02_Južne_vesti_asr/161226/segments/T137__T138.wav', 'data/02_Južne_vesti_asr/161226/segments/T74__T75.wav', 'data/02_Južne_vesti_asr/161226/segments/T13__T14.wav', 'data/02_Južne_vesti_asr/161226/segments/T106__T107.wav', 'data/02_Južne_vesti_asr/161226/segments/T134__T135.wav', 'data/02_Južne_vesti_asr/161226/segments/T44__T45.wav', 'data/02_Južne_vesti_asr/161226/segments/T92__T93.wav', 'data/02_Južne_vesti_asr/161226/segments/T112__T113.wav', 'data/02_Južne_vesti_asr/161226/segments/T28__T29.wav', 'data/02_Južne_vesti_asr/161226/segments/T39__T40.wav', 'data/02_Južne_vesti_asr/161226/segments/T50__T51.wav', 'data/02_Južne_vesti_asr/161226/segments/T75__T76.wav', 'data/02_Južne_vesti_asr/161226/segments/T62__T63.wav', 'data/02_Južne_vesti_asr/161226/segments/T6__T7.wav', 
'data/02_Južne_vesti_asr/161226/segments/T79__T80.wav', 'data/02_Južne_vesti_asr/161226/segments/T117__T118.wav', 'data/02_Južne_vesti_asr/161226/segments/T126__T127.wav', 'data/02_Južne_vesti_asr/161226/segments/T41__T42.wav', 'data/02_Južne_vesti_asr/161226/segments/T18__T19.wav', 'data/02_Južne_vesti_asr/161226/segments/T0__T3.wav', 'data/02_Južne_vesti_asr/161226/segments/T138__T139.wav', 'data/02_Južne_vesti_asr/161226/segments/T132__T133.wav', 'data/02_Južne_vesti_asr/161226/segments/T113__T114.wav', 'data/02_Južne_vesti_asr/161226/segments/T63__T64.wav', 'data/02_Južne_vesti_asr/161226/segments/T26__T27.wav', 'data/02_Južne_vesti_asr/161226/segments/T78__T79.wav', 'data/02_Južne_vesti_asr/161226/segments/T67__T68.wav', 'data/02_Južne_vesti_asr/161226/segments/T29__T30.wav', 'data/02_Južne_vesti_asr/161226/segments/T116__T117.wav', 'data/02_Južne_vesti_asr/161226/segments/T83__T84.wav', 'data/02_Južne_vesti_asr/161226/segments/T38__T39.wav', 'data/02_Južne_vesti_asr/161226/segments/T35__T36.wav', 'data/02_Južne_vesti_asr/161226/segments/T102__T103.wav', 'data/02_Južne_vesti_asr/161226/segments/T37__T38.wav', 'data/02_Južne_vesti_asr/161226/segments/T94__T95.wav', 'data/02_Južne_vesti_asr/161226/segments/T43__T44.wav', 'data/02_Južne_vesti_asr/161226/segments/T98__T99.wav', 'data/02_Južne_vesti_asr/161226/segments/T77__T78.wav', 'data/02_Južne_vesti_asr/161226/segments/T8__T9.wav', 'data/02_Južne_vesti_asr/161226/segments/T33__T34.wav', 'data/02_Južne_vesti_asr/161226/segments/T99__T100.wav', 'data/02_Južne_vesti_asr/161226/segments/T16__T17.wav', 'data/02_Južne_vesti_asr/161226/segments/T86__T87.wav', 'data/02_Južne_vesti_asr/161226/segments/T93__T94.wav', 'data/02_Južne_vesti_asr/161226/segments/T72__T73.wav', 'data/02_Južne_vesti_asr/161226/segments/T136__T137.wav', 'data/02_Južne_vesti_asr/161226/segments/T96__T97.wav', 'data/02_Južne_vesti_asr/161226/segments/T57__T58.wav', 'data/02_Južne_vesti_asr/161226/segments/T46__T47.wav']
# from datasets import Dataset, Audio
# from transformers.pipelines.pt_utils import KeyDataset

# ds = Dataset.from_dict({"audio": files_to_process}).cast_column("audio", Audio(sampling_rate=16000, mono=True))
# transcripts = []
# for file in files_to_process:
#     t = pipe(
#         file,
#         generate_kwargs={"language": "serbian"},
#     )
#     transcripts.append(t)

# result = pipe(
#     KeyDataset(ds, "audio"),
    
# )
# transcripts = [i.get("text") for i in result]
# # transcripts = [pipe(file, generate_kwargs={"language":"serbian"}).get("text") for file in files_to_process]
# import pandas as pd

# pd.DataFrame({"file": files_to_process, "transcript": transcripts})


  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Display the raw output of the pipeline call made in the first cell.
# The rendered value below is a dict that begins with a 'text' entry.
result

{'text': ' Muzika Nalazi se na listi opština gde su prosječna primanja najmanja, poslednjih godina je fabriku otvorio samo jedan investitor, a politička preletanja u ovoj opštini su aktuelna i danas. O ovome i ostalim dešavanjima u Aleksincu razgovaramo sa zamenikom predsednika občine Čedomirom Rakićem. Gospodine Rakiću, dobrodošli u naš studio. Volim vas našo. Evo, mandat vaše vladajuće koalicije u Aleksincu se polako bliže kraju, čekaju nas lokalni izbori u ovoj godini. U Aleksincu se u medijima nije mnogo čulo predhodnih godina. Šta je to urađeno u mandatu aktuelne vlasti, što je značajno za građane Aleksinica? Pa ja bih da dam neki kratak odgovor, odnosno uvod na vaš sam početak, da slažem se da je opština Aleksinac jedna, da je u grupu nerezvijenih opština i u četvrte grupi razvijenosti, da prosečna primanja su takva kakva i jesu, a da ima više privrednih subjekata koji su otvarali svoje pogone ili koji su se širili, o kome kao što ste rekli nije se mnogo čulo i nije se pričalo, a