In [None]:
import os
import re
import requests, uuid
from faster_whisper import WhisperModel # type: ignore

In [None]:
# Instantiate the faster-whisper model once; the transcription cell calls
# `model.transcribe(...)` on this object.
# The checkpoint name, device and compute type are passed to WhisperModel as-is.
# NOTE(review): device="cuda" presumably requires an NVIDIA GPU to be present;
# confirm before running on a CPU-only machine.
model_size="large-v3"
model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
"""
Size	Parameters	English-only model	Multilingual model	Required VRAM	Relative speed
tiny	39 M	    tiny.en	            tiny	            ~1 GB	        ~32x
base	74 M	    base.en	            base	            ~1 GB	        ~16x
small	244 M	    small.en	        small	            ~2 GB	        ~6x
medium	769 M	    medium.en	        medium	            ~5 GB	        ~2x
large	1550 M	    N/A	                large	            ~10 GB	        1x
"""

In [None]:
# --- Run configuration -------------------------------------------------------
# Input media: `dir` is concatenated directly with `file`, so it must end with
# a path separator.
dir="./"
file="xxx.mp4"
format="lrc" # lrc, srt
# Base name (no extension) used to derive the subtitle output paths.
file_name=os.path.splitext(file)[0]
# Azure Translator credentials. Read them from the environment so real secrets
# never get saved into the notebook; the placeholders are only fallbacks.
key = os.environ.get("AZURE_TRANSLATOR_KEY", "xxx")
endpoint = "https://api.cognitive.microsofttranslator.com"
location = os.environ.get("AZURE_TRANSLATOR_REGION", "xxx")
# Transcription options forwarded to model.transcribe().
vad_filter=False
language=None
audio=dir+file

In [None]:
# transcribe() returns (segments, info). `segments` is a lazy generator that can
# be consumed only once, so it is materialised here: the export cell can then be
# re-run (e.g. with a different `format`) without producing an empty file.
# The transcription work therefore happens in this cell, not during export.
result = list(model.transcribe(audio, language=language, vad_filter=vad_filter,vad_parameters=dict(min_silence_duration_ms=500))[0])

In [None]:
def write(file,result,format):
    """Write transcription segments to ``<file>.lrc`` or ``<file>.srt``.

    Args:
        file: Output path without extension.
        result: Iterable of segment objects exposing ``start`` and ``end``
            (seconds, as accepted by ``convert_time``) and ``text``.
        format: ``"lrc"`` or ``"srt"``. Any other value writes nothing.

    The start timestamp of every segment is printed as a progress indicator.
    """
    if format=="lrc":
        lrc_file=file+".lrc"
        with open(lrc_file,'w',encoding='utf-8') as lrcfile:
            for segment in result:
                start=convert_time(segment.start)[0]
                end=convert_time(segment.end)[0]
                # Each lyric is followed by a timestamp-only line at its end time.
                lrcfile.write(f"[{start}]{segment.text}\n[{end}]\n")
                print(f"{start}")
    elif format=="srt":
        srt_file=file+".srt"
        with open(srt_file,'w',encoding='utf-8') as srtfile:
            # SubRip cue numbers are 1-based.
            for i,segment in enumerate(result, start=1):
                start=convert_time(segment.start)[1]
                end=convert_time(segment.end)[1]
                srtfile.write(f"{i}\n{start} --> {end}\n{segment.text}\n\n")
                print(f"{start}")
                
def convert_time(seconds):
    """Format a time offset for both subtitle formats.

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).

    Returns:
        A ``(lrc, srt)`` tuple of strings:
        * ``lrc`` is ``MM:SS.cc``: total minutes (not wrapped at 60, so it may
          exceed two digits), seconds and centiseconds.
        * ``srt`` is ``HH:MM:SS,mmm``: hours, minutes within the hour, seconds
          and milliseconds.
    """
    # Work in whole milliseconds to avoid repeated float arithmetic.
    total_ms = int(seconds * 1000)

    hours, remainder = divmod(total_ms, 3600000)
    minutes_in_hour, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)

    # LRC has no hours field, so its minutes carry the full duration.
    total_minutes = total_ms // 60000
    # Centiseconds are the top two digits of the millisecond part.
    centis = millis // 10

    lrc_stamp = f"{total_minutes:02d}:{secs:02d}.{centis:02d}"
    srt_stamp = f"{hours:02d}:{minutes_in_hour:02d}:{secs:02d},{millis:03d}"
    return lrc_stamp, srt_stamp

# Export the transcription under the input's base name, in the configured format.
write(dir + file_name, result, format)

In [None]:
def translate(text, source='ja', target='zh', timeout=30):
    """Translate ``text`` with the Azure Translator ``/translate`` endpoint.

    Uses the module-level ``endpoint``, ``key`` and ``location`` settings.

    Args:
        text: Text to translate; it is converted to ``str`` first.
        source: Source language code (defaults to the original ``'ja'``).
        target: Target language code (defaults to the original ``'zh'``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The translated text. Blank input is returned unchanged without
        contacting the service.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    text = "{0}".format(text)
    # Nothing to translate: skip the network round trip.
    if not text.strip():
        return text

    path = '/translate'
    constructed_url = endpoint + path

    params = {
        'api-version': '3.0',
        'from': source,
        'to': [target]
    }

    headers = {
        'Ocp-Apim-Subscription-Key': key,
        # location required if you're using a multi-service or regional (not global) resource.
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # You can pass more than one object in body.
    body = [{
        'text': text
    }]

    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.post(constructed_url, params=params, headers=headers, json=body, timeout=timeout)
    # Fail with the HTTP error instead of an obscure lookup error on the
    # error payload.
    response.raise_for_status()
    return response.json()[0]['translations'][0]['text']

def lrc_tran(lrc_file_path):
    """Write a bilingual copy of an LRC file as ``<name> - 翻译<ext>``.

    Every line that starts with a timestamp and carries text is emitted twice
    under that timestamp: first the original text, then its translation from
    ``translate``. Timestamp-only lines and lines without a leading timestamp
    are not copied to the output.

    Args:
        lrc_file_path: Path of the source LRC file.
    """
    # Derive the output name from the real extension. A plain str.replace would
    # touch every ".lrc" in the path and, if there is none, return the input
    # path itself, so the source would be truncated when opened for writing.
    root, ext = os.path.splitext(lrc_file_path)
    new_lrc_file_path = f"{root} - 翻译{ext}"
    # Minutes may have more than two digits (convert_time only pads to two).
    timestamp = re.compile(r'\[(\d{2,}:\d{2}\.\d{2})\]')
    with open(lrc_file_path, 'r', encoding='utf-8-sig') as lrc_file,\
        open(new_lrc_file_path, 'w', encoding='utf-8-sig') as new_lrc_file:
        # Go line by line instead of assuming strict (text, end-marker) pairs,
        # so a stray or missing line cannot raise or shift the pairing.
        for line in lrc_file:
            stamp = timestamp.match(line)
            if stamp is None:
                continue
            text = line[stamp.end():].strip()
            if not text:
                continue
            time_start = stamp.group(1)
            trans=translate(text)
            print(f"[{time_start}]{trans}\n")
            new_lrc_file.write(f"[{time_start}]{text}\n[{time_start}]{trans}\n")

def srt_tran(srt_file_path):
    """Write a translated copy of an SRT file as ``<name> - 翻译<ext>``.

    Cues are the blank-line-separated blocks of the file. For each cue the
    first two lines (number and timing) are copied, and the remaining text
    lines are joined with a space, translated with ``translate`` and written as
    a single line. Blocks with fewer than two lines are skipped. The start
    time of every processed cue is printed as a progress indicator.

    Args:
        srt_file_path: Path of the source SRT file.
    """
    # Derive the output name from the real extension. A plain str.replace would
    # touch every ".srt" in the path and, if there is none, return the input
    # path itself, so the source would be truncated when opened for writing.
    root, ext = os.path.splitext(srt_file_path)
    new_srt_file_path = f"{root} - 翻译{ext}"
    with open(srt_file_path, 'r', encoding='utf-8-sig') as srt_file,\
        open(new_srt_file_path, 'w', encoding='utf-8-sig') as new_srt_file:
        # Split on blank lines rather than a fixed 4-line stride, so multi-line
        # cues or a partial trailing cue cannot misalign or raise.
        for cue in re.split(r'\n\s*\n', srt_file.read()):
            cue_lines = cue.strip('\n').split('\n')
            if len(cue_lines) < 2:
                continue
            index, timing = cue_lines[0], cue_lines[1]
            text = ' '.join(part.strip() for part in cue_lines[2:] if part.strip())
            # Keep an empty cue empty without calling the translation service.
            trans = translate(text) if text else ''
            new_srt_file.write(f"{index}\n{timing}\n{trans}\n\n")
            print(f"{timing.strip().split(' --> ')[0]}")

def tran(file_name,format):
    """Translate the subtitle file that matches ``format``.

    Args:
        file_name: Subtitle path without extension.
        format: ``"lrc"`` routes to ``lrc_tran``, ``"srt"`` to ``srt_tran``;
            any other value is ignored.
    """
    if format=="srt":
        srt_tran(file_name+".srt")
        return
    if format=="lrc":
        lrc_tran(file_name+".lrc")

# Translate the subtitle file exported above, using the same base path and format.
tran(dir + file_name, format)