In [None]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    if show:
        print("Recording complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


In [None]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    
    # Reduce noise by applying a noise gate or filter
    reduced_noise_audio = audio_segment - 10  # Reduces volume, acting as a simple noise gate

    # Export the processed audio back to a file
    reduced_noise_audio.export(filename, format="wav")
    
    if show:
        print("Recording and noise reduction complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-13 23:54:45


KeyboardInterrupt: 

Recognized English: Hello hello hello
Translated Chinese: 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好 你好


In [6]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    
    # Reduce noise by lowering the volume
    reduced_noise_audio = audio_segment - 10  # Reduces volume as a simple noise gate

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Export the processed audio back to a file
    normalized_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, and normalization complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-13 23:57:34
Recognized English: Thanks for watching!
Translated Chinese: 感谢观看!
Recognized English: Thank you.
Translated Chinese: 谢谢
Recognized English: Get some of the highlights of what the Supreme Court has said. Look, in their verdict, they've made it very clear that the state is accountable for this kind of bulldozer justice. But have they also spoken about what needs to be done? What kind of a crackdown perhaps we could see or what the victims here of bulldozer justice will get?
Translated Chinese: 获取最高法院所言的一些要点。在他们的判决中,他们已经明确表示国家要为这种推土机的正义负责。但他们是否也谈到需要做些什么?我们或许能看到什么样的镇压或者推土机正义的受害者会得到什么?
Recognized English: Thank you.
Translated Chinese: 谢谢
Recognized English: ये सुख्षिताया बारहें हतो � happily will see a kind of guidelines which will be applicable for Paula concentration in the country
Translated Chinese: 很高兴能看到一种适用于 Paula 集中在这个国家的准则
Recognized English: be happening across the country as far as the demolitions are concerned. The Supreme Court has mad

KeyboardInterrupt: 

In [11]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:04:49
Recognized English: Thank you.
Translated Chinese: 谢谢
Recognized English: Actually the authorities will have to clarify this through the, that what is the illegality involved, what is the illegal structure, why are they demolishing the entire property and not the illegal part, why they were involved.
Translated Chinese: 实际上,当局必须通过以下方式澄清这一点,即所涉的非法性、非法结构、为何拆毁整个财产而不是非法部分、为何涉及非法部分。
Recognized English: for all these years because the person gets convicted or gets involved in any kind of crime in that particular state the action comes
Translated Chinese: 这些年来,由于某人被定罪或卷入特定国家的任何犯罪,诉讼就发生在该州。
Recognized English: All these things need to be clarified to the authorities because now the guy is...
Translated Chinese: 所有这些事情都需要向当局澄清 因为现在这家伙...
Recognized English: will be in place and the authorities will have, you know, will be a question over as to why they have taken.
Translated Chinese: 将就位 当局将会有,你知道, 将是一个问题 关于为什么他们 采取了。
Recognized English: action against t

KeyboardInterrupt: 

In [16]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Apply silence and noise removal
    if show:
        print("Applying silence and noise removal...")
    clean_audio = filtered_audio.split_to_mono()[0].strip_silence()

    # Export the processed audio back to a file
    clean_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, high-pass filtering, and silence/noise removal complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:12:46
Recognized English: not even in the given
Translated Chinese: 即使是在给定的


KeyboardInterrupt: 

In [17]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("tiny")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


100%|█████████████████████████████████████| 72.1M/72.1M [00:39<00:00, 1.91MiB/s]


Recording started at 2024-11-14 00:15:30
Recognized English: In the past we have seen that house, the government has cracked down on this.
Translated Chinese: 过去我们见过那所房子,政府就此予以镇压。
Recognized English: The properties of those criminals and that is why such things were hard to issue strict guidelines to be alert to by all the state authorities
Translated Chinese: 这些罪犯的财产,正因为如此,这类事情很难发布严格的准则,让所有国家当局保持警惕。
Recognized English: Fashion League the the municipalities that currently has the crime with endlich.
Translated Chinese: 时尚联盟是目前犯罪的地方
Recognized English: How they need to take action like you already pointed out that steam and time.
Translated Chinese: 他们如何需要采取行动 像你已经指出的 蒸汽和时间。
Recognized English: you need to be able to do any individual before you know the demolition takes place to any any for any other household or something of that sort. So the demolition notice, we must be able to make sure that you know the person who is willing to you know the demolition is that part which is illegal

KeyboardInterrupt: 

In [18]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
import io

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("tiny.en")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


100%|█████████████████████████████████████| 72.1M/72.1M [00:47<00:00, 1.61MiB/s]


Recording started at 2024-11-14 00:22:13
Recognized English: Yes, actually the guidance will actually be done.
Translated Chinese: 是的,实际上,指导将实际完成。
Recognized English: about that you know puts to have you know been you know has faced this bulldozer will have to be compensated but in the time of case to case basis now the what is the how this is completely demolished because of the small legality involved in that structure. So, everything needs to be you know measured accordingly that is not as you think would have not laid down anything specifically but yes at least to case basis if all it a person has has a previous problem in you know the how is complete.
Translated Chinese: 你知道,你知道,你知道, 面对这辆推土机 将不得不得到补偿, 但在案件审理时, 依据现在的情况, 如何完全拆除它, 因为这个结构涉及的 合法性很小。 所以,每件事都需要你知道 相应的量度, 这不是你认为没有 具体地规定什么,而是是,至少是 案例基础,如果一个人以前有问题, 你知道它是如何完成的。
Recognized English: Well, you should go to the minor one.
Translated Chinese: 好吧,你应该去 小一个。
Recognized English: That particular person can oppose the port again those 

KeyboardInterrupt: 

In [19]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
from pydub.effects import normalize

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:23:59
Recognized English: I'm Mr. Ganshan Tiwari, your view on this verdict that's coming from the Supreme Court saying very clearly that the state cannot go ahead and arbitrarily res…
Translated Chinese: 我是甘山提瓦里先生 你对最高法院的判决的看法 很清楚地说 国家不能任意...
Recognized English: Thank you.
Translated Chinese: 谢谢
Recognized English: Let me close the use of the Prime Minister of India, from India to the West side itself, on his victory to Vito. When the Prime Minister nodded, we are just a man, and he said that the nation should learn.
Translated Chinese: 请允许我结束印度总理从印度到西方的用武之道,他胜利给维托。 当总理点头时,我们只是一个男人,他说国家应该学习。
Recognized English: How good they were, how good they were.
Translated Chinese: 他们有多好,多好
Recognized English: The same that is in the Code of Bows, the same constitution to people like the time minister and the other staff who have been in power for the kids.
Translated Chinese: 同样的,在《鞠躬法》中, 同样的宪法对像时务大臣这样的人 和其他掌权的员工 都一样,他们一直为孩子们服务。
Recognized English: And please, 

KeyboardInterrupt: 

In [22]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
from pydub.effects import normalize

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("small")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:27:54
Recognized English: Thank you.
Translated Chinese: 谢谢
Recognized English: Now the yogi government as have several other state governments that have resorted to this bulldozer justice model always maintain that the property is an illegal structure.
Translated Chinese: 现在瑜伽政府和其他几个采用推土机司法模式的州政府一样,总是认为该财产是非法结构。
Recognized English: They can see what they want.
Translated Chinese: 他们可以看到他们想要什么。
Recognized English: We leave that in the...
Translated Chinese: 我们把那个留在...
Recognized English: The idea of cutting it or cutting it, they believe minorities are not legal entities in this country.
Translated Chinese: 他们认为少数民族不是这个国家的法律实体。
Recognized English: Dalits are not living entities in this country.
Translated Chinese: 达利特人不是生活在这个国家的实体。
Recognized English: They ran against the farmers, the biggest farmers, and were not legal entities.
Translated Chinese: 他们与农民竞争,而农民是最大的农民,他们不是法律实体。
Recognized English: The idea of the dictatorship is not the idea of India. 

KeyboardInterrupt: 

Recognized English: Thank you.
Translated Chinese: 谢谢


In [23]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
from pydub.effects import normalize

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("tiny.en")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")
    
    if show:
        print("Recording, noise reduction, normalization, and high-pass filtering complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:30:33
Recognized English: Put the flow in case there is anything wrong in the motor that is been issued by the executive. The code is also gone ahead and said that this amounts to collective punishment. When there is only one person who is accused of convicted of a crime and a house is demolished or the property is demolished, it only doesn't just affect him, it also affects his family members. So this amounts to collective punishment which cannot be allowed under the law, he being in mind that right to shelter is a fundamental right of all citizens. In fact, the Supreme Court has gone ahead to see that it is really sad to see that women and children are being thrown out of their properties through this arbitrary abuse of process and the abuse of laws that the executive has been doing. The Supreme Court also went ahead and said that the notice that has been put or served to our owner for the knowledge of their properties should be prominently displaye

KeyboardInterrupt: 

In [24]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
from pydub import AudioSegment
from pydub.effects import normalize

# Switch to show recording status
show = 0

if not show:
    warnings.filterwarnings("ignore")

# Initialize Whisper model (choose a smaller model like "base" or "small" for faster performance)
model = whisper.load_model("tiny.en")

# Initialize MarianMT model and tokenizer for offline translation
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Parameters for audio recording
samplerate = 16000
energy_threshold = 1000  # Adjust this threshold based on your environment
silence_duration = 0.5  # Duration (in seconds) of silence to stop recording

# Queue to hold audio files to be processed
audio_queue = queue.Queue()

# Path for the combined script file, name file with current_time saved when start listening
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# Variable to store the last processed English text
last_processed_text = None

# Function to apply dynamic range compression and gain control to enhance speech
def enhance_speech(filename):
    audio_segment = AudioSegment.from_wav(filename)

    # Apply dynamic range compression to reduce the difference between soft and loud sounds
    if show:
        print("Applying dynamic range compression...")
    compressed_audio = audio_segment.compress_dynamic_range(threshold=-20.0, ratio=4.0, attack=5.0, release=50.0)

    # Apply gain control to amplify speech if it's too quiet
    if show:
        print("Applying gain control...")
    amplified_audio = compressed_audio + 5  # Increase volume by 5 dB (you can adjust this)

    # Normalize the audio to ensure the overall loudness is balanced
    if show:
        print("Normalizing audio...")
    normalized_audio = amplified_audio.normalize()

    # Export the enhanced audio
    normalized_audio.export(filename, format="wav")
    if show:
        print("Speech enhancement (compression and gain control) complete!")

# Function to record audio until silence is detected
def record_audio_until_silence(filename, samplerate=16000):
    if show:
        print("Recording...")
    audio_data = []
    silence_counter = 0
    
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while True:
            frame = stream.read(int(samplerate * 0.1))[0]  # Read 0.1-second chunks
            audio_data.append(frame)
            
            # Calculate the energy of the current frame
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0

            # If silence has been detected for a sufficient duration, stop recording
            if silence_counter >= silence_duration:
                break
    
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    
    # Apply noise reduction using pydub
    if show:
        print("Applying noise reduction...")
    audio_segment = AudioSegment.from_wav(filename)
    reduced_noise_audio = audio_segment - 10  # Simple noise reduction

    # Apply normalization
    if show:
        print("Normalizing audio...")
    normalized_audio = reduced_noise_audio.normalize()

    # Apply high-pass filter
    if show:
        print("Applying high-pass filter...")
    filtered_audio = normalized_audio.high_pass_filter(300)  # 300 Hz cutoff frequency

    # Export the processed audio back to a file
    filtered_audio.export(filename, format="wav")

    # Apply speech enhancement (dynamic range compression and gain control)
    enhance_speech(filename)

    if show:
        print("Recording, noise reduction, normalization, high-pass filtering, and enhancement complete!")
    audio_queue.put(filename)  # Add the recorded audio to the processing queue

# Function to translate English text to Chinese using the MarianMT model
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text

# Function to process audio (transcribe and translate)
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()  # Get the next audio file from the queue
        if filename:
            # Transcribe using Whisper
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            
            # Only process if the new text is different from the last processed text
            if english_text and english_text != last_processed_text:
                print("Recognized English:", english_text)
                last_processed_text = english_text  # Update the last processed text

                # Translate to Chinese
                chinese_text = translate_to_chinese(english_text)
                print("Translated Chinese:", chinese_text)

                # Write both English and Chinese to the combined script file
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write("English: " + english_text + "\n")
                    combined_file.write("Chinese: " + chinese_text + "\n")
                    combined_file.write("\n")  # Add a blank line for separation
        
        audio_queue.task_done()  # Mark the task as done

current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Recording started at", current_time)
# Start a thread for continuous audio processing
threading.Thread(target=process_audio, daemon=True).start()

# Continuously record audio until stopped
while True:
    # Record audio and save to a temporary file
    record_audio_until_silence("temp_audio.wav")


Recording started at 2024-11-14 00:46:51
Recognized English: Check it out.
Translated Chinese: 检查出来。
Recognized English: Let's bring in our own this broadcast now which is the party of the Congress for an interview.
Translated Chinese: 让我们把现在的这个节目 带进来,这是国会的党 接受采访。
Recognized English: I
Translated Chinese: 一一
Recognized English: What do you have to say about the Supreme Court's verdict making it very very clear that this bulldozer action is unacceptable and is a breach of fundamental rights.
Translated Chinese: 关于最高法院的判决,你有什么话要说? 说得很清楚,这种推土机行动是不可接受的,是对基本权利的侵犯。
Recognized English: .
Translated Chinese: . .
Recognized English: So, from what is the very good decision.
Translated Chinese: 所以,从什么是非常好的决定。
Recognized English: This cultural pipeline, to learn from area
Translated Chinese: 这条文化管道,从地区中学习
Recognized English: Cheers Guys ! I Ri
Translated Chinese: 干杯,伙计们!
Recognized English: Their
Translated Chinese: 它们的
Recognized English: And the package expenses enhancement to Lufan
Translated C

KeyboardInterrupt: 

In [31]:
import pyaudio
import wave
from pydub import AudioSegment
from pydub.effects import normalize, high_pass_filter

# 录音参数
FORMAT = pyaudio.paInt16  # 16位深度
CHANNELS = 1              # 单声道
RATE = 16000              # 采样率
CHUNK = 1024              # 每个数据块的大小
RECORD_SECONDS = 10       # 录音时间（可以自定义）
OUTPUT_FILENAME = "recorded_audio.wav"  # 输出文件名

def record_audio():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    
    print("录音开始，请说话...")
    frames = []

    try:
        while True:
            data = stream.read(CHUNK)
            frames.append(data)
    except KeyboardInterrupt:
        # 按 Ctrl+C 停止录音
        print("录音结束。")
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()

    # 保存录制的音频
    wf = wave.open(OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    print(f"音频已保存为 {OUTPUT_FILENAME}")

def enhance_audio(input_filename, output_filename):
    # 加载音频文件
    audio = AudioSegment.from_file(input_filename, format="wav")

    # 增加音量
    louder_audio = audio + 10  # 增加10 dB音量

    # 归一化音频
    normalized_audio = normalize(louder_audio)

    # 高通滤波去除低频噪音
    filtered_audio = high_pass_filter(normalized_audio, cutoff=300)  # 300 Hz截止频率

    # 去除静音部分
    silence_threshold = -40  # 静音阈值，单位为 dBFS
    chunk_length_ms = 10  # 检测静音的时间窗口，单位为毫秒

    trimmed_audio = filtered_audio.strip_silence(silence_thresh=silence_threshold, silence_len=chunk_length_ms)

    # 导出处理后的音频
    trimmed_audio.export(output_filename, format="wav")
    print(f"处理后的音频已保存为 {output_filename}")

if __name__ == "__main__":
    # 录音
    record_audio()

    # 处理录制的音频
    enhance_audio(OUTPUT_FILENAME, "enhanced_audio.wav")


录音开始，请说话...
录音结束。
音频已保存为 recorded_audio.wav


InvalidDuration: padding cannot be longer than silence_len

In [29]:
enhance_audio("recorded_audio.wav", "enhanced_audio.wav")

TypeError: strip_silence() got an unexpected keyword argument 'silence_chunk_len'

In [33]:
import sounddevice as sd
import numpy as np
import wavio
import datetime

# 录音参数
RATE = 16000          # 采样率
CHUNK = 1024          # 每个数据块大小
SILENCE_THRESHOLD = 0.01  # 静音检测阈值
MIN_SILENCE_LEN = 5   # 静音最小持续时间（秒）

def detect_silence(data_chunk, threshold):
    """检测音频数据是否为静音"""
    volume = np.sqrt(np.mean(data_chunk**2))  # 计算RMS音量
    return volume < threshold

def record_and_split():
    recording = []
    silent_chunks = 0
    part_number = 1

    def callback(indata, frames, time, status):
        nonlocal recording, silent_chunks, part_number

        if status:
            print(status)

        # 检测是否为静音
        is_silent = detect_silence(indata, SILENCE_THRESHOLD)

        if is_silent:
            silent_chunks += 1
        else:
            silent_chunks = 0

        # 记录音频数据
        recording.extend(indata.copy())

        # 如果静音持续超过MIN_SILENCE_LEN秒，保存当前录音
        if silent_chunks > (RATE / CHUNK * MIN_SILENCE_LEN):
            if len(recording) > 0:
                filename = f"recording_part_{part_number}.wav"
                wavio.write(filename, np.array(recording), RATE, sampwidth=2)
                print(f"保存音频片段: {filename}")
                part_number += 1
                recording = []
                silent_chunks = 0

    # 开始录音
    with sd.InputStream(callback=callback, channels=1, samplerate=RATE, blocksize=CHUNK):
        print("正在录音...按 Ctrl+C 停止")
        try:
            while True:
                pass  # 主循环保持运行
        except KeyboardInterrupt:
            print("录音结束")

if __name__ == "__main__":
    record_and_split()


正在录音...按 Ctrl+C 停止
保存音频片段: recording_part_1.wav
保存音频片段: recording_part_2.wav
保存音频片段: recording_part_3.wav
保存音频片段: recording_part_4.wav
保存音频片段: recording_part_5.wav
保存音频片段: recording_part_6.wav
保存音频片段: recording_part_7.wav
保存音频片段: recording_part_8.wav
录音结束


In [8]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime

# Suppress warnings
warnings.filterwarnings("ignore")

# Configuration
SAMPLERATE = 16000
ENERGY_THRESHOLD = 1000  # Threshold for detecting silence
SILENCE_DURATION = 0.5  # Duration of silence to stop recording
SHOW_STATUS = 0  # Set to 1 to display recording/transcription progress
COMBINED_SCRIPT_PATH = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"


class AudioRecorder:
    def __init__(self, samplerate=SAMPLERATE, energy_threshold=ENERGY_THRESHOLD, silence_duration=SILENCE_DURATION):
        self.samplerate = samplerate
        self.energy_threshold = energy_threshold
        self.silence_duration = silence_duration

    def record_until_silence(self, filename):
        if SHOW_STATUS:
            print("Recording...")
        audio_data = []
        silence_counter = 0

        with sd.InputStream(samplerate=self.samplerate, channels=1, dtype='int16') as stream:
            while True:
                frame = stream.read(int(self.samplerate * 0.1))[0]  # Read 0.1-second chunks
                audio_data.append(frame)

                # Calculate the energy of the current frame
                if np.max(np.abs(frame)) < self.energy_threshold:
                    silence_counter += 0.1
                else:
                    silence_counter = 0

                # If silence is detected for a sufficient duration, stop recording
                if silence_counter >= self.silence_duration:
                    break

        audio_data = np.concatenate(audio_data, axis=0)
        wavio.write(filename, audio_data, self.samplerate, sampwidth=2)
        if SHOW_STATUS:
            print("Recording complete!")
        return filename


class WhisperTranscriber:
    def __init__(self, model_size="small"):
        self.model = whisper.load_model(model_size)

    def transcribe(self, audio_file):
        result = self.model.transcribe(audio_file, fp16=False)
        text = result["text"].strip()
        # if SHOW_STATUS:
        # if text and text != last_processed_text:
        if text:
            print("Recognized English:", text)
        return text


class MarianTranslator:
    def __init__(self, model_name='Helsinki-NLP/opus-mt-en-zh'):
        self.tokenizer = MarianTokenizer.from_pretrained(model_name)
        self.model = MarianMTModel.from_pretrained(model_name)

    def translate(self, text):
        inputs = self.tokenizer([text], return_tensors="pt", padding=True)
        translated = self.model.generate(**inputs)
        translated_text = self.tokenizer.decode(translated[0], skip_special_tokens=True)
        # if SHOW_STATUS:
        print("Translated Chinese:", translated_text)
        return translated_text


class AudioProcessor:
    def __init__(self, recorder, transcriber, translator, script_path):
        self.recorder = recorder
        self.transcriber = transcriber
        self.translator = translator
        self.script_path = script_path
        self.audio_queue = queue.Queue()
        self.last_processed_text = None

    def process_audio(self):
        while True:
            filename = self.audio_queue.get()  # Get the next audio file from the queue
            if filename:
                # Transcribe the audio
                english_text = self.transcriber.transcribe(filename)

                # Process only if the text is different from the last processed
                if english_text and english_text != self.last_processed_text:
                    self.last_processed_text = english_text

                    # Translate to Chinese
                    chinese_text = self.translator.translate(english_text)

                    # Write to script file
                    with open(self.script_path, "a") as file:
                        file.write("English: " + english_text + "\n")
                        file.write("Chinese: " + chinese_text + "\n")
                        file.write("\n")  # Add a blank line for separation

            self.audio_queue.task_done()  # Mark the task as done

    def add_audio_to_queue(self, filename):
        self.audio_queue.put(filename)


# Main Program
if __name__ == "__main__":
    # Initialize components
    recorder = AudioRecorder()
    transcriber = WhisperTranscriber()
    translator = MarianTranslator()
    processor = AudioProcessor(recorder, transcriber, translator, COMBINED_SCRIPT_PATH)

    # Start the processing thread
    threading.Thread(target=processor.process_audio, daemon=True).start()

    print("Recording started. Press Ctrl+C to stop.")
    while True:
        try:
            # Record audio and add it to the processing queue
            temp_filename = "temp_audio.wav"
            recorded_file = recorder.record_until_silence(temp_filename)
            processor.add_audio_to_queue(recorded_file)
        except KeyboardInterrupt:
            print("\nStopping recording...")
            break


Recording started. Press Ctrl+C to stop.
Recognized English: Hello everybody.
Translated Chinese: 大家好,你们好
Recognized English: And again, welcome to Foundations to Algor-
Translated Chinese: 欢迎来到阿尔戈尔基金会
Recognized English: The last session of...
Translated Chinese: 最后一次会议...
Recognized English: That's what we're meant to-
Translated Chinese: 这就是我们想要...
Recognized English: Oh boy.
Translated Chinese: 哦,男孩。 呵呵,男孩。
Recognized English: ...
Translated Chinese: .

Stopping recording...


Recognized English: Four…
Translated Chinese: 四个... Four...


In [10]:
import warnings
import whisper
import sounddevice as sd
import numpy as np
import wavio
from transformers import MarianMTModel, MarianTokenizer
import threading
import queue
import datetime
import streamlit as st

# 忽略警告
warnings.filterwarnings("ignore")

# 初始化模型
model = whisper.load_model("small")
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# 参数
samplerate = 16000
energy_threshold = 1000
silence_duration = 0.5
audio_queue = queue.Queue()

# 保存转录和翻译的文本文件
combined_script_path = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt"

# 状态变量
is_recording = False
last_processed_text = None

# 音频录制函数
def record_audio_until_silence(filename, samplerate=16000):
    audio_data = []
    silence_counter = 0
    with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16') as stream:
        while is_recording:
            frame = stream.read(int(samplerate * 0.1))[0]
            audio_data.append(frame)
            if np.max(np.abs(frame)) < energy_threshold:
                silence_counter += 0.1
            else:
                silence_counter = 0
            if silence_counter >= silence_duration:
                break
    audio_data = np.concatenate(audio_data, axis=0)
    wavio.write(filename, audio_data, samplerate, sampwidth=2)
    audio_queue.put(filename)

# 翻译函数
def translate_to_chinese(text):
    inputs = tokenizer([text], return_tensors="pt", padding=True)
    translated = translation_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# 音频处理函数
def process_audio():
    global last_processed_text
    while True:
        filename = audio_queue.get()
        if filename:
            result = model.transcribe(filename, fp16=False)
            english_text = result["text"].strip()
            if english_text and english_text != last_processed_text:
                last_processed_text = english_text
                chinese_text = translate_to_chinese(english_text)
                with open(combined_script_path, "a") as combined_file:
                    combined_file.write(f"English: {english_text}\n")
                    combined_file.write(f"Chinese: {chinese_text}\n\n")
                st.session_state["transcription"] = english_text
                st.session_state["translation"] = chinese_text
        audio_queue.task_done()

# Streamlit 应用
st.title("音频录制与实时翻译")
st.write("点击下方按钮开始或停止录音，并查看实时的转录和翻译结果。")

# 状态初始化
if "transcription" not in st.session_state:
    st.session_state["transcription"] = ""
if "translation" not in st.session_state:
    st.session_state["translation"] = ""

# 按钮控制
if st.button("开始录音"):
    if not is_recording:
        is_recording = True
        st.write("录音中...")
        threading.Thread(target=record_audio_until_silence, args=("temp_audio.wav",), daemon=True).start()
        threading.Thread(target=process_audio, daemon=True).start()
else:
    is_recording = False
    st.write("录音已停止。")

# 显示结果
st.subheader("转录结果 (英文)")
st.write(st.session_state["transcription"])

st.subheader("翻译结果 (中文)")
st.write(st.session_state["translation"])


2024-11-20 08:51:15.148 
  command:

    streamlit run /Users/wangcan/.pyenv/versions/3.12.2/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-11-20 08:51:15.153 Session state does not function when running a script without `streamlit run`
