In [1]:
from pydub import AudioSegment
import speech_recognition as sr
from google.cloud import translate_v3 as translate
from google.cloud import texttospeech
from google.oauth2 import service_account
import os
import json


# Absolute path to the Google Cloud service-account key file (JSON).
# NOTE(review): this is a machine-specific path; it must be changed to run elsewhere.
credentials_path = r'C:\Users\abhis\OneDrive\Desktop\realign voice cloning\tactile-vehicle-427513-g4-745aea4da0e0.json'
# Read the key file directly to obtain the project id; translate_text() uses
# it to build the "projects/<id>/locations/global" parent path.
with open(credentials_path, 'r') as file:
    data = json.load(file)
    project_id = data['project_id']

# One credentials object, built from the same key file, is shared by both
# Google Cloud clients below.
credentials = service_account.Credentials.from_service_account_file(credentials_path)
translate_client = translate.TranslationServiceClient(credentials=credentials)
tts_client = texttospeech.TextToSpeechClient(credentials=credentials)


# Module-wide speech recognizer used by transcribe_audio().
r = sr.Recognizer()

def segment_audio(audio_path, segment_length=10):
    """Split an audio file into consecutive chunks of a fixed duration.

    Args:
        audio_path: Path of the audio file; it is loaded with
            ``AudioSegment.from_file``.
        segment_length: Duration of each chunk in seconds. Fractional values
            (for example ``0.5``) are accepted; the value must amount to at
            least one millisecond.

    Returns:
        A list of audio slices in playback order. The final slice is shorter
        than the others when the total duration is not a multiple of
        ``segment_length``. An empty recording yields an empty list.

    Raises:
        ValueError: If ``segment_length`` rounds to less than one millisecond
            (zero or negative values included).
    """
    # Slice offsets are expressed in whole milliseconds, so convert once and
    # validate up front: range() rejects a float or zero step, and a negative
    # step would silently produce no segments at all.
    step_ms = int(round(segment_length * 1000))
    if step_ms <= 0:
        raise ValueError(
            f"segment_length must be at least 0.001 seconds, got {segment_length!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    return [audio[start:start + step_ms] for start in range(0, len(audio), step_ms)]

def transcribe_audio(audio_path):
    """Transcribe an audio file with the Google Web Speech recognizer.

    The whole file is read into memory and sent in a single recognition
    request through the module-level recognizer ``r``.

    Args:
        audio_path: Path of the audio file, opened with ``sr.AudioFile``.

    Returns:
        The recognized text. NOTE: failures are not raised; when the speech
        cannot be understood or the service request fails, a human-readable
        error sentence is returned in place of a transcript, so callers
        cannot tell the two apart by type.
    """
    with sr.AudioFile(audio_path) as wav_source:
        captured = r.record(wav_source)
        try:
            transcript = r.recognize_google(captured)
        except sr.UnknownValueError:
            transcript = "Google Speech Recognition could not understand audio."
        except sr.RequestError as err:
            transcript = f"Could not request results from Google Speech Recognition service; {err}."
    return transcript

def translate_text(text, target_language="de", source_language="en"):
    """Translate plain text with the Cloud Translation client.

    Args:
        text: The text to translate.
        target_language: Language code to translate into (default ``"de"``).
        source_language: Language code of ``text`` (default ``"en"``, the
            value this function previously hard-coded). Pass ``None`` or
            ``""`` to leave the field out of the request so the service
            detects the source language itself.

    Returns:
        The translated text of the first (and only) submitted item. When
        ``text`` is empty or ``None`` no request is made and the literal
        sentence ``"No text to translate."`` is returned instead.
    """
    if not text:
        return "No text to translate."

    request = {
        "parent": f"projects/{project_id}/locations/global",
        "contents": [text],
        "mime_type": "text/plain",
        "target_language_code": target_language,
    }
    # The source language is optional in the request; only send it when the
    # caller actually specified one.
    if source_language:
        request["source_language_code"] = source_language

    response = translate_client.translate_text(request=request)
    return response.translations[0].translated_text

def text_to_speech(text, language_code='en-US', output_filename='output.mp3'):
    """Synthesize speech for ``text`` and save it as an MP3 file.

    A male voice is requested for the given language, the audio is fetched
    through the module-level ``tts_client``, and the returned bytes are
    written to ``output_filename`` (overwriting any existing file). A
    confirmation line is printed once the bytes have been written.

    Args:
        text: The text to speak.
        language_code: Voice language code (default ``'en-US'``).
        output_filename: Destination path of the MP3 file.
    """
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    male_voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.MALE
    )
    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=male_voice,
        audio_config=mp3_config,
    )

    with open(output_filename, 'wb') as mp3_out:
        mp3_out.write(result.audio_content)
        print(f'Audio content written to file "{output_filename}"')

def process_audio_file(audio_file):
    """Transcribe, translate and re-synthesize an audio file.

    The file is processed twice: once split into segments (each segment is
    transcribed, translated and spoken into ``segment_<i>_translation.mp3``)
    and once as a whole (spoken into ``full_translation.mp3``). All
    translations are printed at the end. Output files are written relative
    to the current working directory.

    Args:
        audio_file: Path of the audio file to process.
    """
    segment_translations = []

    for i, segment in enumerate(segment_audio(audio_file)):
        segment_file = f"temp_segment_{i}.wav"
        try:
            # export() hands back the open output file; close it explicitly
            # so the temporary file can be reopened and deleted reliably.
            segment.export(segment_file, format='wav').close()
            text = transcribe_audio(segment_file)
        finally:
            # The temporary WAV is only needed for transcription; remove it
            # even when export or transcription raises so it is never leaked.
            if os.path.exists(segment_file):
                os.remove(segment_file)

        translation = translate_text(text)
        text_to_speech(translation, 'de-DE', f"segment_{i}_translation.mp3")
        segment_translations.append(translation)

    full_text = transcribe_audio(audio_file)
    full_translation = translate_text(full_text)
    text_to_speech(full_translation, 'de-DE', 'full_translation.mp3')

    print("Segment Translations:")
    for number, translation in enumerate(segment_translations, start=1):
        print(f"Segment {number}: {translation}")

    print("\nFull Translation:")
    print(full_translation)

# Script entry point: run the full pipeline on one sample recording.
# NOTE(review): the path below is machine-specific and must be adjusted to run elsewhere.
if __name__ == "__main__":
    audio_file_path = r'C:\Users\abhis\OneDrive\Desktop\realign voice cloning\Voicy_Answer your question for silence.wav'
    process_audio_file(audio_file_path)


Audio content written to file "segment_0_translation.mp3"
Audio content written to file "full_translation.mp3"
Segment Translations:
Segment 1: Ich werde deine Frage beantworten und dann für gesegnetes Schweigen zurückkehren

Full Translation:
Ich werde deine Frage beantworten und dann für gesegnetes Schweigen zurückkehren
