In [3]:
import json
import logging
import os
import tempfile

import speech_recognition as sr
from google.cloud import translate_v3 as translate
from google.oauth2 import service_account
from pydub import AudioSegment


# Location of the Google Cloud service-account key file (machine-specific path).
credentials_path = r'C:\Users\abhis\OneDrive\Desktop\realign voice cloning\tactile-vehicle-427513-g4-745aea4da0e0.json'


# The key file is JSON, so read it explicitly as UTF-8 instead of relying on
# the platform's locale encoding. Only the project id is taken from it here;
# translate_text() uses it to build the API parent resource path.
with open(credentials_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
    project_id = data['project_id']


# Credentials object built from the same key file.
credentials = service_account.Credentials.from_service_account_file(credentials_path)


# Translation client shared by translate_text().
translate_client = translate.TranslationServiceClient(credentials=credentials)


# Speech recognizer shared by transcribe_audio().
r = sr.Recognizer()


def segment_audio(audio_path, segment_length=10):
    """Split an audio file into consecutive chunks of a fixed duration.

    Args:
        audio_path: Path of the audio file, loaded with
            ``AudioSegment.from_file``.
        segment_length: Chunk duration in seconds. Fractional values are
            accepted and rounded to whole milliseconds.

    Returns:
        The chunks in playback order. The last chunk is shorter when the
        audio length is not a multiple of the chunk duration; audio of
        length zero yields an empty list.

    Raises:
        ValueError: If ``segment_length`` rounds to less than one millisecond
            (zero or negative durations included).
    """
    # Slice bounds and the range() step must be whole milliseconds.
    step_ms = round(segment_length * 1000)
    if step_ms < 1:
        # Checked before loading so a bad duration fails fast with a clear
        # message instead of an opaque range() error or a silent empty result.
        raise ValueError(
            f"segment_length must be a positive duration in seconds, got {segment_length!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    return [audio[start:start + step_ms] for start in range(0, len(audio), step_ms)]


def transcribe_audio(audio_path):
    """Transcribe an audio file with the Google Web Speech recognizer.

    Args:
        audio_path: Path of an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed. Failures are logged as
        warnings rather than returned, so a diagnostic message is never
        mistaken for a transcript by the translation step.
    """
    # Read the whole file into memory first; the file can then be closed
    # before the (potentially slow) network request is made.
    with sr.AudioFile(audio_path) as source:
        audio_recorded = r.record(source)

    log = logging.getLogger(__name__)
    try:
        return r.recognize_google(audio_recorded)
    except sr.UnknownValueError:
        log.warning(
            "Google Speech Recognition could not understand audio: %s", audio_path
        )
    except sr.RequestError as e:
        log.warning(
            "Could not request results from Google Speech Recognition service; %s", e
        )
    return ""


def translate_text(text, target_language="de", source_language="en"):
    """Translate plain text with the Cloud Translation v3 client.

    Args:
        text: Text to translate.
        target_language: Language code to translate into.
        source_language: Language code of ``text``. Pass ``None`` (or an
            empty string) to omit it from the request and let the service
            detect the source language.

    Returns:
        The translated text, or the literal string ``"No text to translate."``
        when ``text`` is empty, ``None`` or only whitespace (no request is
        sent in that case).
    """
    if not text or not text.strip():
        return "No text to translate."

    request = {
        "parent": f"projects/{project_id}/locations/global",
        "contents": [text],
        "mime_type": "text/plain",
        "target_language_code": target_language,
    }
    # The source language is optional in the v3 API; include it only if given.
    if source_language:
        request["source_language_code"] = source_language

    response = translate_client.translate_text(request=request)
    # Exactly one content string was sent, so the first translation is ours.
    return response.translations[0].translated_text


def process_audio_file(audio_file):
    """Transcribe and translate an audio file, per segment and as a whole.

    The file is split with ``segment_audio``; each segment is exported to a
    temporary WAV file, transcribed and translated. The complete file is then
    transcribed and translated once more, and both results are printed.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        None. Results are written to standard output.
    """
    segment_translations = []

    # A private temporary directory keeps the intermediate WAV files out of
    # the working directory (no clobbering of existing files) and is removed
    # even when transcription or translation raises.
    with tempfile.TemporaryDirectory() as work_dir:
        for i, segment in enumerate(segment_audio(audio_file)):
            segment_file = os.path.join(work_dir, f"temp_segment_{i}.wav")
            # export() returns the open output handle; close it right away so
            # the file can be reopened for reading and later deleted.
            segment.export(segment_file, format='wav').close()
            text = transcribe_audio(segment_file)
            segment_translations.append(translate_text(text))

    full_text = transcribe_audio(audio_file)
    full_translation = translate_text(full_text)

    print("Segment Translations:")
    for i, translation in enumerate(segment_translations):
        print(f"Segment {i+1}: {translation}")

    print("\nFull Translation:")
    print(full_translation)

    


if __name__ == "__main__":
    # Script entry point: run the pipeline on one sample recording.
    sample_recording = r'C:\Users\abhis\OneDrive\Desktop\realign voice cloning\Voicy_Answer your question for silence.wav'  # Update this path
    process_audio_file(sample_recording)


Segment Translations:
Segment 1: Ich werde deine Frage beantworten und dann für seliges Schweigen zurückkehren

Full Translation:
Ich werde deine Frage beantworten und dann für gesegnetes Schweigen zurückkehren
