# Speech Recognition & Synthesis

### Installing Required Libraries

In [None]:
!pip install SpeechRecognition pydub
!pip install pyttsx3
!pip install pydub SpeechRecognition gtts playsound
!pip install transformers
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install deep-translator
!pip install openai==0.28

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 32.8/32.8 MB 38.4 MB/s eta 0:00:00
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, SpeechRecognition
Successfully installed SpeechRecognition-3.10.4 pydub-0.25.1
Collecting pyttsx3
  Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.90
Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: playsound
  Building wheel for playsound (setup.py) ... done
  Created wheel for playsound: filename=playsound-1.3.0-py3-none-any.whl size=7020 sha256=c7f9e4a13290c27da9ad1e87810ccb908cdd02ed071c61b87

In [None]:
# Import required libraries
from IPython.display import display, Audio  # For displaying audio in Jupyter Notebook
import os  # For interacting with the operating system
from pydub import AudioSegment  # For audio file manipulation
import speech_recognition as sr  # For speech recognition
from gtts import gTTS  # For text-to-speech conversion
import playsound  # For playing audio
from deep_translator import GoogleTranslator  # For text translation
from pydub.playback import play  # For playing audio in Jupyter Notebook
import logging  # For logging

# Configure the root logger: records at INFO and above go to application.log
logging.basicConfig(
    level=logging.INFO,
    filename='application.log',
)

# Library explanations:

# - IPython.display: Displays audio and other rich media in the notebook output
# - os: Interacts with the operating system; used for file operations
# - pydub: Audio file manipulation
# - speech_recognition: Speech recognition
# - gtts: Text-to-speech conversion using the Google Text-to-Speech API
# - playsound: Plays audio files
# - deep_translator: Text translation
# - pydub.playback: Plays pydub AudioSegment objects on the local audio device
# - logging: Standard-library logging module, used here to write log files




In [None]:
def play_audio(audio_path):
    """Show an inline audio player for the file at ``audio_path`` in the notebook output."""
    player = Audio(filename=audio_path)
    display(player)


## Speech Recognition

In [None]:

# Dedicated logger for the speech-recognition step.
# logging.basicConfig() only configures the root logger while it has no
# handlers, so calling it again here with a different filename is a silent
# no-op and speech_recognition.log is never written. A named logger with its
# own FileHandler writes that file; records still propagate to the root logger.
_speech_log = logging.getLogger('notebook.speech_recognition')
_speech_log.setLevel(logging.INFO)
if not _speech_log.handlers:
    # Guard so that re-running this cell does not attach a second handler.
    _speech_log_handler = logging.FileHandler('speech_recognition.log')
    _speech_log_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    _speech_log.addHandler(_speech_log_handler)


def recognize_speech(audio_path):
    """Transcribe the speech in an audio file using ``recognize_google``.

    Args:
        audio_path: Path of the audio file to open with ``sr.AudioFile``.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood, the recognition service request failed, or any other
        error occurred during recognition. Each failure is logged and a short
        message is printed.

    Errors raised while opening or reading the audio file are not caught here.
    """
    recognizer = sr.Recognizer()

    # Read the audio data from the file; the file is closed again before the
    # recognition request is made.
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio.
        _speech_log.error("Failed to understand the audio.")
        print("Sorry, could not understand the audio.")
        return ""
    except sr.RequestError as e:
        # The recognition service could not be reached or rejected the request.
        _speech_log.error("Speech recognition service request failed: %s", e)
        print("Sorry, the service is unavailable.")
        return ""
    except Exception as e:
        # Anything else: keep the traceback in the log, return the empty result.
        _speech_log.exception("An error occurred during speech recognition: %s", e)
        print("An error occurred during speech recognition.")
        return ""

    _speech_log.info("Speech recognized: %s", text)
    return text


## GPT-2 for Text Generation

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Dedicated logger for the text-generation step.
# logging.basicConfig() only configures the root logger while it has no
# handlers, so calling it again here with a different filename is a silent
# no-op and gpt2_generation.log is never written. A named logger with its own
# FileHandler writes that file; records still propagate to the root logger.
_gpt2_log = logging.getLogger('notebook.gpt2_generation')
_gpt2_log.setLevel(logging.INFO)
if not _gpt2_log.handlers:
    # Guard so that re-running this cell does not attach a second handler.
    _gpt2_log_handler = logging.FileHandler('gpt2_generation.log')
    _gpt2_log_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    _gpt2_log.addHandler(_gpt2_log_handler)

# (tokenizer, model) pairs keyed by checkpoint name, so the weights are loaded
# once instead of on every generate_text() call.
_gpt2_cache = {}


def _load_gpt2(model_name):
    """Return the cached (tokenizer, model) pair for ``model_name``, loading it on first use."""
    if model_name not in _gpt2_cache:
        _gpt2_cache[model_name] = (
            GPT2Tokenizer.from_pretrained(model_name),
            GPT2LMHeadModel.from_pretrained(model_name),
        )
    return _gpt2_cache[model_name]


def generate_text(input_text, model_name='gpt2-xl', max_length=50, temperature=0.7):
    """Continue ``input_text`` with a sampled GPT-2 completion.

    Args:
        input_text: Prompt to continue.
        model_name: GPT-2 checkpoint to load (any GPT-2 size works).
        max_length: Value passed to ``generate`` as ``max_length``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        The decoded text of the first generated sequence, or an empty string
        when the prompt is empty or any error occurs. Failures are logged and
        a short message is printed.
    """
    # An empty prompt yields no input tokens; bail out before loading the model.
    if not input_text:
        _gpt2_log.error("Failed to generate input_ids. Check input text or tokenizer settings.")
        print("Failed to generate input_ids. Check input text or tokenizer settings.")
        return ""

    try:
        tokenizer, model = _load_gpt2(model_name)

        # Calling the tokenizer returns the attention mask together with the
        # input ids, so the mask does not depend on a pad token being defined.
        encoded = tokenizer(input_text, return_tensors='pt')

        # Fall back to the end-of-sequence id when the tokenizer defines no
        # pad token, so generate() always receives an explicit pad_token_id.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        output = model.generate(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=temperature,
            pad_token_id=pad_token_id,
        )

        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        _gpt2_log.info("Text generated successfully.")
        return generated_text
    except Exception as e:
        # Keep the traceback in the log, return the empty result to the caller.
        _gpt2_log.exception("An error occurred during text generation: %s", e)
        print("An error occurred during text generation.")
        return ""


## Text to Speech

In [None]:

def text_to_speech(text, language='en', output_path="output.mp3"):
    """Synthesize ``text`` with gTTS and save the speech as an MP3 file.

    The file is only written, not played; use ``play_audio`` on the saved
    file to listen to it in the notebook.

    Args:
        text: Text to convert to speech.
        language: Language code handed to gTTS as ``lang``.
        output_path: Where the MP3 file is written.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Fail with a clear message instead of passing empty text on to gTTS.
    if not text or not text.strip():
        raise ValueError("text_to_speech() needs non-empty text to synthesize.")

    tts = gTTS(text=text, lang=language)
    tts.save(output_path)

def play_audio(audio_path):
    """Embed an audio player for ``audio_path`` in the output of the current cell."""
    audio_player = Audio(filename=audio_path)
    display(audio_player)

# Sample MP3 and the WAV file it would be converted to; point these at your own files.
mp3_file_path, wav_file_path = (
    '/content/drive/MyDrive/Speech/sample-008161.mp3',
    '/content/drive/MyDrive/Speech/audio.wav',
)

# MP3 -> WAV conversion is currently disabled.
#convert_mp3_to_wav(mp3_file_path, wav_file_path)

# Prompt handed to GPT-2; any text works.
text = 'conversation between couples'

# Let GPT-2 continue the prompt above.
generated_text = generate_text(text)

KeyboardInterrupt: 

In [None]:
# Recognize speech using SpeechRecognition library
#recognized_text = recognize_speech(wav_file_path)

print("Recognized text:", text,"\n")

print("Generated text: ")
sentences = generated_text.split('.')

# Print each sentence on a new line; splitting on '.' leaves empty fragments
# (e.g. after the final period), which are skipped.
for sentence in sentences:
    sentence = sentence.strip()
    if sentence:
        print(sentence)
print("\n")

# Convert generated text to speech and play it.
# generate_text() returns "" when generation fails; in that case there is
# nothing to synthesize, and output.mp3 would be missing or left over from
# an earlier run.
if generated_text.strip():
    text_to_speech(generated_text)
    play_audio('output.mp3')
else:
    print("No generated text to convert to speech.")



--------------------------------------------------------------------------------



In [None]:
def translate_text(text, target_language='en'):
    """Translate ``text`` into ``target_language`` with GoogleTranslator.

    The source language is auto-detected. If translation raises, the error
    is printed and an empty string is returned instead.
    """
    try:
        translator = GoogleTranslator(source='auto', target=target_language)
        return translator.translate(text)
    except Exception as error:
        # Report the failure and fall through to the empty result.
        print("Translation failed:", error)
    return ""

def display_languages():
    """Print the menu of available languages, one ``code: name`` entry per line."""
    menu_lines = (
        "Available languages:",
        "en: English",
        "fr: French",
        "es: Spanish",
        # Add more languages as needed
    )
    for menu_line in menu_lines:
        print(menu_line)

def text_to_speech(text, language='en'):
    """Synthesize ``text`` in ``language`` with gTTS and write the audio to output2.mp3."""
    gTTS(text=text, lang=language).save("output2.mp3")

# Display available languages
display_languages()

# Select target language for translation. Surrounding whitespace is removed so
# an accidental space does not turn a valid code into an unsupported one.
target_language_code = input("Enter the language code to translate the text to: ").strip()

# Translate the GPT-2 output to the desired language
translated_text = translate_text(generated_text, target_language=target_language_code)

if translated_text:
    # Convert translated text to speech and play it
    print("\n")
    print("Translated Text :",translated_text,"\n")
    text_to_speech(translated_text, language=target_language_code)
    play_audio('output2.mp3')
else:
    # translate_text() returns "" when the translation could not be produced.
    print("Translation failed. Please try again.")


Available languages:
en: English
fr: French
es: Spanish
Enter the language code to translate the text to: fr


Translated Text : conversation entre couples.

L'étude, qui est la plus importante à ce jour sur les parents de même sexe aux États-Unis, révèle qu'au cours de la première année de parentalité, les enfants de parents de même sexe sont plus susceptibles que les enfants de parents hétérosexuels de 



In [None]:
# Input text to be translated and synthesized
input_text = 'conversation between couples'

# Display available languages
display_languages()

# Select target language for translation. Surrounding whitespace is removed so
# an accidental space does not turn a valid code into an unsupported one.
target_language_code = input("Enter the language code to translate the text to: ").strip()

# Translate input text to the desired language
translated_text = translate_text(input_text, target_language=target_language_code)

if translated_text:
    # Convert translated text to speech and play it
    print("\n")
    print("Translated Text :",translated_text,"\n")
    text_to_speech(translated_text, language=target_language_code)
    play_audio('output2.mp3')

else:
    # translate_text() returns "" when the translation could not be produced.
    print("Translation failed. Please try again.")


Available languages:
en: English
fr: French
es: Spanish
Enter the language code to translate the text to: es


Translated Text : conversación entre parejas 

