## Displaying Lyrics while Song is Playing in Background

In [1]:
pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [2]:
pip  install ffmpeg

Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6082 sha256=520fa93a81f11754967a9dd9a30df85448fc5e56089b16200225245893c32f1e
  Stored in directory: /root/.cache/pip/wheels/8e/7a/69/cd6aeb83b126a7f04cbe7c9d929028dc52a6e7d525ff56003a
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4


### Convert Audio File to WAV File

In [3]:
from pydub import AudioSegment

In [4]:
# Load the MP3 file
audio = AudioSegment.from_mp3("/content/final_audio.mp3")

# Export the audio as a WAV file
audio.export("/content/final_audio.wav", format="wav")

<_io.BufferedRandom name='/content/final_audio.wav'>

### Recognizing Lyrics and Displaying

In [5]:
!pip install

[31mERROR: You must give at least one requirement to install (see "pip help install")[0m[31m
[0m

In [7]:
!pip install pydub
!pip install noisereduce

Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.3


In [8]:
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl.metadata (28 kB)
Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.11.0


In [9]:
import pydub
import time
import speech_recognition as sr
import numpy as np
import threading
import noisereduce as nr
from pydub import AudioSegment
from IPython.display import display, Audio
from io import BytesIO

In [10]:
#Initialize the Recognizer
recognizer = sr.Recognizer()

In [11]:
#Define normalize_audio Function
def normalize_audio(audio_chunk):
    return audio_chunk.normalize()

In [12]:
#Define reduce_noise Function
def reduce_noise(audio_chunk):
    # Convert audio to numpy array
    audio_array = np.array(audio_chunk.get_array_of_samples())

    # Apply noise reduction
    reduced_noise = nr.reduce_noise(y=audio_array, sr=audio_chunk.frame_rate)

    # Convert numpy array back to AudioSegment
    byte_io = BytesIO()
    reduced_noise.tobytes()  # Convert numpy array to bytes

    # Use pydub's AudioSegment to create an audio file from the byte data
    reduced_audio = AudioSegment(
        reduced_noise.tobytes(),
        frame_rate=audio_chunk.frame_rate,
        sample_width=audio_chunk.sample_width,
        channels=audio_chunk.channels
    )

    return reduced_audio

In [13]:
#Define Transcription Function
def transcribe_audio(audio_data):
    try:
        text = recognizer.recognize_google(audio_data, language="en-US", show_all=False)

        # Retry if the text is empty
        if not text.strip():
            print("Retrying due to unclear audio...")
            text = recognizer.recognize_google(audio_data, language="en-US", show_all=False)  # Retry once

        print(f"Recognized Text: {text}")
        return text
    except sr.UnknownValueError:
        print("Could not understand the audio.")
    except sr.RequestError:
        print("Could not request results from Google Speech Recognition service.")
    return ""

In [14]:
# Function to play the audio and transcribe it
def play_audio_and_transcribe(audio_file_path):
    # Load the audio file (make sure it's a compatible format)
    try:
        audio = AudioSegment.from_wav(audio_file_path)
    except Exception as e:
        print(f"Error loading audio: {e}")
        return

    # Normalize the entire audio for consistent volume
    audio = normalize_audio(audio)

    # Play the audio (this will work in Jupyter)
    display(Audio(audio_file_path, autoplay=True))

    # Break audio into chunks for transcribing (e.g., 6 seconds)
    chunk_duration_ms = 6000  # Duration of each chunk in ms (6 seconds)
    num_chunks = len(audio) // chunk_duration_ms
    for i in range(num_chunks):
        start_time = i * chunk_duration_ms
        end_time = (i + 1) * chunk_duration_ms
        chunk = audio[start_time:end_time]

        # Apply noise reduction to each chunk
        chunk = reduce_noise(chunk)

        # Export chunk to temporary file
        chunk.export("/content/temp_chunk.wav", format="wav")

        # Open temporary file and recognize the speech
        with sr.AudioFile("/content/temp_chunk.wav") as source:
            audio_data = recognizer.record(source)
            text = transcribe_audio(audio_data)

            # Display the text with some visual formatting (highlight current text)
            print(f"Transcribed Text: {text}")

            time.sleep(5)  # Wait for 2 seconds before processing next chunk

    print("Transcription completed.")


In [16]:
# Provide the correct file path for your audio file
audio_file_path = '/content/final_audio.wav'  # Ensure the path is correct

# Start the process of playing and transcribing with highlighting
play_audio_and_transcribe(audio_file_path)


Recognized Text: I'm so lonely Broken Angel I'm so lonely Listen to My Heart
Transcribed Text: I'm so lonely Broken Angel I'm so lonely Listen to My Heart
Recognized Text: you are the one I miss you so much now that you're gone don't don't
Transcribed Text: you are the one I miss you so much now that you're gone don't don't
Recognized Text: I'll be by your side leading the way I'm so lonely Broken Angel
Transcribed Text: I'll be by your side leading the way I'm so lonely Broken Angel
Could not understand the audio.
Transcribed Text: 
Recognized Text: I wish that I could touch touch you again
Transcribed Text: I wish that I could touch touch you again
Recognized Text: lost in a dream
Transcribed Text: lost in a dream
Transcription completed.
