Importing libraries


In [33]:
import os
import tkinter as tk
from tkinter import filedialog, messagebox, IntVar, ttk

import librosa
import noisereduce as nr
import numpy as np
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play

Selecting the audio file


In [34]:
def open_file():
    """Let the user pick a WAV file and put its path into the path entry.

    The entry is left untouched when the dialog returns no path.
    """
    chosen_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")])
    if not chosen_path:
        return

    # Replace whatever the entry currently holds with the new selection.
    filepath_entry.delete(0, tk.END)
    filepath_entry.insert(0, chosen_path)

Main function that validates the form and dispatches to the selected service

In [35]:
def main():
    """Validate the form and run the one service the user ticked.

    Checks that a path has been entered and points at an existing file, and
    that exactly one service checkbox is ticked, then calls the matching
    service function.

    Returns:
        bool: True when a service was dispatched; False when validation
        failed (an error dialog has already been shown in that case).
    """
    filepath = filepath_entry.get()
    if not filepath:
        messagebox.showerror("Error", "Please select an audio file.")
        return False
    # The entry is editable, so the path may have been typed by hand; reject
    # a missing file here instead of letting the loader raise inside the Tk
    # callback where the user would see nothing.
    if not os.path.isfile(filepath):
        messagebox.showerror("Error", "The selected audio file does not exist.")
        return False

    # Checkbox variable -> handler, in the same priority order as the form.
    services = (
        (echo_var, transform_audio),
        (noise_reduction_var, remove_noise),
        (transcript_var, transcript),
        (cartoonic_var, cartoonify_audio),
        (playback_var, playback_audio),
    )
    selected = [handler for var, handler in services if var.get() == 1]

    if len(selected) > 1:
        messagebox.showerror("Error", "Please select only one service.")
        return False
    if not selected:
        messagebox.showerror("Error", "Please select at least one service.")
        return False

    selected[0]()
    return True


Echo function

In [36]:
def transform_audio():
    """Mix a single 0.2 s echo into the selected file, play it, and offer to save it.

    The file named in the path entry is loaded with librosa's default
    settings, a delayed copy of the signal is added to it, and the result is
    converted to 16-bit mono PCM for pydub.
    """
    filepath = filepath_entry.get()
    audio_data, sample_rate = librosa.load(filepath)

    # Delayed copy of the signal: silence for the first `delay` samples, then
    # the original audio shifted right by that amount.
    delay = int(0.2 * sample_rate)
    echo_data = np.zeros(len(audio_data))
    echo_data[delay:] = audio_data[:-delay]
    audio_with_echo = audio_data + echo_data

    # Signal plus echo can leave [-1, 1]. Clamp to the int16 range before the
    # cast so loud peaks saturate instead of overflowing (which typically
    # wraps to the opposite sign and is heard as harsh crackle).
    int16_range = np.iinfo(np.int16)
    pcm = np.clip(audio_with_echo * (2 ** 15), int16_range.min, int16_range.max).astype(np.int16)

    echoed_audio_segment = AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
    play(echoed_audio_segment)
    save_file(echoed_audio_segment)

Noise reduction function

In [37]:
def remove_noise():
    """Run noise reduction on the selected file, play it, and offer to save it.

    The file is loaded at its native sample rate (``sr=None``), passed through
    ``noisereduce.reduce_noise`` and converted to 16-bit mono PCM for pydub.
    """
    filepath = filepath_entry.get()
    audio_data, sample_rate = librosa.load(filepath, sr=None)

    noise_reduced_audio = nr.reduce_noise(y=audio_data, sr=sample_rate)

    # Clamp to the int16 range before casting so any sample that ends up
    # outside [-1, 1) saturates instead of overflowing.
    int16_range = np.iinfo(np.int16)
    pcm = np.clip(noise_reduced_audio * (2 ** 15), int16_range.min, int16_range.max).astype(np.int16)

    noise_reduced_segment = AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
    play(noise_reduced_segment)
    save_file(noise_reduced_segment)


Transcript function

In [38]:

def transcript():
    """Transcribe the selected file with Google's recognizer and show the text.

    On success the best-scoring transcript is printed and appended to the
    text box on its own line. On failure the reason is printed and also shown
    in a message box, so it is visible to someone looking at the window
    rather than at the notebook output.
    """
    filepath = filepath_entry.get()
    recognizer = sr.Recognizer()
    audio_file = sr.AudioFile(filepath)

    def record_audio():
        """Read the file into an audio object the recognizer can consume."""
        with audio_file as source:
            # NOTE(review): calibration reads from the same stream, so the
            # first 0.2 s is presumably not part of the recording - confirm
            # this is acceptable for files that start with speech.
            recognizer.adjust_for_ambient_noise(source, duration=0.2)
            return recognizer.record(source)

    def recognize_speech(audio):
        """Return ``(text, problem)``; exactly one of the two is None."""
        try:
            response = recognizer.recognize_google(audio, show_all=True)
        except sr.UnknownValueError:
            return None, "Sorry, I couldn't understand the audio."
        except sr.RequestError:
            return None, "There was an error with the speech recognition service."

        # With show_all=True an empty result means nothing was recognized;
        # also tolerate a response without an 'alternative' list.
        candidates = response.get('alternative') if isinstance(response, dict) else None
        if not candidates:
            return None, "No speech detected."

        best_alternative = max(candidates, key=lambda x: x.get('confidence', 0))
        return best_alternative['transcript'], None

    recognized_text, problem = recognize_speech(record_audio())
    if recognized_text is None:
        print(problem)
        messagebox.showinfo("Transcript", problem)
        return

    print("Text:", recognized_text)
    # Trailing newline keeps successive transcripts from running together.
    text_box.insert(tk.END, "TEXT: " + recognized_text + "\n")

Cartoon effect

In [39]:
def cartoonify_audio():
    """Pitch-shift the selected file up by 5 semitone steps, play it, and offer to save it.

    The file is loaded at its native sample rate (``sr=None``), shifted with
    ``librosa.effects.pitch_shift`` and converted to 16-bit mono PCM for pydub.
    """
    filepath = filepath_entry.get()
    audio_data, sample_rate = librosa.load(filepath, sr=None)

    n_steps = 5
    modified_audio = librosa.effects.pitch_shift(audio_data, sr=sample_rate, n_steps=n_steps)

    # The shifted signal can overshoot [-1, 1]; clamp to the int16 range
    # before casting so peaks saturate instead of overflowing.
    int16_range = np.iinfo(np.int16)
    pcm = np.clip(modified_audio * (2 ** 15), int16_range.min, int16_range.max).astype(np.int16)

    modified_segment = AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
    play(modified_segment)
    save_file(modified_segment)


Playback speed

In [40]:
def playback_audio():
    """Play the selected file at the speed chosen in the speed box.

    Supported speeds are '0.5x', '1.5x' and '2x'; any other text in the
    (editable) speed box produces an error dialog. The stretched audio is
    played only - unlike the other effects it is not offered for saving.
    """
    rate_by_label = {'0.5x': 0.5, '1.5x': 1.5, '2x': 2}
    selected_speed = speed_var.get()

    # Validate before touching the file so a bad speed does not cost a decode.
    if selected_speed not in rate_by_label:
        messagebox.showerror("Error", "Please select a playback speed.")
        return

    filepath = filepath_entry.get()
    audio_data, sample_rate = librosa.load(filepath)

    modified_audio = librosa.effects.time_stretch(audio_data, rate=rate_by_label[selected_speed])

    # Clamp to the int16 range before casting so any overshoot saturates
    # instead of overflowing.
    int16_range = np.iinfo(np.int16)
    pcm = np.clip(modified_audio * (2 ** 15), int16_range.min, int16_range.max).astype(np.int16)

    modified_segment = AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
    play(modified_segment)

In [41]:
# Most recent segment handed to save_file. The "Download" button calls
# save_file with no arguments, so this is what it exports.
_last_audio_segment = None


def save_file(audio_segment=None):
    """Ask for a destination and export an audio segment as WAV.

    Args:
        audio_segment: Segment to export (anything with an
            ``export(path, format=...)`` method). When omitted, the segment
            from the most recent call that supplied one is used, which lets
            this function serve directly as a no-argument button callback.

    If there is nothing to save, an informational dialog is shown. If the
    save dialog returns no path, nothing is written.
    """
    global _last_audio_segment

    if audio_segment is None:
        audio_segment = _last_audio_segment
    if audio_segment is None:
        messagebox.showinfo("Download", "There is no processed audio to save yet.")
        return

    # Remember the segment so a later no-argument call can export it again.
    _last_audio_segment = audio_segment

    filepath = filedialog.asksaveasfilename(defaultextension=".wav", filetypes=[("WAV files", "*.wav")])
    if filepath:
        audio_segment.export(filepath, format="wav")

GUI part

In [None]:
# Build the main window. The widgets and Tk variables below are deliberately
# module-level: the callback functions defined in the earlier cells read
# filepath_entry, the *_var checkbox variables, speed_var and text_box as
# globals, so those names must not be renamed.
root = tk.Tk()
root.geometry("600x540")
root.configure(bg="LavenderBlush2")
root.title("DSP project")

# Single frame that holds every control, laid out with grid().
base_frame = tk.Frame(root, padx=10, pady=50)
base_frame.configure(bg="DeepPink4")
base_frame.pack()

# Row 0: file selection (label, editable path entry, Browse button).
filepath_label = tk.Label(base_frame, text="Select an audio file:")
filepath_label.grid(row=0, column=0, sticky="w")

filepath_entry = tk.Entry(base_frame, width=40)
filepath_entry.grid(row=0, column=1, padx=5)

browse_btn = tk.Button(base_frame, text="Browse", command=open_file)
browse_btn.grid(row=0, column=2, padx=5)

# Rows 3-8: one checkbox per service. main() rejects the form unless exactly
# one of these five variables is set.
service_label = tk.Label(base_frame, text="Choose the service:")
service_label.grid(row=3, column=0, sticky="w", pady=5)

echo_var = IntVar()
checkbox_echo = tk.Checkbutton(base_frame, text="Add Echo Effect", variable=echo_var)
checkbox_echo.grid(row=4, column=1, sticky="w")

noise_reduction_var = IntVar()
checkbox_noise_reduction = tk.Checkbutton(base_frame, text="Remove noise", variable=noise_reduction_var)
checkbox_noise_reduction.grid(row=5, column=1, sticky="w")

transcript_var = IntVar()
checkbox_transcript = tk.Checkbutton(base_frame, text="Transcript", variable=transcript_var)
checkbox_transcript.grid(row=6, column=1, sticky="w")

cartoonic_var = IntVar()
checkbox_cartoonic = tk.Checkbutton(base_frame, text="Cartoonic Sound", variable=cartoonic_var)
checkbox_cartoonic.grid(row=7, column=1, sticky="w")

playback_var = IntVar()
checkbox_playback = tk.Checkbutton(base_frame, text="Playback", variable=playback_var)
checkbox_playback.grid(row=8, column=1, sticky="w")

# Row 9: speed selector; its value is read by playback_audio().
speed_var = tk.StringVar()
speed_var.set("0.5x")  # Default speed

speed_options = ['0.5x', '1.5x', '2x']
speed_menu = ttk.Combobox(base_frame, textvariable=speed_var, values=speed_options)
speed_menu.grid(row=9, column=1)

# Row 10: output area; transcript() appends the recognized text here.
text_box = tk.Text(base_frame, height=7, width=40, bg="DeepPink4", borderwidth=0)
text_box.grid(row=10, column=1)

# "Transform" runs main(), which validates the form and calls the chosen service.
transform_btn = tk.Button(base_frame, text="Transform", command=main)
transform_btn.grid(row=11, column=2, columnspan=3, pady=10)

# NOTE(review): Tk invokes a button command with no arguments, so save_file
# has to be callable that way for this button to work.
download_btn = tk.Button(base_frame, text="Download", command=save_file)
download_btn.grid(row=13, column=2, columnspan=3, pady=10)

# Start the Tk event loop; this call blocks until the window is closed.
root.mainloop()


Text: you don't believe it yourself
