# In [17]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
import tkinter as tk
from tkinter import filedialog, messagebox
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from IPython.display import Audio, display

# Function to compute the spectrogram
def spectrogram(x, T, p):
    """Compute the short-time Fourier transform of a 1-D signal.

    The signal is standardised (zero mean, unit variance), cut into frames of
    ``T`` samples starting every ``p`` samples, weighted by a Hamming window
    and transformed with the FFT.

    Args:
        x: 1-D array-like of samples.
        T: Frame length in samples.
        p: Hop size, in samples, between two consecutive frame starts.

    Returns:
        Tuple ``(amplitude, phase)`` of two arrays of shape ``(n_frames, T)``.
        ``n_frames`` is 0 when the signal is shorter than one frame.
    """
    x = np.asarray(x)
    wind = np.hamming(T)  # Hamming window

    if x.size:
        x = x - np.mean(x)
        scale = np.std(x)
        if scale > 0:
            # A constant signal has zero deviation; dividing would yield NaN.
            x = x / scale

    # "+ 1" so that the last full frame (starting at len(x) - T) is included.
    starts = range(0, len(x) - T + 1, p)
    if len(starts) == 0:
        return np.zeros((0, T)), np.zeros((0, T))

    frames = np.array([x[i:i + T] for i in starts])
    S = np.fft.fft(frames * wind)  # Windowing + FFT
    return np.abs(S), np.angle(S)

# Function to convert spectrogram back to waveform
def spectrogram2wav(amp, phase, p):
    """Rebuild a waveform from an amplitude/phase spectrogram.

    Each row is turned back into a time-domain frame with the inverse FFT,
    the frames are added at their original offsets (``i * p``) and every
    sample is divided by the number of frames that cover it.

    Args:
        amp: Array of shape ``(n_frames, frame_width)`` with the magnitudes.
        phase: Array of the same shape with the phases, in radians.
        p: Hop size, in samples, between two consecutive frames.

    Returns:
        1-D array of ``(n_frames - 1) * p + frame_width`` samples, or an
        empty array when the spectrogram has no frame. Samples that no frame
        covers (only possible when ``p > frame_width``) are left at 0.
    """
    amp = np.asarray(amp)
    n, w = amp.shape
    if n == 0:
        return np.zeros(0)

    # Combine amplitude and phase, invert the FFT and keep the real part.
    frames = np.real(np.fft.ifft(amp * np.exp(1j * phase)))

    duree = (n - 1) * p + w  # Total length of the rebuilt signal
    signal = np.zeros(duree)
    trams = np.zeros(duree)  # Number of frames covering each sample
    for i in range(n):
        start = i * p
        signal[start:start + w] += frames[i]
        trams[start:start + w] += 1

    # Average overlapping frames; skip uncovered samples to avoid 0 / 0 = NaN.
    covered = trams > 0
    signal[covered] /= trams[covered]
    return signal

# Function to display the spectrogram
def showspectrogram(amp, fs, canvas, p=None):
    """Draw the lower half of a spectrogram inside a Tk container.

    Args:
        amp: Array of shape ``(n_frames, T)`` holding FFT magnitudes.
        fs: Sampling rate in Hz, used to scale the frequency axis.
        canvas: Tk widget that hosts the plot; its current children are
            destroyed before the new plot is packed into it.
        p: Optional hop size in samples. When given, the horizontal axis is
            expressed in seconds (``frame * p / fs``); otherwise it shows the
            frame index, because the time scale cannot be derived from
            ``amp`` and ``fs`` alone.
    """
    # Imported locally so the function brings its own dependency. A plain
    # Figure is not registered with pyplot, so it is released together with
    # its Tk widget instead of accumulating on every call.
    from matplotlib.figure import Figure

    n, T = amp.shape  # Shape of the spectrogram
    maxindice = int(T / 2)  # Only the first half of the bins is displayed
    freqs = np.arange(maxindice) * fs / T  # FFT bin k sits at k * fs / T
    S = np.log10(amp.T[:maxindice] + 1)  # Log scale

    if p is None:
        times = np.arange(n)
        xlabel = 'Time (frames)'
    else:
        times = np.arange(n) * p / fs
        xlabel = 'Time (sec)'

    fig = Figure(figsize=(5, 3))  # Set size of the figure
    ax = fig.add_subplot(111)
    ax.pcolormesh(times, freqs, S, cmap='gray_r')
    ax.set_ylabel('Frequency (Hz)')
    ax.set_xlabel(xlabel)
    ax.grid()

    # Clear the previous plot and show the new plot
    for widget in canvas.winfo_children():
        widget.destroy()
    canvas_agg = FigureCanvasTkAgg(fig, master=canvas)
    canvas_agg.draw()
    canvas_agg.get_tk_widget().pack(fill=tk.BOTH, expand=True)

# Function to compute energy
def Energy(amp):
    """Return the mean squared amplitude of every row of ``amp``.

    Args:
        amp: 2-D array; each row is one spectrogram frame.

    Returns:
        1-D array with one energy value per row.
    """
    squared = amp ** 2
    return np.mean(squared, axis=1)

# Function for noise cancelling
def NoiseCancelling(amp, threshold=6, alpha=1):
    """Subtract an estimated noise spectrum from a magnitude spectrogram.

    Frames whose energy is below ``threshold`` are treated as silence; their
    average spectrum, scaled by ``alpha``, is subtracted from every frame and
    negative results are clipped to 0.

    Args:
        amp: Array of shape ``(n_frames, n_bins)`` holding magnitudes.
        threshold: Energy below which a frame counts as silence.
        alpha: Factor applied to the noise spectrum before subtraction.

    Returns:
        A new array with the same shape as ``amp``; the input is not
        modified. When no frame is below ``threshold`` there is nothing to
        estimate the noise from and an unchanged copy is returned.
    """
    energy = Energy(amp)
    quiet = energy < threshold
    if not np.any(quiet):
        # Averaging an empty selection would give NaN and turn the whole
        # spectrogram into NaN after the subtraction.
        return np.array(amp, copy=True)

    silence = np.mean(amp[quiet], axis=0)
    return np.maximum(amp - silence * alpha, 0)

# Speech segmentation engine class
class Engine:
    """Energy-based speech segmentation of a WAV file.

    The file is read, converted to a spectrogram and reduced to one energy
    value per frame. ``speech_segmentation`` then scans the frames with an
    adaptive threshold and reports every stretch of speech whose length lies
    between ``t_min`` and ``t_max`` frames.
    """

    def __init__(self, audio, min_threshold, canvas):
        """Load ``audio`` and precompute the per-frame energy.

        Args:
            audio: File name or file object accepted by
                ``scipy.io.wavfile.read``.
            min_threshold: Lower bound of the adaptive energy threshold.
            canvas: Tk widget in which ``plot_energy`` draws.
        """
        self.fs, self.sgn = read(audio)
        if self.sgn.ndim > 1:
            # Multi-channel file: the framing code expects a 1-D signal, so
            # average the channels into one.
            self.sgn = self.sgn.mean(axis=1)
        self.p = 8
        self.T = 512
        self.amp, self.ph = spectrogram(self.sgn, T=self.T, p=self.p)
        self.energy = Energy(self.amp)
        self.B = self._beta_zero()
        self.min_threshold = min_threshold
        self.threshold = self.min_threshold
        # Durations are counted in frames; one frame advances by p samples.
        self.t_sil = 5000 / self.p  # Silence minimum duration (5000 samples = 5000 / fs s)
        self.t_min = 1000 / self.p  # Word minimum duration (1000 samples)
        self.t_max = 16000 / self.p  # Word maximum duration (16000 samples)
        self.canvas = canvas

    def spectrogram(self, x, T, p):
        """Return ``(amplitude, phase)`` of ``x``; delegates to the module-level function."""
        return spectrogram(x, T, p)

    def Energy(self, amp):
        """Return the per-frame energy of ``amp``; delegates to the module-level function."""
        return Energy(amp)

    def plot_energy(self):
        """Plot the per-frame energy inside ``self.canvas``, replacing its content."""
        # A plain Figure is not registered with pyplot, so it is released
        # together with its Tk widget instead of accumulating on every call.
        from matplotlib.figure import Figure

        fig = Figure(figsize=(5, 3))  # Set size of the figure
        ax = fig.add_subplot(111)
        ax.plot(self.energy)
        ax.set_title("Energy Plot")
        ax.set_xlabel("Frame")
        ax.set_ylabel("Energy")

        # Clear the previous plot and show the new plot
        for widget in self.canvas.winfo_children():
            widget.destroy()
        canvas_agg = FigureCanvasTkAgg(fig, master=self.canvas)
        canvas_agg.draw()
        canvas_agg.get_tk_widget().pack(fill=tk.BOTH, expand=True)

    def speech_segmentation(self):
        """Scan the whole energy curve and report every detected word."""
        for begin_word, end_word in self._iter_words():
            self.word_detected(begin_word, end_word)

    def _iter_words(self):
        """Yield ``(begin_frame, end_frame)`` for each word, ``end_frame`` exclusive.

        Frames below the threshold update the noise estimate. A frame at or
        above it opens a word, which lasts until ``t_sil`` consecutive frames
        fail to exceed the threshold or the signal ends. The threshold is
        adapted while iterating, so the generator mutates ``self.B`` and
        ``self.threshold``.
        """
        n_frames = len(self.energy)
        t = 0
        while t < n_frames:
            if self.energy[t] < self.threshold:
                self.update_threshold(t)
                t += 1
                continue

            begin_word = t
            cmpt_sil = 0  # Consecutive non-speech frames seen so far
            while cmpt_sil < self.t_sil and t < n_frames:
                if self.energy[t] > self.threshold:
                    cmpt_sil = 0
                else:
                    cmpt_sil += 1
                t += 1
            # t now points at the first frame not examined yet; the outer
            # loop resumes there so that no frame is skipped.

            end_word = t - cmpt_sil  # One past the last speech frame
            if t == begin_word:
                # Only reachable with a non-positive t_sil; guarantees progress.
                t += 1

            # end_word is exclusive, so the length is a plain difference.
            word_duration = end_word - begin_word
            if self.t_min <= word_duration <= self.t_max:
                yield begin_word, end_word

    def update_threshold(self, t):
        """Update the noise estimate with frame ``t`` and derive the new threshold.

        ``self.B`` is an exponential moving average of the energy of the
        frames passed in; the threshold is 10 % above it and never lower than
        ``self.min_threshold``.
        """
        alpha = 0.95
        gamma = 0.1
        self.B = alpha * self.B + (1 - alpha) * self.energy[t]
        self.threshold = max((1 + gamma) * self.B, self.min_threshold)

    def _beta_zero(self):
        """Return the initial noise estimate and store it in ``self.beta_zero``.

        It is the mean energy of the frames preceding the first jump larger
        than 10000 between two consecutive frames, or the energy of the first
        frame alone when there is no such jump.
        """
        jumps = np.flatnonzero(np.abs(np.diff(self.energy)) > 10000)
        # Always average at least one frame: a jump right at index 0 would
        # otherwise divide by zero and make every later threshold NaN.
        t0 = max(int(jumps[0]), 1) if jumps.size else 1
        self.beta_zero = np.sum(self.energy[:t0]) / t0
        return self.beta_zero

    def word_detected(self, begin_word, end_word):
        """Print the time span of a word and display an audio player for it.

        Args:
            begin_word: First frame of the word.
            end_word: Frame at which the word ends.
        """
        # Convert frame indices to sample indices.
        begin_word *= self.p
        end_word *= self.p
        print(f'Word detected at {begin_word / self.fs:.2f}s - {end_word / self.fs:.2f}s')
        display(Audio(self.sgn[begin_word:end_word], rate=self.fs))

# Tkinter GUI class
class AudioProcessingApp:
    """Tkinter front-end for loading a WAV file and running the audio tools.

    The window offers six buttons (load, process, spectrogram, energy plot,
    noise cancelling, speech segmentation) above a canvas in which the plots
    are drawn.
    """

    def __init__(self, root):
        """Build the widgets inside ``root`` and initialise the audio state."""
        self.root = root
        self.root.title("Audio Processing App")
        self.root.geometry("600x400")
        self.root.resizable(True, True)
        self.root.configure(bg='#3a5993')

        # Add buttons and canvas
        self.load_button = self._make_button("Load Audio", self.load_audio, 0, 0)
        self.process_button = self._make_button("Process Audio", self.process_audio, 0, 1)
        self.spectrogram_button = self._make_button("Show Spectrogram", self.show_spectrogram, 0, 2)
        self.plot_energy_button = self._make_button("Plot Energy", self.plot_energy, 1, 0)
        self.noise_cancelling_button = self._make_button("Noise Cancelling", self.noise_cancelling, 1, 1)
        self.speech_segmentation_button = self._make_button("Speech Segmentation", self.speech_segmentation, 1, 2)

        self.canvas = tk.Canvas(root, height=290, bg='#3a5993')  # Adjusted height
        self.canvas.grid(row=3, column=0, columnspan=6, sticky="nsew")

        self.audio_file = None  # Path of the loaded file
        self.fs = None  # Sampling rate returned by the WAV reader
        self.sgn = None  # Loaded samples
        self.amp = None  # Spectrogram magnitudes, set by process_audio
        self.ph = None  # Spectrogram phases, set by process_audio
        self.new_sgn = None  # Signal rebuilt from the spectrogram
        self.T = 512  # Frame length used by process_audio
        self.p = 16  # Hop size used by process_audio

    def _make_button(self, text, command, row, column):
        """Create one toolbar button with the shared styling and grid it."""
        button = tk.Button(self.root, text=text, foreground='#ffffff', bg='#3a5993',
                           font=('times', 15, 'bold'), width=15, command=command)
        button.grid(row=row, column=column, padx=5, pady=5)
        return button

    def _show_figure(self, fig):
        """Replace the content of the canvas with ``fig``."""
        for widget in self.canvas.winfo_children():
            widget.destroy()
        canvas_agg = FigureCanvasTkAgg(fig, master=self.canvas)
        canvas_agg.draw()
        canvas_agg.get_tk_widget().pack(fill=tk.BOTH, expand=True)

    def load_audio(self):
        """Ask for a WAV file, load it and reset any previous processing."""
        path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")])
        if not path:
            # Dialog cancelled: keep the file that is already loaded, if any.
            return
        try:
            fs, sgn = read(path)
        except (ValueError, OSError) as err:
            messagebox.showerror("Error", f"Could not read {path}: {err}")
            return
        if sgn.ndim > 1:
            # Multi-channel file: the processing expects a 1-D signal, so
            # average the channels into one.
            sgn = sgn.mean(axis=1)

        self.audio_file = path
        self.fs, self.sgn = fs, sgn
        # Results computed from a previously loaded file no longer apply.
        self.amp = None
        self.ph = None
        self.new_sgn = None
        messagebox.showinfo("Info", f"Loaded {self.audio_file}")
        display(Audio(self.sgn, rate=self.fs))

    def process_audio(self):
        """Compute the spectrogram of the loaded signal and rebuild a signal from it."""
        if self.audio_file:
            self.amp, self.ph = spectrogram(self.sgn, self.T, self.p)
            self.new_sgn = spectrogram2wav(self.amp, self.ph, self.p)
            messagebox.showinfo("Info", "Audio processed")
        else:
            messagebox.showerror("Error", "No audio file loaded")

    def show_spectrogram(self):
        """Draw the current spectrogram in the canvas."""
        if self.amp is not None:
            showspectrogram(self.amp, self.fs, self.canvas)
        else:
            messagebox.showerror("Error", "No audio processed")

    def plot_energy(self):
        """Plot the per-frame energy of the current spectrogram in the canvas."""
        if self.amp is None:
            messagebox.showerror("Error", "No audio processed")
            return

        # A plain Figure is not registered with pyplot, so it is released
        # together with its Tk widget instead of accumulating on every click.
        from matplotlib.figure import Figure

        energy = Energy(self.amp)
        fig = Figure(figsize=(5, 3))  # Set size of the figure
        ax = fig.add_subplot(111)
        ax.plot(energy)
        ax.set_title("Energy Plot")
        ax.set_xlabel("Frame")
        ax.set_ylabel("Energy")
        self._show_figure(fig)

    def noise_cancelling(self):
        """Apply noise cancelling to the current spectrogram magnitudes."""
        if self.amp is not None:
            self.amp = NoiseCancelling(self.amp)
            messagebox.showinfo("Info", "Noise cancelled")
        else:
            messagebox.showerror("Error", "No audio processed")

    def speech_segmentation(self):
        """Run the segmentation engine on the loaded file and plot its energy."""
        if self.audio_file:
            min_threshold = 30  # Example minimum threshold
            segmenter = Engine(self.audio_file, min_threshold, canvas=self.canvas)
            segmenter.speech_segmentation()
            segmenter.plot_energy()
        else:
            messagebox.showerror("Error", "No audio file loaded")

if __name__ == "__main__":
    # Start the GUI only when the file is executed as a script, not on import.
    main_window = tk.Tk()
    gui = AudioProcessingApp(main_window)  # Keep a reference while the loop runs
    main_window.mainloop()
