In [13]:
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile
from scipy.signal import spectrogram
from scipy.signal import find_peaks

In [14]:
def generate_spectrogram(input_folder, output_folder):
    directories_names = os.listdir(input_folder)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for directory in directories_names:
        selected_folder = os.path.join(input_folder, directory)
        for file in os.listdir(selected_folder):
            if file.endswith('.wav'):
                file_path = os.path.join(selected_folder, file)
                samplerate, data = wavfile.read(file_path)

                # Ensure data is mono for simplicity
                if len(data.shape) > 1:
                    data = np.mean(data, axis=1)

                # Trim to first 30 seconds
                data = data[:samplerate * 20]

                # Generate spectrogram
                frequencies, times, Sxx = spectrogram(data, fs=samplerate)
                plt.pcolormesh(times, frequencies, 10 * np.log10(Sxx), shading='gouraud')
                plt.colorbar(label='Intensity [dB]')
                plt.title(f"Spectrogram: {file}")
                plt.xlabel('Time [s]')
                plt.ylabel('Frequency [Hz]')

                # Save the spectrogram
                output_file = os.path.join(output_folder, f"{file}_spectrogram.png")
                plt.savefig(output_file)
                plt.close()
                
                # save the spectrogram data as npy
                output_file = os.path.join(output_folder, f"{file}_spectrogram.npy")
                np.save(output_file, Sxx)
                


In [15]:
def extract_features(data, samplerate):
    # Extract peaks
    frequencies, times, Sxx = spectrogram(data, fs=samplerate)
    features = []

    for i in range(Sxx.shape[1]):
        peaks, _ = find_peaks(Sxx[:, i], height=0.5 * np.max(Sxx[:, i]))
        features.append(frequencies[peaks])

    return features


In [None]:
def plot_spectrogram_using_librosa(audio_data, sample_rate):
    """
    Plots the spectrogram of the audio data.

    Parameters:
        audio_data (numpy.ndarray): The audio signal data.
        sample_rate (int): The sample rate of the audio data.
    """
    S = np.abs(librosa.stft(audio_data))
    S_db = librosa.amplitude_to_db(S, ref=np.max)
    
    plt.figure(figsize=(10, 6))
    librosa.display.specshow(S_db, sr=sample_rate, x_axis="time", y_axis="hz", cmap='magma')
    # plt.specgram(audio_data, Fs=sample_rate, NFFT=1024, noverlap=512, cmap="YlOrRd")
    plt.colorbar(label="Intensity [dB]")
    plt.title("Spectrogram")
    plt.xlabel("Time [s]")
    plt.ylabel("Frequency [Hz]")
    plt.show()

from scipy.signal import find_peaks
import numpy as np
import matplotlib.pyplot as plt

def plot_spectrogram_with_peaks(data, sample_rate, title="Spectrogram with Peaks"):
    try:
        # Parameters for the STFT
        n_fft = 1024
        hop_length = 512

        # Compute the STFT using numpy
        num_segments = (len(data) - n_fft) // hop_length + 1
        S = np.empty((n_fft // 2 + 1, num_segments))
        for i in range(num_segments):
            start = i * hop_length
            segment = data[start:start + n_fft] * np.hanning(n_fft)  # Apply a window function
            fft_segment = np.fft.rfft(segment)
            S[:, i] = np.abs(fft_segment) ** 2  # Power spectrum

        # Frequency and time axes
        freqs = np.fft.rfftfreq(n_fft, d=1/sample_rate)
        times = np.arange(num_segments) * hop_length / sample_rate

        # Convert to dB scale
        S_db = 10 * np.log10(S + 1e-10)  # Adding a small value to avoid log(0)

        # Plot the spectrogram
        plt.figure(figsize=(10, 6))
        plt.pcolormesh(times, freqs, S_db, shading='gouraud', cmap='viridis')
        plt.colorbar(label="Magnitude (dB)")
        plt.title(title)
        plt.xlabel("Time (s)")
        plt.ylabel("Frequency (Hz)")

        # Find peaks for each time slice (frequency axis)
        for i in range(S_db.shape[1]):  # Loop over time slices
            peaks, _ = find_peaks(S_db[:, i], height=np.max(S_db[:, i]) * 0.5)  # Adjust height threshold
            peak_freqs = freqs[peaks]
            peak_times = [times[i]] * len(peaks)
            print (peak_freqs)

            # Plot the peaks
            plt.scatter(peak_times, peak_freqs, color='red', s=10)

        plt.show()
        return peak_freqs

    except Exception as e:
        print(f"An error occurred while plotting the spectrogram: {e}")

def play_audio(audio_data, sample_rate=mozart_sample_rate):
    """
    Plays the audio data.

    Parameters:
        audio_data (numpy.ndarray): The audio signal data.
        sample_rate (int): The sample rate of the audio data.
    """
    # Normalize audio for IPython playback
    normalized_audio = np.int16(audio_data / np.max(np.abs(audio_data)) * 32767)

    # Temporary WAV file (to allow playback in Jupyter)
    write("temp_audio.wav", sample_rate, normalized_audio)
    return ipd.Audio("temp_audio.wav")

def plot_magnitude(ft_data, _frequencies):
    """
    Plots the magnitude of the Fourier Transform.
    :param _frequencies:
    :param ft_data:
    :return:
    """
    _ft_magnitude = np.abs(ft_data)
    plt.figure(figsize=(10, 6))
    plt.plot(_frequencies, _ft_magnitude)
    plt.title("Fourier Transform")
    plt.xlabel("Frequency [Hz]")
    plt.ylabel("Magnitude")
    plt.show()

def inverse_ft(ft_data):
    _shifted_ft = np.fft.ifftshift(ft_data)
    _filtered_signal = np.fft.ifft(_shifted_ft)
    _filtered_signal = np.real(_filtered_signal)
    return _filtered_signal

def export_to_file(audio_data, file_name, sample_rate=mozart_sample_rate):
    """
    Exports the audio data to a file.

    Parameters:
        audio_data (numpy.ndarray): The audio signal data.
        sample_rate (int): The sample rate of the audio data.
        file_name (str): The name of the file to save the audio data to.
    """
    # Normalize audio for IPython playback
    normalized_audio = np.int16(audio_data / np.max(np.abs(audio_data)) * 32767)

    # Save the audio data to a WAV file
    write(file_name, sample_rate, normalized_audio)

In [12]:
input_folder = "Task 5 Data/original data"
output_folder = "Task 5 Data/generated spectrogram"
generate_spectrogram(input_folder, output_folder)

Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!


  plt.pcolormesh(times, frequencies, 10 * np.log10(Sxx), shading='gouraud')


Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!


  samplerate, data = wavfile.read(file_path)


Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
Spectrograms generated successfully!
