Audio Visualization Helpers: Waveform, Spectrum, Spectrogram and Mel Spectrogram


In [1]:
# Imports
import json
import math
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.fft

In [2]:
def plot_waveforms(audio, fs):
    """Draws the time-domain waveform of an audio signal and displays it.

    Parameters:
        audio (numpy.ndarray): audio signal
        fs (int): sampling frequency (Hz) of audio signal

    Returns:
        None. The figure is shown via plt.show().
    """
    # A fresh 12x6 figure with a single Axes; the waveform is drawn onto it.
    _, axes = plt.subplots(figsize=(12, 6))
    librosa.display.waveshow(audio, sr=fs, alpha=0.58, ax=axes)

    # Replace whatever axis labels waveshow applied with explicit ones.
    axes.set_xlabel("Time (s)")
    axes.set_ylabel("Amplitude")
    plt.show()

In [3]:
def calculate_spectrum(audio, kind='mag'):
    """
    Calculates the spectrum (full-length FFT) of an audio signal.

    Parameters:
        audio (numpy.ndarray): audio signal
        kind (str): 'mag' for magnitude, 'phase' for phase, 'complex' for complex

    Returns:
        numpy.ndarray with one value per FFT bin:
            'mag'     -> magnitude in dB, 20*log10(|X|); bins whose magnitude
                         is exactly zero come out as -inf
            'phase'   -> phase angle in radians (np.angle)
            'complex' -> the raw complex FFT coefficients

    Raises:
        ValueError: if kind is not one of 'mag', 'phase', 'complex'
    """
    # Reject a bad selector before spending time on the transform.
    if kind not in ('mag', 'phase', 'complex'):
        raise ValueError('Invalid kind')

    spec = scipy.fft.fft(audio)
    if kind == 'mag':
        return 20 * np.log10(np.abs(spec))
    if kind == 'phase':
        return np.angle(spec)
    # 'complex': hand back the coefficients untouched. Taking log10 of a
    # complex array (as was done before) yields a complex logarithm, which is
    # neither the complex spectrum nor a meaningful dB quantity.
    return spec

In [4]:
#Function to plot spectrum 
def plot_spec(audio, fs, kind):
    """
    Plots the one-sided spectrum of an audio signal on a logarithmic
    frequency axis.

    Parameters:
        audio (numpy.ndarray): audio signal
        fs (int): sampling frequency (Hz) of audio signal
        kind (str): 'mag' for magnitude, 'phase' for phase, 'complex' for complex;
            forwarded unchanged to calculate_spectrum. Values returned for
            'complex' are handed to matplotlib as-is.

    Returns:
        None. The figure is shown via plt.show().
    """
    spectrum = calculate_spectrum(audio, kind)
    n_bins = len(spectrum)
    half = n_bins // 2

    # Bin k of an N-point FFT sits at k * fs / N. (linspace(0, fs, N) would
    # include the endpoint and space the bins by fs / (N - 1) instead.)
    frequency_axis = np.arange(half) * fs / n_bins
    # Keep only the first half of the bins, i.e. frequencies below Nyquist.
    spectrum = spectrum[:half]

    # Octave-style tick marks; anything above Nyquist is dropped so the ticks
    # never stretch the axis past the plotted data.
    tick_positions = [1, 2, 4, 8, 16, 31, 63, 125, 250, 500,
                      1000, 2000, 5000, 10000, 20000]
    tick_labels = ["1", "2", "4", "8", "16", "31", "63", "125", "250", "500",
                   "1K", "2K", "5K", "10K", "20k"]
    nyquist = fs / 2
    visible = [(position, label)
               for position, label in zip(tick_positions, tick_labels)
               if nyquist >= position]

    y_labels = {'mag': "Magnitude (dB)", 'phase': "Phase (rad)"}

    plt.figure(figsize=(12, 6))
    # The 0 Hz bin cannot be placed on a logarithmic axis, so start at bin 1.
    plt.plot(frequency_axis[1:], spectrum[1:])
    plt.xscale('log')
    plt.xlabel("Frequency (Hz)")
    plt.ylabel(y_labels.get(kind, "Spectrum"))
    if visible:
        plt.xticks([position for position, _ in visible],
                   [label for _, label in visible])
    # All decoration must happen before show(); anything after it would land
    # on a new, empty figure instead of the one being displayed.
    plt.show()


In [5]:
def calculate_stft(audio, fs, n_fft=2048, hop_length=512, dB=True):
    """
    Computes the magnitude of the Short-Time Fourier Transform (STFT) of an
    audio signal.

    Parameters:
        audio (numpy.ndarray): audio signal
        fs (int): sampling frequency (Hz) of audio signal; part of the
            signature but not used by the computation itself
        n_fft (int): number of samples per frame
        hop_length (int): number of samples between frames
        dB (bool): if True, returns the magnitude in decibels

    Returns:
        The STFT magnitude, converted with librosa.amplitude_to_db when dB is
        True and left on a linear scale otherwise.
    """
    magnitude = np.abs(librosa.stft(audio, n_fft=n_fft, hop_length=hop_length))
    # Convert to a log (dB) scale only on request.
    return librosa.amplitude_to_db(magnitude) if dB else magnitude

In [6]:
def spectrogram(audio, fs, n_fft = 2048, hop_length = 512, dB = True):
    """
    Plots the spectrogram of an audio signal.

    Parameters:
        audio (numpy.ndarray): audio signal
        fs (int): sampling frequency (Hz) of audio signal
        n_fft (int): number of samples per frame
        hop_length (int): number of samples between frames
        dB (bool): if True, the magnitude is plotted in decibels; otherwise
            the linear magnitude is plotted

    Returns:
        None. The figure is shown via plt.show().
    """
    stft_mag = calculate_stft(audio, fs, n_fft, hop_length, dB)

    plt.figure(figsize=(12, 6))
    librosa.display.specshow(stft_mag, sr=fs, hop_length=hop_length,
                             x_axis='time', y_axis='linear', cmap='inferno')
    plt.title('Spectrogram')
    # Only tag the colorbar with "dB" when the data really is in decibels.
    plt.colorbar(format='%+2.0f dB' if dB else None)
    # Layout has to be adjusted before show(); afterwards it would act on a
    # new, empty figure rather than the one being displayed.
    plt.tight_layout()
    plt.show()

In [None]:
def calculate_mel_spectrogram(audio, fs, n_mfcss=128, n_fft = 2048, hop_length = 512):
    """
    Calculates the mel spectrogram of an audio signal in decibels.

    This was previously also named plot_mel_spectrogram and was therefore
    overwritten by the plotting function defined right after it.

    Parameters:
        audio (numpy.ndarray): audio signal
        fs (int): sampling frequency (Hz) of audio signal
        n_mfcss (int): number of mel bands (passed to librosa as n_mels)
        n_fft (int): number of samples per frame
        hop_length (int): number of samples between frames

    Returns:
        The mel power spectrogram converted to dB relative to its maximum
        (librosa.power_to_db with ref=np.max).
    """
    # The signal is passed as y=... because recent librosa versions accept it
    # by keyword only.
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=fs, n_mels=n_mfcss,
                                              n_fft=n_fft, hop_length=hop_length)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db


def plot_mel_spectrogram(mfcc, fs, hop_length=512):
    """
    Plots a mel spectrogram.

    Parameters:
        mfcc (numpy.ndarray): mel spectrogram to display; the colorbar labels
            its values as dB (despite the parameter name, the plot treats it
            as a mel spectrogram, not as MFCC coefficients)
        fs (int): sampling frequency (Hz) of audio signal
        hop_length (int): number of samples between frames used when the
            spectrogram was computed; needed for a correct time axis

    Returns:
        None. The figure is shown via plt.show().
    """
    plt.figure(figsize=(12, 6))
    librosa.display.specshow(mfcc, sr=fs, hop_length=hop_length,
                             x_axis='time', y_axis='mel', cmap='inferno')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')
    plt.tight_layout()
    plt.show()