# Coding Section
- All provided code is optional and is only there to help you; feel free to edit it or replace it with your own implementation.

## Imports

In [None]:
import matplotlib.pyplot as plt
import librosa
import pandas as pd
import numpy as np

## Reading Data

- Write Python code that reads four audio files (WAV format). For each file, load the audio using librosa and store the results in a dictionary where the **keys** are the **labels** and the values are tuples containing:

  - The audio signal
  - The sampling rate

- Output Example:
  - { "e": (signal_e, sr_e) }

In [None]:
# Labels of the recordings; each label is meant to become a key of audio_dict.
labels = ["e","j","3","7"]

# Intended mapping: label -> (audio signal, sampling rate).
# Left empty here: the loading code (one WAV file per label, read with librosa)
# still has to be written.
audio_dict = {}



## Isolation

In [None]:
def isolator():
  """
  Isolate individual strokes.

  NOTE: this is an unimplemented placeholder. It currently takes no
  arguments and returns None; the description below is the intended
  contract once it is implemented.

  Parameters
  ----------
  None yet. Add whatever inputs the isolation logic needs.

  Returns
  -------
  strokes : list of torch.Tensor or numpy array (or whichever datatype you use)
      A list of 1D tensors or arrays, each containing an isolated audio
      segment (stroke).
  """

  return

In [None]:
# Collect every isolated stroke together with its label, then turn the two
# parallel lists into a DataFrame with one row per stroke.
data = {
    "Label": [],
    "Signal": []
}

for label, (signal, sample_rate) in audio_dict.items():

    # NOTE(review): isolator() is called without arguments, so `signal` and
    # `sample_rate` are not used here yet; pass them once isolator() accepts
    # parameters.
    strokes = isolator()

    # Optional sanity check. Use whichever of the two lines matches the datatype
    # returned by isolator():
    # print(label, len(strokes)) or print(label, strokes.shape)

    for stroke in strokes:
        data["Label"].append(label)
        data["Signal"].append(stroke)

df = pd.DataFrame(data)
# Bare expression: shows the DataFrame when this is the last line of a notebook cell.
df

In [None]:
def extract_features(signal, sample_rate, n_mfcc=13):
    """Summarise an audio signal with MFCC statistics.

    Parameters
    ----------
    signal : array-like
        Audio samples; converted with ``np.asarray`` before use.
    sample_rate : int or float
        Sampling rate passed to librosa as ``sr``.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from librosa (default 13).

    Returns
    -------
    np.ndarray
        The mean of the MFCC matrix along axis 1 followed by its standard
        deviation along axis 1, stacked horizontally into one vector.
    """
    samples = np.asarray(signal)
    coeffs = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)
    coeff_means = np.mean(coeffs, axis=1)
    coeff_stds = np.std(coeffs, axis=1)
    return np.hstack([coeff_means, coeff_stds])

In [None]:
# Build the feature matrix X (one feature vector per stroke) and the matching
# label vector y from the stroke table `df`.
X = []
y = []

for label, signal in zip(df["Label"], df["Signal"]):
    # Use the sampling rate of the recording this stroke came from. Relying on
    # a `sample_rate` variable left over from an earlier loop would apply the
    # last file's rate to every stroke.
    sample_rate = audio_dict[label][1]
    feat = extract_features(signal.squeeze(), sample_rate)
    X.append(feat)
    y.append(label)

X = np.array(X)
y = np.array(y)
print("X Shape is:", X.shape)
print("y Shape is:", y.shape)

## GMM

In [None]:
def gaussian_pdf(x, mean, cov):
    """Evaluate a multivariate Gaussian density at a single point.

    Parameters
    ----------
    x : array-like of shape (D,)
        Point at which the density is evaluated.
    mean : array-like of shape (D,)
        Mean vector of the Gaussian.
    cov : array-like of shape (D, D)
        Covariance matrix. It is NOT modified; a small ridge (1e-6 on the
        diagonal) is added to an internal copy for numerical stability.

    Returns
    -------
    float
        The density value N(x | mean, cov + 1e-6 * I).
    """
    x = np.asarray(x, dtype=float)
    d = len(x)
    # Work on a float copy: an in-place `cov += ...` would change the caller's
    # matrix on every call (the ridge would accumulate across EM iterations)
    # and fails outright for integer-typed arrays.
    cov = np.asarray(cov, dtype=float) + np.eye(d) * 1e-6  # numerical stability
    det = np.linalg.det(cov)
    norm = 1.0 / np.sqrt((2 * np.pi) ** d * det)
    diff = x - mean
    # Solve cov @ z = diff instead of forming the explicit inverse.
    return norm * np.exp(-0.5 * diff @ np.linalg.solve(cov, diff))

- You can ignore this function description and implement your own, but it should correctly implement train_gmm **from scratch.**

In [None]:
def train_gmm(X, K, n_iters=2):
    """
    Train a Gaussian Mixture Model (GMM) on the given dataset using the
    Expectation-Maximization (EM) algorithm.

    NOTE: the body below is still a placeholder (`pass`), so the function
    currently returns None. Everything in this docstring describes the
    intended behavior of the implementation.

    Parameters
    ----------
    X : np.ndarray
        Input data matrix of shape (N, D), where N is the number of samples
        and D is the dimensionality of each feature vector.
    K : int
        Number of Gaussian components in the mixture model.
    n_iters : int, optional
        Number of EM iterations to perform (default is 2).

    Returns
    -------
    weights : np.ndarray
        Array of mixture weights for each Gaussian component (length K).
    means : np.ndarray
        Array of mean vectors for each Gaussian component (shape K x D).
    covs : list of np.ndarray
        List containing the covariance matrices for each Gaussian component
        (each of shape D x D).

    Method
    ------
    1. Initialize:
       - Select K random samples from X as the initial means.
       - Initialize each component's covariance matrix using the sample covariance.
       - Set mixture weights uniformly (1/K for each component).

    2. Repeat EM steps for n_iters iterations:
       a) E-step (Expectation):
          - For each data point and each Gaussian component, compute the
            responsibility (posterior probability) using the current means,
            covariances, and mixture weights.
          - Normalize the responsibilities across components for each data point.
          - Compute Nk, the effective number of points assigned to each component.

       b) M-step (Maximization):
          - Update each Gaussian's mean using the weighted average of the data,
            weighted by the responsibilities.
          - Update each Gaussian's covariance matrix using the weighted outer
            product of deviations from the mean.
          - Update the mixture weights as Nk divided by the total number of samples.

    Notes
    -----
    - This implementation uses a full covariance matrix for each Gaussian.
    - The function assumes a multivariate Gaussian PDF for computing responsibilities.
    - The returned parameters can be used directly for likelihood computation
      or for making predictions with new data.
    """
    # Placeholder: the EM algorithm described above is not implemented yet.
    pass


In [None]:
# Fit one GMM per class label and keep the results keyed by label.
K = 4  # number of Gaussian components
gmms = {}

for label in np.unique(y):
    # Boolean mask selecting the feature rows that belong to this label.
    class_mask = y == label
    X_class = X[class_mask]
    gmms[label] = train_gmm(X_class, K)
    print("Train is done for label:   ", label)

# Discussion

## Explain your isolation logic

## What is the main use of GMM models?