<a href="https://colab.research.google.com/github/TheGreatWaves/C-base/blob/main/training_notebook_non_dl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import os

import numpy as np
from numpy import ndarray
import pandas as pd
import random

import librosa

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa.display

from tqdm import tqdm

# NOTE: the next import rebinds the name `tqdm` to the *module*, replacing the
# function imported just above. The cells below therefore call `tqdm.tqdm(...)`.
import tqdm

import glob
import torch

from pathlib import Path

In [12]:
# Display the installed librosa version.
librosa.__version__

'0.10.1'

In [13]:
# Mount Google Drive under /content/drive; the dataset read by the cells below
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [14]:
# Pick the PyTorch device name: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cpu'

In [15]:
# Assumptions:
# - Audio files are laid out as <root>/<label>/<clip>.wav, so the name of the
#   directory that holds a clip doubles as its transcription (label).
class AudioDataset:
  """
  Index of audio clips discovered on disk.

  The index is a DataFrame (`self.df`) with one row per `.wav` file and three
  columns:
    - id:            file name without its extension
    - path:          path to the `.wav` file
    - transcription: name of the directory that contains the file
  """

  _COLUMNS = ['id', 'path', 'transcription']

  def __init__(self):
    self.df = pd.DataFrame(columns=self._COLUMNS)

  @staticmethod
  def _make_record(file_path: str) -> dict:
    """Build the index row (as a dict) describing one audio file."""
    file_directory = Path(file_path).parent
    file_name = Path(file_path).stem
    return {
        'id': file_name,
        'transcription': file_directory.name,
        'path': f'{file_directory}/{file_name}.wav'
    }

  def load_transcriptions(self, directory_path: str) -> bool:
    """
    Index every `.wav` file below `directory_path`, subdirectories included.

    Files are visited in sorted order so that the resulting row order -- and
    therefore any seeded shuffle applied to it later -- does not depend on the
    order in which the file system happens to list them.

    Returns False if no `.wav` file was found, True otherwise.
    """
    sound_names = sorted(glob.glob(
        f"{directory_path}/**/*.wav",
        recursive=True
    ))

    if len(sound_names) == 0:
      return False

    # Build all rows first and extend the frame once; enlarging the frame one
    # row at a time gets slower as the frame grows.
    new_rows = pd.DataFrame(
        [self._make_record(path) for path in sound_names],
        columns=self._COLUMNS
    )
    if self.df.empty:
      self.df = new_rows
    else:
      self.df = pd.concat([self.df, new_rows], ignore_index=True)

    return True

  def load_transcription_file(self, file_path: str) -> bool:
    """
    Add a single audio file to the index.

    The id and the transcription are derived from the path alone (file stem
    and parent directory name); the file itself is not opened.
    Always returns True.
    """
    self.df.loc[len(self.df)] = self._make_record(file_path)
    return True

  def keys(self):
    """Return an iterator over the ids currently in the index."""
    return iter(self.df['id'])

  def get(self, id: str):
    """
    Retrieve the dataframe row(s) whose 'id' equals `id`.

    The result is a DataFrame; it is empty when no row matches.
    """
    return self.df.loc[self.df['id'] == id]

In [16]:
random.seed(42)

def split_dataframe(df, ratio):
    """
    Shuffle `df` reproducibly and return the leading share of its rows.

    The rows are shuffled with a fixed `random_state` of 42, re-indexed from
    zero, and the first `int(len(df) * ratio)` of them are returned. The
    remaining rows are not returned.
    """
    shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
    keep_count = int(len(shuffled) * ratio)
    return shuffled.head(keep_count)

In [25]:
# Maps each label (a dataset sub-directory name) to the DataFrame of clips kept for it.
df_dict = {}

In [None]:
# Every entry directly under the dataset root is treated as one label's
# directory. Sorting makes the order of `df_dict` (and of anything built from
# it) independent of the order in which the file system lists the entries.
all_subdirectories = sorted(
    glob.glob("/content/drive/MyDrive/datasets/single_words/*")
)

for path in tqdm.tqdm(all_subdirectories):
  # The directory name is the label, e.g. 'bed'.
  fruit_label = Path(path).name
  ds = AudioDataset()
  # load_transcriptions returns False when no .wav file is found below `path`;
  # such entries are skipped.
  if ds.load_transcriptions(path):
    df_dict[fruit_label] = split_dataframe(ds.df, 0.8)

 17%|█▋        | 5/30 [00:52<04:17, 10.29s/it]

In [None]:
# Display the label -> DataFrame mapping built by the loading loop.
df_dict

In [None]:
# Use the first row kept for the label 'bed' as the running example clip.
sample_df = df_dict['bed']
sample_input_file = sample_df.iloc[0]['path']
print("Selected input file: ", sample_input_file)

In [None]:
# Load the example clip; librosa.load returns the samples and their sampling rate.
audio, sampling_freq = librosa.load(sample_input_file)

# Index the samples by time in seconds so that the x axis really is a time
# axis rather than a sample counter.
time_axis = np.arange(len(audio)) / sampling_freq
pd.Series(audio, index=time_axis).plot(
    figsize=(10, 5), lw=1, title="Audio signal",
    xlabel="Time (s)", ylabel="Amplitude"
);

In [None]:
fft = np.fft.fft(audio)

magnitude = np.abs(fft)
# FFT bin k corresponds to k * sampling_freq / N, so the axis has to stop one
# bin short of sampling_freq (endpoint=False); including the endpoint would
# stretch every bin frequency slightly.
frequency = np.linspace(0, sampling_freq, len(magnitude), endpoint=False)

# Keep only the first half of the bins (0 .. sampling_freq / 2); for a
# real-valued signal the second half mirrors the first.
half = len(frequency) // 2
left_freq = frequency[:half]
left_mag = magnitude[:half]

# |FFT| is a magnitude, not a power (which would be |FFT|**2), hence the labels.
plt.plot(left_freq, left_mag)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Magnitude")
plt.title("Magnitude Spectrum")
plt.show()

In [None]:
# STFT parameters (in samples); also reused by the MFCC cell below.
n_fft = 2048
hop_length = 512

# Short-time Fourier transform -> magnitude -> decibels.
stft = librosa.stft(audio, hop_length=hop_length, n_fft=n_fft)
spectrogram = np.abs(stft)

log_spectrogram = librosa.amplitude_to_db(spectrogram)

# x_axis / y_axis tell specshow how to map frames and bins to seconds and Hz;
# without them the axes carry no meaningful coordinates.
librosa.display.specshow(
    log_spectrogram,
    sr=sampling_freq,
    hop_length=hop_length,
    n_fft=n_fft,
    x_axis="time",
    y_axis="hz",
)
plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")
plt.colorbar(format="%+2.0f dB")
plt.show()


In [None]:
# Pass the actual sampling rate explicitly; otherwise mfcc falls back to its
# own default rate, which is only correct when it matches the loaded audio.
mfccs = librosa.feature.mfcc(
    y=audio, sr=sampling_freq, n_fft=n_fft, hop_length=hop_length, n_mfcc=15
)
librosa.display.specshow(
    mfccs, sr=sampling_freq, hop_length=hop_length, x_axis="time"
)
plt.xlabel("Time (s)")
plt.ylabel("MFCC")
plt.colorbar()
plt.show()

### Extracting Features

In [None]:
# from librosa.feature import mfcc
# import librosa
# audio, sampling_freq = librosa.load(ds.df.head()['path'].values[0])
# mfcc_features = librosa.feature.mfcc(sr=sampling_freq, y=audio)
# print('\nNumber of windows =', mfcc_features.shape[0])
# print('Length of each feature =', mfcc_features.shape[1])

In [None]:
# mfcc_features = mfcc_features.T
# plt.matshow(mfcc_features)
# plt.title('MFCC')

In [None]:
!pip install hmmlearn
!pip install features

In [None]:
import os
import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm  # provides hmm.GaussianHMM, used by HMMTrainer below
import librosa  # used below to load audio files and extract MFCC features

In [None]:
class HMMTrainer(object):
  """
  Thin wrapper around a single hmmlearn Gaussian HMM.

  Parameters:
    model_name:   kind of model to build; only 'GaussianHMM' is supported.
    n_components: number of hidden states given to the model.

  Raises:
    ValueError: if `model_name` is not 'GaussianHMM'.
  """

  def __init__(self, model_name='GaussianHMM', n_components=4):
    # Fail early: without a model, train() and get_score() could only fail
    # later with an AttributeError on `self.model`.
    if model_name != 'GaussianHMM':
      raise ValueError(
          f"Unsupported model_name {model_name!r}: please choose GaussianHMM"
      )

    self.model_name = model_name
    self.n_components = n_components

    # Results of every fit() call made through train().
    self.models = []
    # Honour the requested number of states instead of a hard-coded value.
    self.model = hmm.GaussianHMM(n_components=self.n_components)

  def train(self, X):
    """Fit the wrapped model on `X` and record the value returned by fit()."""
    self.models.append(self.model.fit(X))

  def get_score(self, input_data):
    """Return the wrapped model's score() for `input_data`."""
    return self.model.score(input_data)

In [None]:
# One trained model per label, stored as (HMMTrainer, label) pairs.
hmm_models = []
for fruit_label, label_df in tqdm.tqdm(list(df_dict.items())):
  # The last row of every frame is held back: the evaluation cell further down
  # scores exactly that clip.
  training_rows = label_df.iloc[:-1]

  feature_blocks = []
  for _, row in training_rows.iterrows():
    # Read the input file
    audio, sampling_freq = librosa.load(row['path'])
    # Extract MFCC features
    mfcc_features = librosa.feature.mfcc(sr=sampling_freq, y=audio)
    # NOTE(review): librosa documents the result of feature.mfcc as
    # (n_mfcc, n_frames). If that holds here, `[:, :15]` keeps the first 15
    # *frames* of every coefficient rather than the first 15 coefficients,
    # and stacking on axis 0 turns each coefficient track into one training
    # sample. The evaluation cell slices the same way, and whatever consumes
    # the saved models presumably does too, so the layout is deliberately
    # left unchanged here -- confirm, then fix training and inference
    # together (e.g. `mfcc_features.T[:, :15]`).
    feature_blocks.append(mfcc_features[:, :15])

  if not feature_blocks:
    # Nothing to fit on (the frame had at most one row); fitting an empty
    # array would only raise inside hmmlearn.
    print(f"Skipping '{fruit_label}': no training clips available.")
    continue

  # Stack all clips once instead of growing the array inside the loop.
  X = np.concatenate(feature_blocks, axis=0)

  # Train HMM model for this label
  hmm_trainer = HMMTrainer()
  hmm_trainer.train(X)
  hmm_models.append((hmm_trainer, fruit_label))


In [None]:
# Number of (trainer, label) pairs collected by the training loop.
print(len(hmm_models))

In [None]:
# One evaluation clip per label: the path stored in the last row of each
# frame, i.e. the row that the training loop leaves out with `iloc[:-1]`.
test_files = {
    truth_label: label_df['path'].iloc[-1]
    for truth_label, label_df in df_dict.items()
}

In [None]:
# Show the label -> evaluation clip path mapping.
print(test_files)

In [None]:
# Score every evaluation clip against every trained model and report the label
# of the model with the highest score.
for truth_label, path in test_files.items():
  audio, sampling_freq = librosa.load(path)
  # Extract MFCC features
  mfcc_features = librosa.feature.mfcc(sr=sampling_freq, y=audio)
  # NOTE(review): librosa documents feature.mfcc as returning
  # (n_mfcc, n_frames); if so, this slice keeps the first 15 frames rather
  # than 15 coefficients. It matches what the training loop does, so it must
  # only be changed together with it -- confirm.
  X = mfcc_features[:, :15]
  scores = [hmm_model.get_score(X) for hmm_model, _ in hmm_models]
  index = int(np.argmax(scores))
  # Print the output
  print("\nTrue:", truth_label)
  print("Predicted:", hmm_models[index][1])

In [None]:
#!pip install torchaudio ipywebrtc

In [None]:
# # Download a static FFmpeg build and add it to PATH.
# exist = !which ffmpeg
# if not exist:
#   !curl https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -o ffmpeg.tar.xz \
#      && tar -xf ffmpeg.tar.xz && rm ffmpeg.tar.xz
#   ffmdir = !find . -iname ffmpeg-*-static
#   path = %env PATH
#   path = path + ':' + ffmdir[0]
#   %env PATH $path
# print('')
# !which ffmpeg
# print('Done!')

In [None]:
# from ipywebrtc import AudioRecorder, CameraStream
# import torchaudio
# from IPython.display import Audio

# from google.colab import output
# output.enable_custom_widget_manager()

In [None]:
# camera = CameraStream(constraints={'audio': True,'video':False})
# recorder = AudioRecorder(stream=camera)
# recorder

In [None]:
# # Write to recording.webm
# with open('recording.webm', 'wb') as f:
#     f.write(recorder.audio.value)

# # Convert recording.webm to file.wav
# !ffmpeg -i recording.webm -ac 1 -f wav file.wav -y -hide_banner -loglevel panic

# # Extract features from input and predict
# input_audio, sr = librosa.load('file.wav')
# mfcc_features = librosa.feature.mfcc(sr=sr, y=input_audio)
# X = mfcc_features[:, :15]
# scores = []
# for hmm_model, label in hmm_models:
#   score = hmm_model.get_score(X)
#   scores.append(score)
# index = np.array(scores).argmax()
# print("Predicted:", hmm_models[index][1])

# Saving the model


In [None]:
try:
    import cPickle as pickle
except ModuleNotFoundError:
    import pickle

from google.colab import files

In [None]:
def save_object(obj, filename):
    """
    Pickle `obj` into the file `<filename>.pkl`.

    The `.pkl` suffix is appended to `filename`; the file is created, or
    overwritten if it already exists. The highest pickle protocol available
    is used. Any picklable object works, arrays included.
    """
    target = f"{filename}.pkl"
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(filename):
  """
  Load and return the object pickled in `<filename>.pkl`.

  The `.pkl` suffix is appended to `filename`.

  Raises:
    FileNotFoundError: if `<filename>.pkl` does not exist.
  """
  # SECURITY: unpickling can execute arbitrary code stored in the file; only
  # load files that come from a trusted source.
  with open(f"{filename}.pkl", 'rb') as inp:
    return pickle.load(inp)

# ==== Usage ====
#
# Save object (executed): writes the trained (trainer, label) pairs to
# 'hmm_model.pkl' in the current working directory.
save_object(hmm_models, 'hmm_model')

# Load object (example only, not executed):
# foo_obj = load_object('foo')

In [None]:
# Download the pickle written by save_object above from the Colab runtime.
files.download("hmm_model.pkl")