# Exploration Notebook

## Specify Data Source

In [None]:
from config import *

In [None]:
dataset_path = DATA_PATH

In [None]:
hungry = dataset_path + 'hungry'

In [None]:
hungry

## Load Data

### Check Contents of Data Sub Folders

In [None]:
import os
import librosa
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
import IPython.display as ipd

In [None]:

# os.listdir returns every entry in the folder (files and sub-folders alike),
# so this is the number of audio clips only if the folder holds nothing else.
audio_files = os.listdir(hungry)
print('Number of audio files in hungry folder: ', len(audio_files))

In [None]:
# The folder name is appended with plain string concatenation, so dataset_path
# must already end with a path separator.
burping = dataset_path + 'burping'
# os.listdir counts every entry (files and sub-folders), not only audio clips.
audio_files = os.listdir(burping)
print('Number of audio files in burping folder: ', len(audio_files))

In [None]:
# The folder name is appended with plain string concatenation, so dataset_path
# must already end with a path separator.
discomfort = dataset_path + 'discomfort'
# os.listdir counts every entry (files and sub-folders), not only audio clips.
audio_files = os.listdir(discomfort)
print('Number of audio files in discomfort folder: ', len(audio_files))

In [None]:
# The folder name is appended with plain string concatenation, so dataset_path
# must already end with a path separator.
tired = dataset_path + 'tired'
# os.listdir counts every entry (files and sub-folders), not only audio clips.
audio_files = os.listdir(tired)
print('Number of audio files in tired folder: ', len(audio_files))

In [None]:
# The folder name is appended with plain string concatenation, so dataset_path
# must already end with a path separator.
belly_pain = dataset_path + 'belly_pain'
# os.listdir counts every entry (files and sub-folders), not only audio clips.
audio_files = os.listdir(belly_pain)
print('Number of audio files in belly_pain folder: ', len(audio_files))

### Load in an Audio Sample to Visualise

#### Load audio file

In [None]:
# The path is built by string concatenation. HUNGRY_TEST_SAMPLE is presumably
# supplied by `from config import *` and is expected to start with a path
# separator, because `hungry` has no trailing one — TODO confirm.
audio_file = hungry + HUNGRY_TEST_SAMPLE
# sr=None keeps the file's native sampling rate instead of resampling to librosa's default.
audio_data, sample_rate = librosa.load(audio_file, sr=None)

In [None]:
print('Audio data: ', audio_data)
print('Sampling rate: ', sample_rate)
print('Audio duration: ', len(audio_data)/sample_rate, 'seconds')


#### Visualise Audio Waveform

In [None]:
plt.plot(audio_data)
plt.title('Female 0 - 4 (hungry)')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

#### Spectrogram to visualise Frequency Content Over Time

In [None]:
X = librosa.stft(audio_data)
Xdb = librosa.amplitude_to_db(abs(X))
librosa.display.specshow(Xdb, sr=sample_rate, x_axis='time', y_axis='hz')
plt.colorbar()
plt.title('Female 0 - 4 (hungry)')
plt.show()


### Compare Two Waveforms from Same Category to Observe Similarities

#### Load Audio File From Same Sub Folder

In [None]:
# Second clip from the same "hungry" folder, used for the same-category comparison.
audio_file2 = hungry + '/0D1AD73E-4C5E-45F3-85C4-9A3CB71E8856-1430742197-1.0-m-04-hu.wav'
# Load audio_file2 (not audio_file): re-loading the first clip would turn the
# comparison that follows into a clip-versus-itself comparison.
# sr=None keeps the file's native sampling rate.
y2, sample_rate2 = librosa.load(audio_file2, sr=None)

##### Extract MFCCs for Both Audio Files

In [None]:
mfccs1 = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
mfccs2 = librosa.feature.mfcc(y=y2, sr=sample_rate2, n_mfcc=13)

#### Compute Cosine Similarity and Average Similarity

In [None]:
# librosa returns MFCCs as (n_mfcc, frames); transposing makes each row one frame,
# so similarity_matrix[i, j] compares frame i of the first clip with frame j of the second.
similarity_matrix = cosine_similarity(mfccs1.T, mfccs2.T)
# Single summary score: the mean over all frame pairs (no time alignment is performed).
average_similarity = similarity_matrix.mean()

In [None]:
print("Average Cosine Similarity:", average_similarity)
# An average cosine similarity close to 1 means that the two audio files are similar

In [None]:
plt.plot(y2)
plt.title('Male 0 - 4 (hungry)')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
X = librosa.stft(y2)
Xdb = librosa.amplitude_to_db(abs(X))
librosa.display.specshow(Xdb, sr=sample_rate2, x_axis='time', y_axis='hz')
plt.colorbar()
plt.title('Male 0 - 4 (hungry)')
plt.show()

#### Compare a Hungry cry to a Burping cry

In [None]:
burping = dataset_path + 'burping'
audio_file3 = burping + '/F24DE44B-762C-4149-AC92-96A5E57ED118-1430816949-1.0-m-04-bu.wav'
y3, sample_rate3 = librosa.load(audio_file3, sr=None)


In [None]:
mfccs3 = librosa.feature.mfcc(y=y3, sr=sample_rate3, n_mfcc=13)

In [None]:
similarity_matrix = cosine_similarity(mfccs1.T, mfccs3.T)
average_similarity2 = similarity_matrix.mean()

In [None]:
print("Average Cosine Similarity:", average_similarity2)
# An average cosine similarity close to 1 means that the two audio files are similar (in this case, they are not)

In [None]:
print(sample_rate, sample_rate2, sample_rate3)

In [None]:

# NOTE: this is the same burping clip path that audio_file3 points to.
audio_file4 = burping + '/F24DE44B-762C-4149-AC92-96A5E57ED118-1430816949-1.0-m-04-bu.wav'
# sr=None keeps the file's native sampling rate so that original_sr is the true rate.
original_audio, original_sr = librosa.load(audio_file4, sr=None)

# Resample the audio to the desired sampling rate
# (this is an upsample only when original_sr is below target_sr)
target_sr = 44100  # Example: desired sampling rate
resampled_audio = librosa.resample(original_audio, orig_sr=original_sr, target_sr=target_sr)


In [None]:
# Extract MFCCs from the resampled audio
# https://medium.com/@tanveer9812/mfccs-made-easy-7ef383006040
mfccs = librosa.feature.mfcc(y=resampled_audio, sr=target_sr, n_mfcc=13)

# Visualize the MFCCs
plt.figure(figsize=(10, 6))
# Pass sr explicitly: specshow otherwise scales the time axis with its default
# sampling rate (22050 Hz), which would mislabel audio resampled to target_sr.
librosa.display.specshow(mfccs, sr=target_sr, x_axis='time')
plt.colorbar()
plt.title('MFCCs after Resampling')
plt.ylabel('MFCC Coefficients')
plt.xlabel('Time')
plt.show()


## Data Augmentation Technique Exploration
* Noise injection
* Time Shift
* Pitch Shift
* Time Compress
* Time Stretch

In [None]:
# before augmentation
plt.figure(figsize=(4, 3))
plt.plot(audio_data)
plt.title('Male 0 - 4 (hungry)')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# play the original audio
ipd.Audio(audio_data, rate=sample_rate)

### Noise Injection Example ###

* https://medium.com/@makcedward/data-augmentation-for-audio-76912b01fdf6

In [None]:
import numpy as np

def inject_noise(audio_data, noise_factor):
    """Return a copy of ``audio_data`` with additive Gaussian noise.

    Args:
        audio_data: Sequence or array of audio samples. The noise vector is
            generated with length ``len(audio_data)``.
        noise_factor: Scale applied to the standard-normal noise before it is
            added; ``0`` leaves the sample values unchanged.

    Returns:
        A new NumPy array with the same dtype as the input samples. The input
        itself is not modified.
    """
    samples = np.asarray(audio_data)
    noise = np.random.randn(len(samples))
    augmented_data = samples + noise_factor * noise
    # Cast back to the input dtype (the float64 noise promotes e.g. float32 samples).
    # Reading the dtype from the array, rather than from the first element,
    # keeps this working for empty input.
    return augmented_data.astype(samples.dtype)

In [None]:
augmented_data_noise = inject_noise(audio_data, 0.015)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(augmented_data_noise)
plt.title('Male 0 - 4 (hungry) - Inject Noise')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# play the augmented audio
ipd.Audio(augmented_data_noise, rate=sample_rate)

### Time Shift Example

In [None]:
def shift_time(data, sampling_rate, shift_max, shift_direction):
    """Shift audio by a random number of samples and silence the vacated part.

    Args:
        data: Array of audio samples.
        sampling_rate: Sampling rate of ``data`` (samples per second).
        shift_max: Upper bound of the shift in seconds. The offset is drawn
            from ``[0, int(sampling_rate * shift_max))`` samples; a window of
            zero samples means no shift.
        shift_direction: ``'left'`` rolls with a positive offset (samples move
            toward higher indices and the start is silenced), ``'right'``
            rolls with a negative offset (samples move toward index 0 and the
            end is silenced), ``'both'`` picks one of the two at random.

    Returns:
        A new array with the same length as ``data``; the input is not modified.

    Raises:
        ValueError: If ``shift_direction`` is not ``'left'``, ``'right'`` or
            ``'both'``, or if ``sampling_rate * shift_max`` is negative.
    """
    if shift_direction not in ('left', 'right', 'both'):
        raise ValueError(
            "shift_direction must be 'left', 'right' or 'both', got %r"
            % (shift_direction,)
        )

    # randint needs an integer bound; sampling_rate * shift_max is usually a float.
    max_shift = int(sampling_rate * shift_max)
    if max_shift < 0:
        raise ValueError("sampling_rate * shift_max must be non-negative")
    # np.random.randint(0) raises, so an empty window simply means "no shift".
    shift = np.random.randint(max_shift) if max_shift > 0 else 0

    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift

    augmented_data = np.roll(data, shift)
    # np.roll wraps samples around; replace the wrapped-around part with silence.
    # A zero shift must leave the signal untouched (a slice starting at 0 would
    # otherwise silence the whole clip).
    if shift > 0:
        augmented_data[:shift] = 0
    elif shift < 0:
        augmented_data[shift:] = 0
    return augmented_data

In [None]:
augemented_data_time_shifted = shift_time(audio_data, sample_rate, 0.5, 'right')

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(augemented_data_time_shifted)
plt.title('Male 0 - 4 (hungry) - Data Shifted')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# play the augmented audio
import IPython.display as ipd
ipd.Audio(augemented_data_time_shifted, rate=sample_rate)


### Pitch Shift Example

In [None]:
from librosa.effects import pitch_shift

In [None]:
shift_pitched_data = pitch_shift(audio_data, sr=sample_rate, n_steps=2)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(shift_pitched_data)
plt.title('Male 0 - 4 (hungry) - Pitch Shifted')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# play the augmented audio
ipd.Audio(shift_pitched_data, rate=sample_rate)

### Time Stretch Example

In [None]:
time_stretched_data = librosa.effects.time_stretch(audio_data, rate=0.8)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(time_stretched_data)
plt.title('Male 0 - 4 (hungry) - Time Stretched')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# play the augmented audio
ipd.Audio(time_stretched_data, rate=sample_rate)

### Time Compression Example

In [None]:
# NOTE: librosa.mu_compress applies mu-law companding to the sample amplitudes; it does
# not shorten the clip (compare len(time_compress_data) with len(audio_data)).
# The tempo-based compression is the time_stretch(..., rate=1.2) call in the following cell.
time_compress_data = librosa.mu_compress(audio_data, mu=63)

In [None]:
time_compress_data2 = librosa.effects.time_stretch(audio_data, rate=1.2)

In [None]:
len(time_compress_data2)

In [None]:
len(audio_data)

In [None]:
len(time_compress_data)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(time_compress_data)
# time_compress_data is produced by librosa.mu_compress (mu-law amplitude
# companding), so label the plot accordingly rather than as time compression.
plt.title('Male 0 - 4 (hungry) - Mu-law Compressed')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

In [None]:
ipd.Audio(time_compress_data, rate=sample_rate)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(time_compress_data2)
plt.title('Male 0 - 4 (hungry) - Time Compressed')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()


In [None]:
ipd.Audio(time_compress_data2, rate=sample_rate)

#### Stretching using AudioStretchy

In [None]:
import audiostretchy
from audiostretchy.stretch import stretch_audio

stretch_audio(audio_file, "output.wav", ratio=1.1)

In [None]:
aud_stretch, sr = librosa.load("output.wav", sr=None)

In [None]:
plt.figure(figsize=(4, 3))
plt.plot(aud_stretch)
plt.title('Male 0 - 4 (hungry) - Audio Stretched')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

# New Dataset

In [None]:
import librosa

# NOTE(review): no file is loaded in this cell, so audio_data / sample_rate are still
# the values from the clip loaded earlier in the notebook. The spectrogram therefore
# may not belong to the file named in the title below — TODO confirm and load that file.
X = librosa.stft(audio_data)
# Convert the STFT magnitudes to decibels for display.
Xdb = librosa.amplitude_to_db(abs(X))
librosa.display.specshow(Xdb, sr=sample_rate, x_axis='time', y_axis='hz')
plt.colorbar()
plt.title('Actor 01 - 03-01-02-01-02-02-01.wav')
plt.show()

In [None]:
root_data = '../data/processed/'

In [None]:
import os

root_data = '../data/processed/'


def _count_files(folder):
    """Return the number of files found anywhere beneath ``folder``."""
    n_files = 0
    for _dirpath, _dirnames, filenames in os.walk(folder):
        n_files += len(filenames)
    return n_files


# Tally each split on its own, then combine them for the overall total.
total_train = _count_files(root_data + 'train/')
total_val = _count_files(root_data + 'val/')
total_test = _count_files(root_data + 'test/')
total_data = total_train + total_val + total_test

print('Total data: ', total_data)
print('Training data: ', total_train)
print('Validation data: ', total_val)
print('Testing data: ', total_test)


In [None]:
import config

wav_files = config.WAV_DIR_PATH

print(wav_files)

In [None]:
# Note: librosa.load resamples to 22050 Hz unless sr=None is passed, so the native rates are inspected here.

def check_sample_rate(directory):
    """Print the native sampling rate of every file directly inside ``directory``.

    Sub-directories are skipped (the scan is not recursive). A file that cannot
    be read is reported on its own line and does not stop the scan.

    Args:
        directory: Path of the folder to scan.
    """
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            continue
        try:
            # Only the rate is needed, so query the file's metadata instead of
            # decoding the whole signal with librosa.load.
            sr = librosa.get_samplerate(filepath)
        except Exception as e:  # best-effort scan: report the file and carry on
            print("Error processing", filename, e)
        else:
            print("File:", filename, "Sample Rate: ", sr)

In [None]:
check_sample_rate(wav_files)