# **Feature Extraction Methods: Imbalanced Data With Annotations**

- *Key Features*: [MFCCs, Mel-Spectrograms, Chroma Frequencies, RMS Power]
- *Key Manipulations*: [Varying Window Sizes, Normalization, Average Pooling (Compression), Filtering]
- *Process Assistance*: [Converting them to numpy arrays now, easy label access across features]
- *Conversion*: [To numpy arrays and pkl files]


In [1]:
# Standard libraries
import numpy as np
import pandas as pd
import os
import time

# Libraries for audio
from IPython.display import Audio
import librosa
import librosa.display

# Training and Testing Split
from sklearn.model_selection import train_test_split

# for normalization & avgpooling features
import tensorflow as tf
# for preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

# Operational
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import time
import random
import IPython.display as ipd

In [2]:
# Variables to be reused
# Every dataset location lives under one root, so relocating the project
# only requires editing PROJECT_DIR.
PROJECT_DIR = 'C:/Users/thato/Documents/Final-Year-Project/Dataset/Project-V4'
path = f'{PROJECT_DIR}/audio_files'
npy_path = f'{PROJECT_DIR}/train_audio_npy/'  # trailing slash kept as in the original value
train_csv = f'{PROJECT_DIR}/train-not-annotated.csv'
annotated_train_csv = f'{PROJECT_DIR}/trainval-split/trainval-annotated.csv'
not_annotated_splt = f'{PROJECT_DIR}/trainval-split/trainval.csv'
sr = 22050  # sample rate used as the default for the Extraction class

In [3]:
# Load the annotated table once, then split it by the value of its 'set' column
# ('tr' rows form the training frame, 'val' rows the validation frame).
trainval_data = pd.read_csv(annotated_train_csv)
train_data = trainval_data.loc[trainval_data['set'].eq('tr')]
val_data = trainval_data.loc[trainval_data['set'].eq('val')]

# **Creating a class to do the extraction**

In [7]:
class Extraction:
  """
  Window annotated audio segments and extract spectral features for a
  training and a validation split.

  Constructing an instance runs the whole pipeline eagerly: both DataFrames
  are windowed and featurised, then (optionally) average pooled over time.

  Attributes set by __init__:
    train_y / val_y: label ('species' value) of every extracted window.
    train_ids / val_ids: 'audio_name' of the row each window came from.
    train_features / val_features: dict of feature name -> array stacked over windows.
    train_features_2D / val_features_2D: the same dict objects as
      train_features / val_features (not copies).
    train_features_1D / val_features_1D: shallow copies of those dicts; their
      arrays are averaged over the last axis when avgpool is True, otherwise
      they reference the same arrays as the 2D dicts.
  """

  def __init__(self, train_df, val_df, window_size, overlap=0.5, npy_path=npy_path, sr=sr, n_mels=128, n_mfcc=20, n_chroma=12, n_cqt=84, hoplength=256, features=['mfcc'], normalize=True, avgpool=False):
    """
    Instantiate the Extraction class and immediately extract all features.

    Parameters:
      train_df (DataFrame): Training rows; must provide the columns
        'species', 'filename_npy', 'audio_name', 'start' and 'end'.
      val_df (DataFrame): Validation rows with the same columns.
      window_size (number): Window length in seconds.
      overlap (float): Fraction of a window shared with the next one; must be < 1.
      npy_path (str): Directory holding the .npy audio files.
      sr (int): Sample rate of the audio files.
      n_mels (int): Number of Mel bands to extract.
      n_mfcc (int): Number of MFCCs to extract.
      n_chroma (int): Number of chroma bins to use.
      n_cqt (int): Number of CQT bins to use.
      hoplength (int): Hop length (in samples) passed to every librosa extractor.
      features (list): List of features to extract.
        accepted features: 'mfcc', 'chroma', 'cqt', 'melspectrogram'.
      normalize (bool): Whether to normalize the features.
      avgpool (bool): Whether to avgpool the features.

    Raises:
      AssertionError: If no feature is given, a feature name is not accepted,
        or overlap is not below 1.
    """

    self.train_df = train_df
    self.val_df = val_df
    self.npy_path = npy_path
    self.window_size = window_size
    self.overlap = overlap
    self.sr = sr
    self.n_mels = n_mels
    self.n_mfcc = n_mfcc
    self.n_chroma = n_chroma
    self.n_cqt = n_cqt
    self.hoplength = hoplength

    # An overlap of 1 (or more) would give a non-positive hop between windows.
    assert overlap < 1, "overlap must be smaller than 1 so that consecutive windows advance."

    # confirm features have been specified
    assert len(features) != 0, "Must Specify At Least One Feature In The Form Of A List."
    # Copy so the instance never aliases the caller's list or the shared default list.
    self.features = list(features)

    self.accepted_feature = ['mfcc', 'chroma', 'cqt', 'melspectrogram']
    for feature in self.features:
      assert feature in self.accepted_feature, f"{feature} is not an accepted feature, only 'mfcc', 'chroma', 'cqt', 'melspectrogram' are accepted features."

    self.normalize = normalize
    self.avgpool = avgpool

    print(f"Train DataFrame shape: {train_df.shape}")
    print(f"Validation DataFrame shape: {val_df.shape}")

    # extract train and val labels and features
    self.train_y, self.train_features, self.train_ids = self.feature_extraction(self.train_df, window_size=self.window_size)
    self.val_y, self.val_features, self.val_ids = self.feature_extraction(self.val_df, window_size=self.window_size)

    # process the features by average pooling
    self.train_features_2D, self.val_features_2D, self.train_features_1D, self.val_features_1D = self.process_features(self.train_features, self.val_features)

  def normalize_audio(self, audio):
    """
    Min-max scale a signal to the range [0, 1].

    A constant signal (max == min) has no range to scale by; it is returned
    as all zeros instead of the NaNs a 0/0 division would produce.
    """
    lowest = np.min(audio)
    span = np.max(audio) - lowest
    shifted = audio - lowest
    if span == 0:
      return np.zeros_like(shifted, dtype=np.result_type(shifted, np.float32))
    return shifted / span

  def generate_pink_noise(self, num_samples):
    """
    Generate `num_samples` of spectrally shaped noise, min-max scaled to [0, 1].

    White Gaussian noise is drawn from NumPy's global random state, its
    spectrum is divided by the 1-based bin index, and the result is
    transformed back to the time domain.

    NOTE(review): dividing the amplitude spectrum by f gives a 1/f^2 power
    spectrum; textbook pink noise divides by sqrt(f). Left unchanged so that
    the padding characteristics stay as before - confirm which is intended.

    Returns:
      np.ndarray of exactly `num_samples` values (empty when num_samples <= 0).
    """
    if num_samples <= 0:
      # Nothing to generate; an FFT of zero points would raise.
      return np.zeros(0)

    white_noise = np.random.randn(num_samples)

    # Apply a filter to shape the white noise spectrum
    X = np.fft.rfft(white_noise)
    S = np.arange(1, len(X) + 1)  # Frequency scaling
    # Passing n makes the inverse transform return exactly num_samples values,
    # including odd lengths and num_samples == 1.
    pink_noise = np.fft.irfft(X / S, n=num_samples)

    return self.normalize_audio(pink_noise)

  def pad_with_noise(self, audio_data, window_length, window_samples=None):
    """
    Pad `audio_data` at the end with generated noise up to `window_length` seconds.

    Parameters:
      audio_data (np.ndarray): 1-D signal.
      window_length (number): Target length in seconds.
      window_samples (int, optional): Retained for backward compatibility;
        the current length is always taken from len(audio_data).

    Returns:
      np.ndarray: `audio_data` unchanged when it already has at least the
      target number of samples, otherwise a new array of exactly
      int(window_length * self.sr) samples.
    """
    target_length_samples = int(window_length * self.sr)
    padding_length_samples = target_length_samples - len(audio_data)

    # Already long enough (including the exact-length case, which needs no padding).
    if padding_length_samples <= 0:
      return audio_data

    # Generate noise to pad with
    pink_noise = self.generate_pink_noise(padding_length_samples)
    padded_audio = np.concatenate([audio_data, pink_noise])

    assert target_length_samples == len(padded_audio)

    return padded_audio

  def avg_pooling_keras(self, feature):
    """
    Pool `feature` with a Keras GlobalAveragePooling1D layer.

    This method is not called anywhere in this class; avgpooling() is the
    one used by process_features().

    NOTE(review): GlobalAveragePooling1D (default data format) averages over
    axis 1 of the batched input. For arrays laid out as
    (batch, n_features, n_time) that is the feature axis, not the time axis -
    confirm the intended layout before using this method.
    """
    # Clear the previous Keras session
    tf.keras.backend.clear_session()

    # Define the input shape based on features
    input_shape = feature.shape[1:]  # (n_mels, time_steps)

    # Create the Keras model for average pooling
    inputs = tf.keras.layers.Input(shape=input_shape)
    pooled = tf.keras.layers.GlobalAveragePooling1D()(inputs)
    pooling_model = tf.keras.models.Model(inputs=inputs, outputs=pooled)

    # Perform pooling using the model
    pooled_features = pooling_model.predict(feature)

    return pooled_features

  #-------------------------Feature Extraction---------------------------------------
  def extract_mfcc(self, window):
    """Return the MFCCs of one window, passed through librosa.util.normalize when self.normalize is set."""
    mfcc = librosa.feature.mfcc(y=window, sr=self.sr, n_mfcc=self.n_mfcc, hop_length=self.hoplength)
    if self.normalize:
      return librosa.util.normalize(mfcc)
    return mfcc

  def extract_chroma(self, window):
    """Return the STFT chromagram of one window, passed through librosa.util.normalize when self.normalize is set."""
    chroma = librosa.feature.chroma_stft(y=window, sr=self.sr, n_chroma=self.n_chroma, hop_length=self.hoplength)
    if self.normalize:
      return librosa.util.normalize(chroma)
    return chroma

  def extract_cqt(self, window):
    """
    Return the constant-Q transform magnitude of one window in dB (relative to its maximum).

    NOTE(review): unlike the other extractors, self.normalize is not applied
    here - confirm whether that is intentional.
    """
    # Use the instance sample rate rather than the notebook-level global.
    cqt = librosa.cqt(y=window, sr=self.sr, hop_length=self.hoplength, n_bins=self.n_cqt)
    cqt_db = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)
    return cqt_db

  def extract_melspectrogram(self, window):
    """Return the Mel spectrogram of one window in dB, passed through librosa.util.normalize when self.normalize is set."""
    mel = librosa.feature.melspectrogram(y=window, sr=self.sr, n_mels=self.n_mels, hop_length=self.hoplength)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    if self.normalize:
      return librosa.util.normalize(mel_db)
    return mel_db

  def avgpooling(self, train_X, val_X, n_time, n_features):
    """
    Average pooling the train and val features.

    Parameters:
      train_X (npy): Training feature array of shape (batch_size, n_features, n_time)
      val_X (npy): Validation feature array of shape (batch_size, n_features, n_time)
      n_time (int): Time axis
      n_features (int): Feature axis

    Returns:
      train_X (npy): Avgpooled training feature array of shape (batch_size, n_features)
      val_X (npy): Avgpooled validation feature array of shape (batch_size, n_features)
    """
    # Clear the Keras session
    tf.keras.backend.clear_session()

    # Create the Keras input layer with shape (n_features, n_time)
    input_layer = tf.keras.layers.Input(shape=(n_features, n_time))

    # Apply average pooling over the time axis (axis=-1) to reduce n_time
    avg_pool = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1))(input_layer)

    # Build the model
    pooling_model = tf.keras.models.Model(inputs=input_layer, outputs=avg_pool)

    # Use the model to apply average pooling on the training and validation features
    train_X = pooling_model.predict(train_X)
    val_X = pooling_model.predict(val_X)

    return train_X, val_X

  def process_features(self, train_features_dict, val_features_dict):
    """
    Build the pooled ("1D") counterparts of the extracted feature dicts.

    Parameters:
      train_features_dict (dict): feature name -> training array.
      val_features_dict (dict): feature name -> validation array.

    Returns:
      tuple: (train_features_dict, val_features_dict, train_pooled, val_pooled).
      The first two are the input dicts themselves. The last two are shallow
      copies; when self.avgpool is True each of their arrays is replaced by
      its mean over the last axis, otherwise they keep referencing the same
      arrays as the inputs.
    """
    train_copy = train_features_dict.copy()
    val_copy = val_features_dict.copy()

    if self.avgpool:
      for each in train_features_dict:
        train_feature = train_copy[each]
        val_feature = val_copy[each]
        # Read both sizes from the array itself so they always match the data
        # (axis 1 holds the feature bins, axis 2 the time frames).
        train_copy[each], val_copy[each] = self.avgpooling(train_feature, val_feature, n_time=train_feature.shape[2], n_features=train_feature.shape[1])

    return train_features_dict, val_features_dict, train_copy, val_copy

  def feature_extraction(self, dataframe, window_size):
    """
    Cut every annotated segment into overlapping windows and extract the configured features.

    For each row the audio file is loaded from self.npy_path, the segment
    between 'start' and 'end' is sliced out (both are multiplied by the
    sample rate, i.e. treated as seconds), min-max normalised, padded with
    noise up to one window if shorter, and framed with the configured
    overlap. Rows whose file is missing or whose segment is shorter than
    512 samples are skipped.

    Parameters:
      dataframe (DataFrame): Rows with 'species', 'filename_npy',
        'audio_name', 'start' and 'end'.
      window_size (number): Window length in seconds.

    Returns:
      tuple: (y, features_dict, ids) where y holds one label per window,
      features_dict maps each feature name to an array stacked over windows,
      and ids holds the 'audio_name' for each window.
    """
    y = []  # To hold the labels
    ids = []
    features_dict = {item: [] for item in self.features}  # Create a key for each feature listed

    # Window geometry does not depend on the row, so compute it once.
    window_samples = int(window_size * self.sr)
    hop_samples = int(window_samples * (1 - self.overlap))  # For overlapping

    print(f"Number of rows in dataframe: {len(dataframe)}")
    for _, row in tqdm(dataframe.iterrows(), desc="Processing data", total=len(dataframe)):
      label = row['species']
      file_path = os.path.join(self.npy_path, row['filename_npy'])
      audio_id = row['audio_name']

      try:
        audio = np.load(file_path)
      except FileNotFoundError:
        print(f"File not found: {file_path}")
        continue

      # The annotated end is extended by 512 samples and clipped to the file length.
      start = int(row['start'] * self.sr)
      end = min(int(row['end'] * self.sr) + 512, len(audio))

      sample = audio[start:end]

      if len(sample) < 512:
        continue

      sample = self.normalize_audio(sample)
      sample = self.pad_with_noise(sample, window_length=window_size, window_samples=len(sample))

      # Break the audio into windows with the specified overlap.
      # librosa.util.frame only yields complete frames, so every window has
      # exactly window_samples samples and a trailing remainder is dropped.
      audio_windows = librosa.util.frame(sample, frame_length=window_samples, hop_length=hop_samples).T

      for window in audio_windows:
        y.append(label)
        ids.append(audio_id)

        # dynamically call the extract_x method for each listed feature
        for feature in self.features:
          extractor = getattr(self, f"extract_{feature}")
          features_dict[feature].append(extractor(window))

    # cast lists to np arrays
    for each in features_dict:
      features_dict[each] = np.array(features_dict[each])

    y = np.array(y)
    ids = np.array(ids)

    return y, features_dict, ids

## **Window Size = 3s**

In [5]:
# Features to extract for the 3 s run, in the order they will appear in the feature dicts.
features_list = [
  'melspectrogram',
  'mfcc',
  'chroma',
  'cqt',
]

In [8]:
# Run the extraction with 3-second windows; avgpool=True additionally
# produces time-averaged copies of every feature.
features = Extraction(
  train_df=train_data,
  val_df=val_data,
  window_size=3,
  features=features_list,
  avgpool=True,
)

Train DataFrame shape: (3444, 9)
Validation DataFrame shape: (834, 9)
Number of rows in dataframe: 3444


  return pitch_tuning(
Processing data: 100%|██████████| 3444/3444 [06:05<00:00,  9.42it/s]


Number of rows in dataframe: 834


Processing data: 100%|██████████| 834/834 [01:26<00:00,  9.64it/s]



[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 559us/step
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 465us/step
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 476us/step
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 451us/step
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 859us/step


In [9]:
# Per-window labels of both splits, followed by their shapes.
train_y, val_y = features.train_y, features.val_y

display(train_y.shape, val_y.shape)

(5278,)

(1350,)

In [10]:
# Average-pooled training features: show each feature name with its array shape.
train_features_1D = features.train_features_1D
for key, pooled in train_features_1D.items():
  display(key)
  display(pooled.shape)

'melspectrogram'

(5278, 128)

'mfcc'

(5278, 20)

'chroma'

(5278, 12)

'cqt'

(5278, 84)

In [11]:
# Average-pooled validation features: show each feature name with its array shape.
val_features_1D = features.val_features_1D
for key, pooled in val_features_1D.items():
  display(key)
  display(pooled.shape)

'melspectrogram'

(1350, 128)

'mfcc'

(1350, 20)

'chroma'

(1350, 12)

'cqt'

(1350, 84)

In [12]:
# Un-pooled training features: show each feature name with its array shape.
train_features_2D = features.train_features_2D
for key, unpooled in train_features_2D.items():
  display(key)
  display(unpooled.shape)

'melspectrogram'

(5278, 128, 259)

'mfcc'

(5278, 20, 259)

'chroma'

(5278, 12, 259)

'cqt'

(5278, 84, 259)

In [13]:
# Un-pooled validation features: show each feature name with its array shape.
val_features_2D = features.val_features_2D
for key, unpooled in val_features_2D.items():
  display(key)
  display(unpooled.shape)

'melspectrogram'

(1350, 128, 259)

'mfcc'

(1350, 20, 259)

'chroma'

(1350, 12, 259)

'cqt'

(1350, 84, 259)

In [14]:
# Source-audio identifier of every extracted window, for both splits.
train_ids, val_ids = features.train_ids, features.val_ids

### Encode Classes

In [15]:
# Fit the encoder on the training labels only, then reuse it for validation.
label_encoder = LabelEncoder()
train_y_encoded = label_encoder.fit_transform(train_y)
val_y_encoded = label_encoder.transform(val_y)

# Look up the species names behind the first three integer codes.
first_codes = [0, 1, 2]
classes = list(label_encoder.inverse_transform(first_codes))
print("Encoded classes for [0, 1, 2]:", classes)
print("Encoded training labels:", train_y_encoded)
print("Encoded validation labels:", val_y_encoded)

Encoded classes for [0, 1, 2]: ['Acrocephalus arundinaceus', 'Acrocephalus melanopogon', 'Acrocephalus scirpaceus']
Encoded training labels: [ 9  9  9 ... 17 17 17]
Encoded validation labels: [14 14 14 ...  4  4  4]


In [16]:
# Attach the encoded labels and the source-audio ids to every feature dict:
# first the average-pooled (1D) pair, then the un-pooled (2D) pair.
for train_dict, val_dict in (
  (train_features_1D, val_features_1D),
  (train_features_2D, val_features_2D),
):
  train_dict['label'] = train_y_encoded
  val_dict['label'] = val_y_encoded

  train_dict['id'] = train_ids
  val_dict['id'] = val_ids

In [17]:
# Bundle both average-pooled splits under 'train' / 'val' and show the result.
merged_dict_1D = dict(train=train_features_1D, val=val_features_1D)
merged_dict_1D

{'train': {'melspectrogram': array([[-0.01761791, -0.28131086, -0.26533863, ..., -0.9942341 ,
          -0.99861884, -1.        ],
         [-0.01783594, -0.28790727, -0.27350605, ..., -0.9952871 ,
          -0.999275  , -1.        ],
         [-0.01663349, -0.23677032, -0.2753801 , ..., -0.9895009 ,
          -0.9974265 , -1.        ],
         ...,
         [-0.08988359, -0.4680023 , -0.5110256 , ..., -0.9615641 ,
          -0.9693968 , -0.99108076],
         [-0.0954483 , -0.56289166, -0.5976204 , ..., -0.94434714,
          -0.95767987, -0.99251   ],
         [-0.08350353, -0.4676744 , -0.5083498 , ..., -0.9486351 ,
          -0.9575719 , -0.990598  ]], dtype=float32),
  'mfcc': array([[-0.9992188 ,  0.552929  , -0.3020308 , ...,  0.02957186,
           0.06612163,  0.02512584],
         [-1.        ,  0.54670596, -0.30596924, ...,  0.0395895 ,
           0.06703314,  0.02110985],
         [-0.9954227 ,  0.5823921 , -0.33912998, ...,  0.01689669,
           0.07941275,  0.04006654]

In [18]:
# Bundle both un-pooled splits under 'train' / 'val' and show the result.
merged_dict_2D = dict(train=train_features_2D, val=val_features_2D)
merged_dict_2D

{'train': {'melspectrogram': array([[[-0.01838665,  0.        , -0.00848863, ..., -0.0140964 ,
           -0.00224623, -0.00832028],
          [-0.1968215 , -0.21882324, -0.27436921, ..., -0.28576213,
           -0.22507286, -0.19316849],
          [-0.23742069, -0.24905342, -0.28791261, ..., -0.28030974,
           -0.26599282, -0.25660488],
          ...,
          [-0.99879366, -0.99876946, -0.99870896, ..., -0.99917048,
           -0.99937928, -0.99937975],
          [-0.9994275 , -0.99943322, -0.99946558, ..., -0.99974817,
           -0.99971521, -0.99969888],
          [-1.        , -1.        , -1.        , ..., -1.        ,
           -1.        , -1.        ]],
  
         [[-0.01671414,  0.        , -0.0082565 , ..., -0.01512463,
           -0.00132613, -0.0053683 ],
          [-0.17772998, -0.19626212, -0.24940659, ..., -0.30918679,
           -0.24149542, -0.20524147],
          [-0.23100212, -0.23447959, -0.25692546, ..., -0.26929191,
           -0.2456764 , -0.23258191],


### Save the merged dictionary to a pkl

In [20]:
# Average pooled (1D) features, 3 s windows -> pickle on disk
with open('C:/Users/thato/Documents/Final-Year-Project/Dataset/Final-Version-Of-Bird-Classification-Project/feature-extraction/Annotated/Regular/AveragePooled/split_features_3s_all_1D.pkl', 'wb') as pkl_handle:
  pickle.dump(merged_dict_1D, pkl_handle)
# Drop the handle name so it does not linger in the notebook namespace.
del pkl_handle

In [21]:
# Not average pooled (2D) features, 3 s windows -> pickle on disk
with open('C:/Users/thato/Documents/Final-Year-Project/Dataset/Final-Version-Of-Bird-Classification-Project/feature-extraction/Annotated/Regular/NotAveragePooled/split_features_3s_all_2D.pkl', 'wb') as pkl_handle:
  pickle.dump(merged_dict_2D, pkl_handle)
# Drop the handle name so it does not linger in the notebook namespace.
del pkl_handle

## **Window Size = 1s**

In [22]:
# Features to extract for the 1 s run, in the order they will appear in the feature dicts.
features_list = [
  'melspectrogram',
  'mfcc',
  'chroma',
  'cqt',
]

In [23]:
# Run the extraction again with 1-second windows; avgpool=True additionally
# produces time-averaged copies of every feature.
features = Extraction(
  train_df=train_data,
  val_df=val_data,
  window_size=1,
  features=features_list,
  avgpool=True,
)

Train DataFrame shape: (3444, 9)
Validation DataFrame shape: (834, 9)
Number of rows in dataframe: 3444


  return pitch_tuning(
Processing data: 100%|██████████| 3444/3444 [10:12<00:00,  5.62it/s] 


Number of rows in dataframe: 834


Processing data: 100%|██████████| 834/834 [02:17<00:00,  6.06it/s]


[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 861us/step
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 808us/step
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546us/step
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 505us/step
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 755us/step
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 720us/step


In [24]:
# Per-window labels of both splits (1 s windows), followed by their shapes.
train_y, val_y = features.train_y, features.val_y

display(train_y.shape, val_y.shape)

(12565,)

(3318,)

In [25]:
# Average-pooled training features (1 s windows): feature name and array shape.
train_features_1D = features.train_features_1D
for key, pooled in train_features_1D.items():
  display(key)
  display(pooled.shape)

'melspectrogram'

(12565, 128)

'mfcc'

(12565, 20)

'chroma'

(12565, 12)

'cqt'

(12565, 84)

In [26]:
# Average-pooled validation features (1 s windows): feature name and array shape.
val_features_1D = features.val_features_1D
for key, pooled in val_features_1D.items():
  display(key)
  display(pooled.shape)

'melspectrogram'

(3318, 128)

'mfcc'

(3318, 20)

'chroma'

(3318, 12)

'cqt'

(3318, 84)

In [27]:
# Un-pooled training features (1 s windows): feature name and array shape.
train_features_2D = features.train_features_2D
for key, unpooled in train_features_2D.items():
  display(key)
  display(unpooled.shape)

'melspectrogram'

(12565, 128, 87)

'mfcc'

(12565, 20, 87)

'chroma'

(12565, 12, 87)

'cqt'

(12565, 84, 87)

In [28]:
# Un-pooled validation features (1 s windows): feature name and array shape.
val_features_2D = features.val_features_2D
for key, unpooled in val_features_2D.items():
  display(key)
  display(unpooled.shape)

'melspectrogram'

(3318, 128, 87)

'mfcc'

(3318, 20, 87)

'chroma'

(3318, 12, 87)

'cqt'

(3318, 84, 87)

In [29]:
# Source-audio identifier of every extracted window (1 s run), for both splits.
train_ids, val_ids = features.train_ids, features.val_ids

### Encode Classes

In [30]:
# Fit the encoder on the training labels only, then reuse it for validation.
label_encoder = LabelEncoder()
train_y_encoded = label_encoder.fit_transform(train_y)
val_y_encoded = label_encoder.transform(val_y)

# Look up the species names behind the first three integer codes.
first_codes = [0, 1, 2]
classes = list(label_encoder.inverse_transform(first_codes))
print("Encoded classes for [0, 1, 2]:", classes)
print("Encoded training labels:", train_y_encoded)
print("Encoded validation labels:", val_y_encoded)

Encoded classes for [0, 1, 2]: ['Acrocephalus arundinaceus', 'Acrocephalus melanopogon', 'Acrocephalus scirpaceus']
Encoded training labels: [ 9  9  9 ... 17 17 17]
Encoded validation labels: [14 14 14 ...  4  4  4]


In [31]:
# Attach the encoded labels and the source-audio ids to every feature dict:
# first the average-pooled (1D) pair, then the un-pooled (2D) pair.
for train_dict, val_dict in (
  (train_features_1D, val_features_1D),
  (train_features_2D, val_features_2D),
):
  train_dict['label'] = train_y_encoded
  val_dict['label'] = val_y_encoded

  train_dict['id'] = train_ids
  val_dict['id'] = val_ids

In [32]:
# Bundle both average-pooled splits (1 s windows) under 'train' / 'val' and show the result.
merged_dict_1D = dict(train=train_features_1D, val=val_features_1D)
merged_dict_1D

{'train': {'melspectrogram': array([[-0.01898465, -0.2811592 , -0.25845313, ..., -0.9981291 ,
          -0.9983617 , -1.        ],
         [-0.01670736, -0.27276382, -0.2651618 , ..., -0.9903572 ,
          -0.9974626 , -1.        ],
         [-0.01776626, -0.26800779, -0.26286083, ..., -0.9897578 ,
          -0.9985065 , -0.9999572 ],
         ...,
         [-0.07289265, -0.5997221 , -0.6325296 , ..., -0.9177227 ,
          -0.93674475, -0.99452364],
         [-0.01720924, -0.9334746 , -0.9393428 , ..., -0.8331133 ,
          -0.8683263 , -0.98977023],
         [-0.06566753, -0.6877171 , -0.70655054, ..., -0.8833477 ,
          -0.89263326, -0.9825436 ]], dtype=float32),
  'mfcc': array([[-9.9767429e-01,  5.7248688e-01, -2.8398493e-01, ...,
           1.3116055e-02,  6.1448671e-02,  3.6781926e-02],
         [-9.9636453e-01,  5.7445627e-01, -2.7706087e-01, ...,
           2.2723455e-02,  6.6032313e-02,  2.8396677e-02],
         [-1.0000000e+00,  5.6324875e-01, -2.5171626e-01, ...,
   

In [33]:
# Bundle both un-pooled splits (1 s windows) under 'train' / 'val' and show the result.
merged_dict_2D = dict(train=train_features_2D, val=val_features_2D)
merged_dict_2D

{'train': {'melspectrogram': array([[[-2.10519694e-02, -2.64618150e-03, -1.09052584e-02, ...,
           -1.14497980e-02,  0.00000000e+00, -1.33858034e-02],
          [-1.99002311e-01, -2.20890388e-01, -2.76137829e-01, ...,
           -2.59782881e-01, -2.09464088e-01, -1.87755451e-01],
          [-2.39491284e-01, -2.51040578e-01, -2.89648235e-01, ...,
           -2.82749534e-01, -2.52160460e-01, -2.39700243e-01],
          ...,
          [-9.98796940e-01, -9.98772740e-01, -9.98712122e-01, ...,
           -9.96163070e-01, -9.97696698e-01, -9.98156428e-01],
          [-9.99429047e-01, -9.99434769e-01, -9.99466956e-01, ...,
           -9.99509513e-01, -9.99450743e-01, -9.99419093e-01],
          [-1.00000000e+00, -1.00000000e+00, -1.00000000e+00, ...,
           -1.00000000e+00, -1.00000000e+00, -1.00000000e+00]],
  
         [[-1.80637967e-02, -3.36689858e-08, -7.33598135e-03, ...,
           -1.05369762e-02, -8.22041358e-04, -1.49706351e-02],
          [-1.93169340e-01, -2.11562306e-01,

### Save the merged dictionary to a pkl

In [34]:
# Average pooled (1D) features, 1 s windows -> pickle on disk
with open('C:/Users/thato/Documents/Final-Year-Project/Dataset/Final-Version-Of-Bird-Classification-Project/feature-extraction/Annotated/Regular/AveragePooled/split_features_1s_all_1D.pkl', 'wb') as pkl_handle:
  pickle.dump(merged_dict_1D, pkl_handle)
# Drop the handle name so it does not linger in the notebook namespace.
del pkl_handle

In [35]:
# Not average pooled (2D) features, 1 s windows -> pickle on disk
with open('C:/Users/thato/Documents/Final-Year-Project/Dataset/Final-Version-Of-Bird-Classification-Project/feature-extraction/Annotated/Regular/NotAveragePooled/split_features_1s_all_2D.pkl', 'wb') as pkl_handle:
  pickle.dump(merged_dict_2D, pkl_handle)
# Drop the handle name so it does not linger in the notebook namespace.
del pkl_handle