<a href="https://colab.research.google.com/github/CollinPyxeda/SampleDataAudio/blob/main/Copy_of_audio_data_processing_2_types_of_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Mount Google Drive locally

Mount your Google Drive so that the files stored in it are available to your Python program.

In [None]:
from google.colab import drive
# Directory inside the Colab runtime under which Drive is mounted.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

## Provide path to the audio files

In [None]:
# Folder on the mounted Drive that is scanned for audio files. Each of its
# sub-folders is treated as one label and is searched for .wav files.
src_folder = (
    "/content/drive"
    "/My Drive"
    "/SampleData"
)

## Function to extract audio features

In [None]:
import librosa
import numpy as np

# Two kinds of features are extracted per clip: MFCCs and a chromagram.
# Each is summarised by its per-coefficient mean and standard deviation.
def extract_audio_features(y, sr):
  """Return a flat feature vector describing one audio signal.

  Args:
    y: audio time series, e.g. the first value returned by ``librosa.load``.
    sr: sampling rate of ``y``.

  Returns:
    A list laid out as [mfcc means, mfcc stds, chroma means, chroma stds].
    Each statistic is taken along axis 1 (the frame axis) of the feature
    matrix. With librosa's default settings (20 MFCCs, 12 chroma bins) this
    is 20 + 20 + 12 + 12 = 64 values.
  """
  # librosa >= 0.10 accepts the signal and sampling rate by keyword only;
  # the keyword form is also valid on older releases.
  # Feature 1: MFCCs
  mfcc = librosa.feature.mfcc(y=y, sr=sr)
  mfcc_mean = mfcc.mean(axis=1)
  mfcc_std = mfcc.std(axis=1)
  # Feature 2: chromagram computed from the STFT
  chroma = librosa.feature.chroma_stft(y=y, sr=sr)
  chroma_mean = chroma.mean(axis=1)
  chroma_std = chroma.std(axis=1)
  # Concatenate the four 1-D summaries into a single vector
  features = np.hstack([mfcc_mean, mfcc_std, chroma_mean, chroma_std])
  return list(features)

## Create a CSV file from audio files

In [None]:
import os
import pandas as pd
# List the label sub-folders in the path. Plain files (e.g. .DS_Store) are
# skipped because every entry is later opened with os.listdir(); sorting
# keeps the row order of the dataset reproducible between runs.
directory_contents = sorted(
    name for name in os.listdir(src_folder)
    if os.path.isdir(os.path.join(src_folder, name))
)

## Initialize a dataframe
# extract_audio_features returns 64 values per file with librosa's defaults:
# 20 MFCC means + 20 MFCC stds + 12 chroma means + 12 chroma stds
num_features = 64
# Name the columns of the dataset mfcc_0, mfcc_1 .. etc
# NOTE: the mfcc_ prefix is used for every column to keep the CSV header
# unchanged, although mfcc_40 onwards hold the chroma statistics
columns = ['mfcc_'+str(a) for a in range(0,num_features)]
# Append an additional column called label, which will save the label of audio file
columns.append('label')
# Create an empty dataframe
df = pd.DataFrame(columns=columns)
# View the empty dataframe
df.head()

In [None]:
# Go over wav files in each subfolder and collect one row per file.
# Rows are gathered in a plain list and the dataframe is built once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic. Rebuilding df from scratch also makes this cell safe to
# re-run without duplicating rows.
rows = []
for folder_name in directory_contents:
  folder_path = os.path.join(src_folder, folder_name)
  # os.listdir(src_folder) can also return plain files; only folders hold samples
  if not os.path.isdir(folder_path):
    continue
  print('Processing ', folder_name)
  # Sorted so that the row order does not depend on the filesystem
  files_list = sorted(f for f in os.listdir(folder_path) if f.endswith('.wav'))
  print('Number of files in ', folder_name, ' is ', len(files_list))
  for audio_file in files_list:
    # Path to an individual audio file
    audio_file_path = os.path.join(folder_path, audio_file)
    # Load the file using librosa. y is the signal, sr is sampling rate
    y, sr = librosa.load(audio_file_path)
    # Send this to the feature extracting function
    feature = extract_audio_features(y,sr)
    # Append the label (the name of the containing folder)
    feature.append(folder_name)
    rows.append(feature)

# One row per audio file: feature columns followed by the label column
df = pd.DataFrame(rows, columns=columns)

## Write the csv file

In [None]:
# Save the feature table to the current working directory; index=False
# leaves the dataframe's row index out of the file.
output_csv_name = 'audio_features.csv'
df.to_csv(output_csv_name, index=False)