# Performing pre-processing on the audio dataset (extracting the Mel-Frequency Cepstral Coefficients (MFCC) features):


In [20]:
#   Summarise one audio file as a single vector of time-averaged MFCCs.

def extract_features(file_name):
    """Return a 40-element MFCC summary vector for one audio file.

    Parameters
    ----------
    file_name
        Path of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        One value per MFCC coefficient (40 of them): the mean of that
        coefficient over all analysis frames of the file.
    """
    #   Decode the file; "kaiser_fast" is the resampling mode requested from librosa.
    signal, rate = librosa.load(file_name, res_type="kaiser_fast")
    #   librosa returns the coefficients as a (n_mfcc, n_frames) matrix.
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    #   Transpose to (n_frames, n_mfcc) and average over the frame axis.
    #   This is a mean over time, not a rescaling of the values.
    return np.mean(np.transpose(coefficients), axis=0)

In [7]:
import os
import sys

import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm

# Function to extract MFCC features from audio files
def extract_mfcc(file_path, num_mfcc=20, max_length=500):
    """Load one audio file and return a fixed-size MFCC matrix.

    Parameters
    ----------
    file_path
        Path of the audio file; passed straight to ``librosa.load``.
    num_mfcc : int, default 20
        Number of MFCC coefficients to compute (``n_mfcc``).
    max_length : int, default 500
        Number of frames in the result. Shorter clips are zero-padded at
        the end; longer clips are cut after ``max_length`` frames.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(max_length, num_mfcc)``, or ``None`` when loading
        or feature extraction raised. Failures are reported on stdout rather
        than propagated, so a long batch run is not aborted by one bad file.
    """
    try:
        # sr=None asks librosa to keep the file's own sampling rate.
        # NOTE(review): frames of files with different sampling rates then
        # cover different durations - confirm this is intended.
        audio, sr = librosa.load(file_path, sr=None)
        # Coefficient matrix, laid out as (num_mfcc, n_frames).
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc)
        # Pad or truncate along the frame axis so every file yields the same shape.
        n_frames = mfccs.shape[1]
        if n_frames < max_length:
            mfccs = np.pad(mfccs, ((0, 0), (0, max_length - n_frames)), mode='constant')
        else:
            mfccs = mfccs[:, :max_length]
        # Return frames as rows: (max_length, num_mfcc).
        return mfccs.T
    except Exception as e:
        # Several decoder errors have an empty str(), which previously left the
        # log line blank after the colon. repr() always names the exception class.
        print(f"Error processing {file_path}: {e!r}")
        return None

# Generator over every file path below a directory tree
def iterate_files(root_folder):
    """Yield the path of each file under ``root_folder``, sub-folders included.

    Paths are built by joining the directory reported by ``os.walk`` with the
    file name, so they start with ``root_folder`` as it was given. Nothing is
    yielded for directories themselves, and no filtering by extension is done.
    """
    for current_dir, _subdirs, names in os.walk(root_folder):
        yield from (os.path.join(current_dir, name) for name in names)

# Path to the root folder containing audio files
root_folder = "fma_large"

# Initialize lists to store flattened features and labels
flat_features = []
labels = []

# Iterate through audio files in the folder and its sub-folders.
# extract_mfcc returns None for files it could not process; those are skipped,
# so the final row count can be lower than the number of .mp3 files on disk.
for file_path in tqdm(iterate_files(root_folder)):
    if file_path.endswith('.mp3'):
        # Extract features
        mfcc = extract_mfcc(file_path)
        if mfcc is not None:
            # Flatten the (frames, coefficients) matrix into one vector per file
            flat_features.append(mfcc.flatten())
            # The label is the file name up to its first dot (e.g. "000002");
            # it identifies the file, it is not a class/genre label.
            labels.append(os.path.basename(file_path).split('.')[0])

# Convert lists to numpy arrays
flat_features = np.array(flat_features)
labels = np.array(labels)

# Create a DataFrame to store features and labels (one array-valued cell per row)
df = pd.DataFrame({'Feature': list(flat_features), 'Label': labels})

# Save the DataFrame to a CSV file.
# to_csv writes each feature array via str(); numpy abbreviates arrays above
# its print threshold (1000 elements by default) with "...", and each vector
# here holds 10,000 values with the extract_mfcc defaults. Without raising the
# threshold the CSV would silently lose almost all of the feature data.
# NOTE(review): the full-text CSV is very large; a binary format is far more
# compact for array-valued columns.
# If this write fails (the recorded run hit PermissionError here), `df` is
# still in memory and the save can simply be retried.
with np.printoptions(threshold=sys.maxsize):
    df.to_csv('mfcc_features.csv', index=False)


1155it [03:40,  6.99it/s]

Error processing fma_large\001\001486.mp3: 


1681it [05:22,  4.13it/s]

Error processing fma_large\002\002624.mp3: 


1710it [05:27,  7.03it/s]

Error processing fma_large\003\003284.mp3: 


3280it [10:29,  5.14it/s]

Error processing fma_large\005\005574.mp3: 


4349it [14:01,  7.21it/s]

Error processing fma_large\008\008669.mp3: 


5345it [16:59,  6.87it/s]

Error processing fma_large\010\010116.mp3: 


6339it [20:09,  5.51it/s]

Error processing fma_large\011\011583.mp3: 


7306it [23:26,  6.45it/s]

Error processing fma_large\012\012838.mp3: 


7828it [25:09,  5.29it/s]

Error processing fma_large\013\013529.mp3: 


8293it [26:43,  5.83it/s]

Error processing fma_large\014\014116.mp3: 


8344it [26:53,  4.55it/s]

Error processing fma_large\014\014180.mp3: 


11555it [37:38,  4.06it/s]

Error processing fma_large\018\018924.mp3: 


12753it [41:37,  4.97it/s]

Error processing fma_large\020\020814.mp3: 


14002it [45:48,  5.02it/s]

Error processing fma_large\022\022554.mp3: 


14557it [47:37,  5.11it/s]

Error processing fma_large\023\023429.mp3: 
Error processing fma_large\023\023430.mp3: 


14561it [47:38,  7.15it/s]

Error processing fma_large\023\023431.mp3: 


15720it [51:30,  4.59it/s]

Error processing fma_large\025\025173.mp3: 
Error processing fma_large\025\025174.mp3: 
Error processing fma_large\025\025175.mp3: 
Error processing fma_large\025\025176.mp3: 


15727it [51:31,  6.66it/s]

Error processing fma_large\025\025180.mp3: 


18327it [1:00:07, 10.32it/s]

Error processing fma_large\029\029345.mp3: 
Error processing fma_large\029\029346.mp3: 


18334it [1:00:08, 12.01it/s]

Error processing fma_large\029\029352.mp3: 


18338it [1:00:08, 12.42it/s]

Error processing fma_large\029\029356.mp3: 


21315it [1:10:11,  4.48it/s]

Error processing fma_large\033\033391.mp3: 


21322it [1:10:12,  6.66it/s]

Error processing fma_large\033\033411.mp3: 
Error processing fma_large\033\033413.mp3: 
Error processing fma_large\033\033414.mp3: 


21330it [1:10:12, 14.08it/s]

Error processing fma_large\033\033417.mp3: 
Error processing fma_large\033\033418.mp3: 
Error processing fma_large\033\033419.mp3: 


21334it [1:10:13, 10.45it/s]

Error processing fma_large\033\033425.mp3: 


22932it [1:15:37,  2.04it/s]

Error processing fma_large\035\035725.mp3: 


25500it [1:24:13,  5.48it/s]

Error processing fma_large\039\039363.mp3: 


26447it [1:27:26,  6.63it/s]

Error processing fma_large\040\040576.mp3: 


27385it [1:30:35,  4.90it/s]

Error processing fma_large\041\041745.mp3: 


28396it [1:33:59,  7.08it/s]

Error processing fma_large\042\042986.mp3: 


28957it [1:35:54,  7.06it/s]

Error processing fma_large\043\043753.mp3: 


33257it [1:50:39,  6.36it/s]

Error processing fma_large\050\050594.mp3: 


33365it [1:51:01,  5.86it/s]

Error processing fma_large\050\050782.mp3: 


34857it [1:56:10,  6.32it/s]

Error processing fma_large\052\052640.mp3: 


35561it [1:58:35,  6.68it/s]

Error processing fma_large\053\053668.mp3: 


36255it [2:00:58,  7.18it/s]

Error processing fma_large\054\054569.mp3: 


36268it [2:00:59,  9.46it/s]

Error processing fma_large\054\054582.mp3: 


41272it [2:18:10,  6.60it/s]

Error processing fma_large\061\061480.mp3: 


41547it [2:19:08,  5.31it/s]

Error processing fma_large\061\061822.mp3: 


42738it [2:23:13,  6.25it/s]

Error processing fma_large\063\063422.mp3: 


43103it [2:24:29,  6.30it/s]

Error processing fma_large\063\063997.mp3: 


44558it [2:29:30,  4.84it/s]

Error processing fma_large\065\065753.mp3: 


48825it [2:44:16,  6.01it/s]

Error processing fma_large\071\071612.mp3: 


49651it [2:47:06,  7.05it/s]

Error processing fma_large\072\072656.mp3: 


49819it [2:47:41,  5.35it/s]

Error processing fma_large\072\072980.mp3: 


50267it [2:49:15,  4.87it/s]

Error processing fma_large\073\073510.mp3: 


54117it [3:02:36,  6.59it/s]

Error processing fma_large\080\080391.mp3: 


54194it [3:02:52,  6.50it/s]

Error processing fma_large\080\080553.mp3: 


55939it [3:08:56,  4.28it/s]

Error processing fma_large\082\082699.mp3: 


57339it [3:13:44,  4.32it/s]

Error processing fma_large\084\084503.mp3: 
Error processing fma_large\084\084504.mp3: 


57350it [3:13:46,  5.23it/s]

Error processing fma_large\084\084522.mp3: 


57352it [3:13:46,  6.50it/s]

Error processing fma_large\084\084524.mp3: 


59133it [3:20:00,  5.00it/s]

Error processing fma_large\086\086656.mp3: 


59136it [3:20:00,  5.31it/s]

Error processing fma_large\086\086659.mp3: 


59140it [3:20:01,  7.69it/s]

Error processing fma_large\086\086661.mp3: 


59143it [3:20:01,  7.82it/s]

Error processing fma_large\086\086664.mp3: 


59356it [3:20:45,  5.09it/s]

Error processing fma_large\087\087057.mp3: 


60695it [3:25:28,  4.08it/s]

Error processing fma_large\090\090244.mp3: 
Error processing fma_large\090\090245.mp3: 
Error processing fma_large\090\090247.mp3: 
Error processing fma_large\090\090248.mp3: 
Error processing fma_large\090\090250.mp3: 
Error processing fma_large\090\090252.mp3: 
Error processing fma_large\090\090253.mp3: 


60751it [3:25:38,  4.74it/s]

Error processing fma_large\090\090442.mp3: 


60753it [3:25:38,  5.88it/s]

Error processing fma_large\090\090445.mp3: 


61228it [3:27:19,  3.74it/s]

Error processing fma_large\091\091206.mp3: 


62118it [3:30:24,  4.75it/s]

Error processing fma_large\092\092479.mp3: 


62891it [3:33:09,  6.67it/s]

Error processing fma_large\094\094052.mp3: 


63039it [3:33:41,  7.11it/s]

Error processing fma_large\094\094234.mp3: 


63688it [3:35:58,  5.05it/s]

Error processing fma_large\095\095253.mp3: 


64345it [3:38:16,  4.05it/s]

Error processing fma_large\096\096203.mp3: 


64347it [3:38:16,  5.27it/s]

Error processing fma_large\096\096207.mp3: 


64349it [3:38:17,  6.09it/s]

Error processing fma_large\096\096210.mp3: 


65676it [3:43:55,  2.68it/s]

Error processing fma_large\098\098105.mp3: 


66003it [3:45:49,  8.43it/s]

Error processing fma_large\098\098558.mp3: 
Error processing fma_large\098\098559.mp3: 
Error processing fma_large\098\098560.mp3: 
Error processing fma_large\098\098562.mp3: 
Error processing fma_large\098\098565.mp3: 
Error processing fma_large\098\098566.mp3: 
Error processing fma_large\098\098567.mp3: 
Error processing fma_large\098\098568.mp3: 
Error processing fma_large\098\098569.mp3: 
Error processing fma_large\098\098571.mp3: 


66472it [3:48:18,  6.39it/s]

Error processing fma_large\099\099134.mp3: 


67273it [3:51:09,  6.70it/s]

Error processing fma_large\101\101265.mp3: 


67275it [3:51:10,  5.72it/s]

Error processing fma_large\101\101272.mp3: 
Error processing fma_large\101\101275.mp3: 


67584it [3:52:15,  5.03it/s]

Error processing fma_large\102\102241.mp3: 
Error processing fma_large\102\102243.mp3: 


67587it [3:52:15,  7.83it/s]

Error processing fma_large\102\102247.mp3: 


67591it [3:52:16,  8.69it/s]

Error processing fma_large\102\102249.mp3: 


67607it [3:52:19,  6.59it/s]

Error processing fma_large\102\102289.mp3: 


68836it [3:56:39,  4.46it/s]

Error processing fma_large\105\105247.mp3: 


69649it [3:59:32,  4.90it/s]

Error processing fma_large\106\106409.mp3: 


69654it [3:59:33,  6.90it/s]

Error processing fma_large\106\106412.mp3: 


69657it [3:59:33,  7.27it/s]

Error processing fma_large\106\106415.mp3: 


69834it [4:00:12,  6.12it/s]

Error processing fma_large\106\106628.mp3: 


71710it [4:06:49,  3.94it/s]

Error processing fma_large\108\108920.mp3: 


71712it [4:06:50,  5.17it/s]

Error processing fma_large\108\108925.mp3: 


71989it [4:07:46,  5.38it/s]

Error processing fma_large\109\109266.mp3: 


72665it [4:10:06,  6.77it/s]

Error processing fma_large\110\110236.mp3: 


76913it [4:24:33,  4.86it/s]

Error processing fma_large\115\115610.mp3: 


78311it [4:29:37,  6.16it/s]

Error processing fma_large\117\117441.mp3: 


80887it [4:38:43,  5.74it/s]

Error processing fma_large\120\120571.mp3: 


85604it [4:55:40,  4.30it/s]

Error processing fma_large\126\126981.mp3: 


85919it [4:56:46,  4.86it/s]

Error processing fma_large\127\127336.mp3: 


86383it [4:58:25,  4.57it/s]

Error processing fma_large\127\127928.mp3: 


87287it [5:01:36,  4.79it/s]

Error processing fma_large\129\129207.mp3: 


87719it [5:03:08,  5.74it/s]

Error processing fma_large\129\129800.mp3: 


88135it [5:04:36,  6.59it/s]

Error processing fma_large\130\130328.mp3: 


88487it [5:05:50,  4.87it/s]

Error processing fma_large\130\130748.mp3: 


88490it [5:05:51,  5.89it/s]

Error processing fma_large\130\130751.mp3: 


89160it [5:08:12,  4.39it/s]

Error processing fma_large\131\131545.mp3: 


90611it [5:13:22,  4.98it/s]

Error processing fma_large\133\133297.mp3: 


90940it [5:14:31,  5.03it/s]

Error processing fma_large\133\133641.mp3: 


90948it [5:14:33,  6.23it/s]

Error processing fma_large\133\133647.mp3: 


91891it [5:17:53,  4.90it/s]

Error processing fma_large\134\134887.mp3: 


96150it [5:33:03, 25.30it/s]

Error processing fma_large\140\140449.mp3: 
Error processing fma_large\140\140450.mp3: 
Error processing fma_large\140\140451.mp3: 
Error processing fma_large\140\140452.mp3: 
Error processing fma_large\140\140453.mp3: 
Error processing fma_large\140\140454.mp3: 
Error processing fma_large\140\140455.mp3: 
Error processing fma_large\140\140456.mp3: 
Error processing fma_large\140\140457.mp3: 
Error processing fma_large\140\140458.mp3: 
Error processing fma_large\140\140459.mp3: 
Error processing fma_large\140\140460.mp3: 
Error processing fma_large\140\140461.mp3: 
Error processing fma_large\140\140462.mp3: 
Error processing fma_large\140\140463.mp3: 
Error processing fma_large\140\140464.mp3: 
Error processing fma_large\140\140465.mp3: 
Error processing fma_large\140\140466.mp3: 
Error processing fma_large\140\140467.mp3: 
Error processing fma_large\140\140468.mp3: 
Error processing fma_large\140\140469.mp3: 
Error processing fma_large\140\140470.mp3: 
Error processing fma_large\140\1

97524it [5:37:53,  6.29it/s]

Error processing fma_large\142\142614.mp3: 


98651it [5:41:53,  5.85it/s]

Error processing fma_large\143\143992.mp3: 


98871it [5:42:41,  4.86it/s]

Error processing fma_large\144\144518.mp3: 


98965it [5:43:01,  4.75it/s]

Error processing fma_large\144\144619.mp3: 


99386it [5:44:32,  4.92it/s]

Error processing fma_large\145\145056.mp3: 


100174it [5:47:20,  6.14it/s]

Error processing fma_large\146\146056.mp3: 


101221it [5:51:05,  6.17it/s]

Error processing fma_large\147\147419.mp3: 


101226it [5:51:06,  6.45it/s]

Error processing fma_large\147\147424.mp3: 


102370it [5:55:10, 15.51it/s]

Error processing fma_large\148\148786.mp3: 
Error processing fma_large\148\148787.mp3: 
Error processing fma_large\148\148788.mp3: 
Error processing fma_large\148\148789.mp3: 
Error processing fma_large\148\148790.mp3: 
Error processing fma_large\148\148791.mp3: 
Error processing fma_large\148\148792.mp3: 
Error processing fma_large\148\148793.mp3: 
Error processing fma_large\148\148794.mp3: 
Error processing fma_large\148\148795.mp3: 


104381it [6:02:20,  4.22it/s]

Error processing fma_large\151\151920.mp3: 


106373it [6:09:28,  4.57it/s]

Error processing fma_large\155\155051.mp3: 


106574it [6:10:12,  4.80it/s]


PermissionError: [Errno 13] Permission denied: 'mfcc_features.csv'

In [8]:
# Show the DataFrame; pandas renders only the first and last rows of a large frame.
df

Unnamed: 0,Feature,Label
0,"[-505.49918, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",000002
1,"[-546.32404, 3.5036364, 3.391655, 3.2107925, 2...",000003
2,"[-528.1163, 0.02087694, 0.020843748, 0.0207859...",000005
3,"[-537.1972, 2.36311, 1.72684, 0.8106421, -0.20...",000010
4,"[-496.54355, 8.233688, 7.9586983, 7.515486, 6....",000020
...,...,...
106396,"[-190.92928, 171.06293, -18.026897, 38.069458,...",155316
106397,"[-153.39252, 129.0062, -16.280928, 47.61666, -...",155317
106398,"[-186.80803, 138.29863, -21.876831, 52.36566, ...",155318
106399,"[-162.47487, 129.75598, -2.489646, 38.75331, 1...",155319


In [11]:
len(df)  #   Number of rows in df (one per audio file, assuming df is the frame built by the extraction cell).


106401

In [28]:
import pandas as pd
# Lift the column-width limit so long cell contents are not cut off by the
# width setting. This is a global pandas option and stays in effect for
# every later cell.
pd.options.display.max_colwidth = None
# new_Df is a second name for the same DataFrame object, not a copy.
new_Df = df
new_Df.head(1)

Unnamed: 0,Feature,Label
0,"[-505.49918, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -369.04953, 106.32715, -3.6316793, -6.880691, 8.136753, 0.504894, 7.810167, 7.916319, -1.8093027, 10.70998, 8.038971, -21.083294, -27.42471, -15.193598, -20.95707, -28.285276, -15.568397, -8.181971, -12.256559, -18.827843, -244.5656, 136.0405, -33.938076, 13.452998, 4.5233684, -0.28139162, -4.9782286, 8.261867, -5.819221, 7.0278664, 6.2641964, -17.067081, -18.82659, -15.8501005, -7.2980537, -20.223766, -4.0527067, -5.8691797, -13.106865, -18.857725, -213.77084, 134.94666, -44.719196, 17.682056, 2.8840108, 1.213661, -6.8941317, 15.03833, -4.91531, 4.844971, 1.9339306, -13.390538, -16.761171, -15.196309, 3.4678988, -12.701963, 1.5036604, -6.2001867, -11.495818, -19.432014, -226.78307, 137.82791, -46.830395, 13.150717, 0.5545795, -2.175369, 4.0567303, 18.691036, -0.77067435, -3.7546992, -2.1105182, -7.9100075, -10.268992, -14.9363365, -1.7742882, -10.833584, -3.1677642, -4.6668434, -12.416085, -15.950317, ...]",2


In [29]:
# Pickle stores the DataFrame as Python objects, so the array-valued
# "Feature" column is written without being converted to text.
df.to_pickle("features.pkl") #   Saving the pandas.DataFrame as a pickle (.pkl) file.

In [30]:
import pandas as pd

# Read data from features.pkl into a DataFrame.
# NOTE(review): unpickling can execute arbitrary code - only load pickle
# files that this notebook (or another trusted source) produced.
daaf = pd.read_pickle('features.pkl')

# Print the first row of the DataFrame (plain-text rendering via print)
print(daaf.head(1))


                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        