In [None]:
import librosa
import os
import pandas as pd
import numpy as np
import parselmouth  
from scipy.signal import lfilter
from scipy.fftpack import fft
from sklearn.preprocessing import StandardScaler

In [3]:
# --- Configuration -----------------------------------------------------------
# Folder containing the .wav clips to process.
# NOTE(review): this is a machine-specific absolute path; adjust it when the
# notebook is run elsewhere.
audio_directory = 'D://CyberAI 2024//Task 2'

# Mel-spectrogram settings: number of mel bands, and the upper frequency bound
# handed to librosa (None means no explicit bound is requested here).
n_mels, fmax = 128, None

# Accumulators filled by the extraction loop: one feature vector and one
# integer label per processed clip, kept in matching order.
feature_list, labels = [], []

In [None]:
# Extract one fixed-length feature vector (and an integer label) per .wav clip.
#
# Start from empty accumulators so that re-running this cell rebuilds the
# dataset instead of appending a second copy of every clip.
feature_list = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the exported CSV) reproducible across runs and machines.
for file_name in sorted(os.listdir(audio_directory)):
    if not file_name.endswith('.wav'):
        continue

    file_path = os.path.join(audio_directory, file_name)

    # The label is the single character right before the '.wav' suffix,
    # e.g. 'clip_3.wav' -> 3. Fail with a message that names the file when
    # that character is missing or is not a digit.
    try:
        label = int(file_name[-5])
    except (IndexError, ValueError) as exc:
        raise ValueError(
            f"Cannot read a label from file name {file_name!r}: "
            "expected a digit immediately before '.wav'"
        ) from exc

    # sr=None keeps the file's own sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    # FFT window: half the clip length, clamped to the range [256, 512].
    n_fft_value = min(512, max(256, len(y) // 2))

    # Frame-level features; each result has shape (n_rows, n_frames).
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=n_fft_value)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft_value)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=n_fft_value)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft_value)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft_value)
    spectral_flatness = librosa.feature.spectral_flatness(y=y, n_fft=n_fft_value)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft_value)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft_value, n_mels=n_mels, fmax=fmax)
    # Tonnetz is computed on the harmonic component of the signal; note that
    # this call is not given n_fft_value.
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)

    # Collapse the time axis: average every feature row over all frames.
    mfccs_mean = np.mean(mfcc, axis=1)
    chroma_mean = np.mean(chroma, axis=1)
    spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
    spectral_centroid_mean = np.mean(spectral_centroid, axis=1)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth, axis=1)
    spectral_flatness_mean = np.mean(spectral_flatness, axis=1)
    spectral_rolloff_mean = np.mean(spectral_rolloff, axis=1)
    mel_spectrogram_mean = np.mean(mel_spectrogram, axis=1)
    tonnetz_mean = np.mean(tonnetz, axis=1)

    # Concatenate everything into a single 1-D feature vector. The order here
    # must match the column names assigned when the table is built.
    features = np.hstack([mfccs_mean, chroma_mean, spectral_contrast_mean,
                          spectral_centroid_mean, spectral_bandwidth_mean,
                          spectral_flatness_mean, spectral_rolloff_mean,
                          mel_spectrogram_mean, tonnetz_mean])

    # Keep features and labels aligned by appending both in the same step.
    feature_list.append(features)
    labels.append(label)

In [6]:
# Build the feature table: one row per clip, one named column per feature.
#
# The column counts come from the extraction settings rather than from
# variables left over from the last loop iteration, so this cell does not
# depend on stale loop state and also works (yielding an empty table) when no
# .wav file was processed.
num_mel_spectrogram = n_mels  # the mel spectrogram has one row per mel band
num_tonnetz = 6               # librosa's tonnetz features have 6 dimensions

# Names follow the order in which the per-clip feature vector is stacked:
# MFCCs, chroma, spectral contrast, four scalar spectral statistics,
# mel bands, tonnetz.
feature_columns = (
    [f'mfcc_{i}' for i in range(1, 14)]
    + [f'chroma_{i}' for i in range(1, 13)]
    + [f'spectral_contrast_{i}' for i in range(1, 8)]
    + ['spectral_centroid', 'spectral_bandwidth', 'spectral_flatness', 'spectral_rolloff']
    + [f'mel_spectrogram_{i}' for i in range(1, num_mel_spectrogram + 1)]
    + [f'tonnetz_{i}' for i in range(1, num_tonnetz + 1)]
)

# Full column order of the final table, with the label last.
columns = feature_columns + ['label']

# pandas raises if the number of names does not match the width of the
# feature vectors, which guards against the two getting out of sync.
df = pd.DataFrame(feature_list, columns=feature_columns)
df['label'] = labels

In [7]:
# Write the feature table to disk: keep the header row, drop the row index.
df.to_csv(
    'task2.csv',
    header=True,
    index=False,
)