!pip install librosa --upgrade scipy 

In [15]:
import os
import pandas as pd
import librosa
import numpy as np
import scipy.stats

# Function to extract features from a single audio file
def extract_features(file_path):
    """Extract a flat dictionary of summary features from one audio file.

    The file is loaded with librosa and reduced to per-file summary values:
    the time-averaged value of each of 13 MFCC coefficients and of each
    chroma bin, the overall mean of the spectral centroid, bandwidth and
    contrast, the estimated tempo, and the mean, standard deviation,
    skewness and kurtosis of the raw waveform samples.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict mapping feature names (``mfcc_<i>``, ``chroma_<i>``,
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_contrast``,
        ``tempo``, ``mean``, ``std_dev``, ``skewness``, ``kurtosis``) to
        numeric values, or ``None`` if anything went wrong while loading or
        analysing the file (the error is printed, not raised).
    """
    try:
        # sr=None asks librosa to keep the file's own sampling rate.
        y, sr = librosa.load(file_path, sr=None)

        # MFCCs, averaged over time (axis=1) -> one value per coefficient
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)

        # Chroma features, averaged over time -> one value per chroma bin
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # Spectral features, each collapsed to a single overall mean
        spectral_centroid_mean = np.mean(
            librosa.feature.spectral_centroid(y=y, sr=sr)
        )
        spectral_bandwidth_mean = np.mean(
            librosa.feature.spectral_bandwidth(y=y, sr=sr)
        )
        spectral_contrast_mean = np.mean(
            librosa.feature.spectral_contrast(y=y, sr=sr)
        )

        # Tempo estimate. Depending on the librosa version the tempo may come
        # back as a one-element array rather than a scalar; normalise it to a
        # plain float so it is stored as a number rather than an array object.
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = float(np.ravel(tempo)[0])

        # Statistics of the raw waveform samples
        mean = np.mean(y)
        std_dev = np.std(y)
        skewness = scipy.stats.skew(y)
        kurtosis = scipy.stats.kurtosis(y)

        # Flatten everything into a single feature dictionary
        features = {
            **{f'mfcc_{i}': value for i, value in enumerate(mfccs_mean)},
            **{f'chroma_{i}': value for i, value in enumerate(chroma_mean)},
            'spectral_centroid': spectral_centroid_mean,
            'spectral_bandwidth': spectral_bandwidth_mean,
            'spectral_contrast': spectral_contrast_mean,
            'tempo': tempo,
            'mean': mean,
            'std_dev': std_dev,
            'skewness': skewness,
            'kurtosis': kurtosis,
        }

        return features
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort a
        # whole batch run, so report the problem and signal it with None.
        print(f"Error processing file {file_path}: {e}")
        return None

# Function to traverse directories and extract features
def process_directory(root_dir):
    """Extract features from every ``.wav`` file one level below *root_dir*.

    Each immediate sub-directory of *root_dir* is treated as a label; every
    ``.wav`` file directly inside it is passed to ``extract_features``.
    Entries of *root_dir* that are not directories, files without a ``.wav``
    extension, and files for which ``extract_features`` returns ``None`` are
    skipped.

    Directory listings are sorted so the order of the returned rows is
    reproducible, and the extension check ignores case so that files named
    ``*.WAV`` are not silently dropped.

    Args:
        root_dir: Directory whose sub-directories contain the audio files.

    Returns:
        A list of feature dicts, each extended with ``'file_name'`` and
        ``'label'`` (the name of the containing sub-directory).
    """
    data = []

    # os.listdir returns entries in arbitrary order; sort for stable output.
    for label in sorted(os.listdir(root_dir)):
        label_dir = os.path.join(root_dir, label)
        if not os.path.isdir(label_dir):
            continue

        for file_name in sorted(os.listdir(label_dir)):
            # Adjust if you use different formats
            if not file_name.lower().endswith('.wav'):
                continue

            features = extract_features(os.path.join(label_dir, file_name))
            if features is None:
                continue

            features['file_name'] = file_name
            features['label'] = label
            data.append(features)

    return data

# Directory containing one sub-directory of audio files per label
root_dir = "genres_original"

# Process the directory and get the feature data (one dict per audio file)
data = process_directory(root_dir)

# Convert the list of feature dicts into a DataFrame (one row per file)
df = pd.DataFrame(data)

if df.empty:
    # Still write a header-only CSV below, but make the problem visible.
    print(f"Warning: no audio features were extracted from '{root_dir}'.")

# Move file_name to the front. reindex is used instead of df[...] because it
# creates the column when it is absent (e.g. no rows were extracted), where
# plain column selection would raise a KeyError.
desired_columns = ['file_name'] + [col for col in df.columns if col != 'file_name']
df = df.reindex(columns=desired_columns)

# Names of the per-coefficient MFCC and per-bin chroma columns
mfcc_columns = [f'mfcc_{i}' for i in range(13)]
chroma_columns = [f'chroma_{i}' for i in range(12)]

# Ensure all expected feature columns are present, adding empty ones if not
for column in mfcc_columns + chroma_columns:
    if column not in df.columns:
        df[column] = None

# Save the DataFrame to a CSV file
df.to_csv('audio_features.csv', index=False)

print("CSV file has been created successfully.")