In [2]:
pip install librosa essentia

Collecting essentia
  Downloading essentia-2.1b6.dev1110-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev1110


In [10]:
import os
import shutil

# Parent directory containing the 10 folders
parent_dir = '/content/drive/MyDrive/genres_original'

# Destination directory where all audio files will be copied
destination_dir = '/content/drive/MyDrive/all_audio'

# Create the destination directory if it doesn't exist
if not os.path.exists(destination_dir):
    os.makedirs(destination_dir)

# Iterate over each subfolder in the parent directory
for subdir_name in os.listdir(parent_dir):
    subdir_path = os.path.join(parent_dir, subdir_name)

    # Check if it is a directory
    if os.path.isdir(subdir_path):
        # Iterate over each file in the subfolder
        for file_name in os.listdir(subdir_path):
            # Full file path
            source_file = os.path.join(subdir_path, file_name)

            # Check if it's a file (not a directory)
            if os.path.isfile(source_file):
                # Destination file path
                destination_file = os.path.join(destination_dir, file_name)

                # Copy the file
                shutil.copy(source_file, destination_file)

print("Audio files have been successfully copied to the destination directory.")


Audio files have been successfully copied to the destination directory.


In [9]:
import librosa
import essentia
import essentia.standard as ess
import numpy as np

# Load the audio file using librosa
file_path = '/content/blues.00000.wav'
y, sr = librosa.load(file_path)

# Using librosa to extract features
# Tempo
tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
print('Tempo:', tempo)

# Energy (Root Mean Square Energy)
rmse = librosa.feature.rms(y=y)
energy = np.mean(rmse)
print('Energy:', energy)

# Loudness (average perceptual loudness)
S = np.abs(librosa.stft(y))
loudness = librosa.amplitude_to_db(S, ref=np.max)
avg_loudness = np.mean(loudness)
print('Loudness:', avg_loudness)

# Load the audio file using essentia
loader = ess.MonoLoader(filename=file_path)
audio = loader()

# Extract features using Essentia
extractor = ess.MusicExtractor(lowlevelStats=['mean', 'stdev'])

# Extract features
features, feature_metadata = extractor(file_path)

# Print features from Essentia
print('Danceability:', features['rhythm.danceability'])
print('Loudness (Essentia):', features['lowlevel.average_loudness'])


Tempo: [123.046875]
Energy: 0.13018432
Loudness: -59.057514
Danceability: 1.5240553617477417
Loudness (Essentia): 0.9618877172470093


In [1]:
import os
import librosa
import essentia
import essentia.standard as ess
import numpy as np
import pandas as pd

def extract_features(file_path):
    try:
        # Load the audio file using librosa
        y, sr = librosa.load(file_path)

        # Using librosa to extract features
        # Tempo
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

        # Energy (Root Mean Square Energy)
        rmse = librosa.feature.rms(y=y)
        energy = np.mean(rmse)

        # Loudness (average perceptual loudness)
        S = np.abs(librosa.stft(y))
        loudness = librosa.amplitude_to_db(S, ref=np.max)
        avg_loudness = np.mean(loudness)

        # Load the audio file using essentia
        loader = ess.MonoLoader(filename=file_path)
        audio = loader()

        # Extract features using Essentia
        extractor = ess.MusicExtractor(lowlevelStats=['mean', 'stdev'])
        features, feature_metadata = extractor(file_path)

        # Collect features
        feature_dict = {
            'file': file_path,
            'tempo': tempo,
            'energy': energy,
            'loudness_librosa': avg_loudness,
            'danceability': features['rhythm.danceability'],
            'loudness_essentia': features['lowlevel.average_loudness'],
        }

        return feature_dict

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None

def process_audio_folder(folder_path):
    results = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(('.wav', '.mp3')):
                file_path = os.path.join(root, file)
                features = extract_features(file_path)
                if features:
                    results.append(features)
                    print(features)  # Print the features for each file
    return results

# Path to the folder containing audio files
folder_path = '/content/drive/MyDrive/all_audio'

# Process the folder and extract features
audio_features = process_audio_folder(folder_path)

# Convert to DataFrame
df = pd.DataFrame(audio_features)

# Display the DataFrame
print(df)


{'file': '/content/drive/MyDrive/all_audio/jazz.00097.wav', 'tempo': array([86.1328125]), 'energy': 0.021176668, 'loudness_librosa': -73.37764, 'danceability': 1.1507781744003296, 'loudness_essentia': 0.7815133929252625}
{'file': '/content/drive/MyDrive/all_audio/jazz.00068.wav', 'tempo': array([73.828125]), 'energy': 0.09949384, 'loudness_librosa': -64.89327, 'danceability': 0.9578545689582825, 'loudness_essentia': 0.6659838557243347}
{'file': '/content/drive/MyDrive/all_audio/jazz.00040.wav', 'tempo': array([99.38401442]), 'energy': 0.13801351, 'loudness_librosa': -63.844234, 'danceability': 1.209382176399231, 'loudness_essentia': 0.9654508233070374}


  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing /content/drive/MyDrive/all_audio/jazz.00054.wav: 
{'file': '/content/drive/MyDrive/all_audio/jazz.00059.wav', 'tempo': array([107.66601562]), 'energy': 0.18228659, 'loudness_librosa': -63.5907, 'danceability': 1.3352532386779785, 'loudness_essentia': 0.9569924473762512}
{'file': '/content/drive/MyDrive/all_audio/jazz.00071.wav', 'tempo': array([151.99908088]), 'energy': 0.10030232, 'loudness_librosa': -65.64958, 'danceability': 0.9038035869598389, 'loudness_essentia': 0.888090968132019}
{'file': '/content/drive/MyDrive/all_audio/jazz.00045.wav', 'tempo': array([73.828125]), 'energy': 0.10030164, 'loudness_librosa': -66.014694, 'danceability': 0.9038109183311462, 'loudness_essentia': 0.8881118297576904}
{'file': '/content/drive/MyDrive/all_audio/jazz.00051.wav', 'tempo': array([95.703125]), 'energy': 0.069750376, 'loudness_librosa': -66.51852, 'danceability': 1.0147353410720825, 'loudness_essentia': 0.5248919129371643}
{'file': '/content/drive/MyDrive/all_audio/jazz.000

In [2]:
df.to_csv('/content/drive/MyDrive/song_features.csv')

In [None]:
import numpy as np

# Extract pitch (fundamental frequency) using librosa
pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)

# Extract the highest magnitude pitch at each time frame
pitch = [np.max(pitches[:, i]) for i in range(pitches.shape[1])]
pitch = [p if p > 0 else np.nan for p in pitch]  # Replace zeros with NaN for clarity
average_pitch = np.nanmean(pitch)  # Average pitch
print('Average Pitch: {:.2f} Hz'.format(average_pitch))
