In [1]:
!pip install librosa

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
                                              0.0/260.7 kB ? eta -:--:--
     -------------------------------------- 260.7/260.7 kB 8.1 MB/s eta 0:00:00
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl (23 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
                                              0.0/1.0 MB ? eta -:--:--
     ---------------------------------------- 1.0/1.0 MB 32.5 MB/s eta 0:00:00
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp311-cp311-win_amd64.whl (166 kB)
                                              0.0/166.7 kB ? eta -:--:--
     ---------------------------------------- 166.7/166.7 kB ? eta 0:00:00
Installing collected packages: soxr, audioread, soundfile, librosa
Successfully installed audioread-3.0.1 librosa-0.11.0 soundfile-0.13.1 soxr-0.5.0.post1




In [2]:
import librosa
import numpy as np

In [3]:
def extract_all_audio_features(audio_path):
    y, sr = librosa.load(audio_path, sr=None)

    # Tempo and Beat
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

    # Energy
    rms = librosa.feature.rms(y=y)
    energy = np.mean(rms)

    # Zero Crossing Rate (for Acousticness, Danceability)
    zcr = librosa.feature.zero_crossing_rate(y)
    zcr_mean = np.mean(zcr)

    # Spectral Features
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

    # MFCC
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs, axis=1)

    # Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = np.mean(chroma, axis=1)

    # Feature approximations
    danceability = min(1.0, tempo / 200 + (1 - zcr_mean)) / 2
    acousticness = 1 - zcr_mean
    valence = np.mean(chroma_mean)
    instrumentalness = 1 - np.mean(spectral_centroid) / (np.max(spectral_centroid) + 1e-6)
    speechiness = np.mean(zcr)

    return {
        "tempo": float(tempo),
        "energy": float(energy),
        "danceability": float(danceability),
        "acousticness": float(acousticness),
        "valence": float(valence),
        "instrumentalness": float(instrumentalness),
        "speechiness": float(speechiness),
        "zcr_mean": float(zcr_mean),
        "spectral_centroid": float(np.mean(spectral_centroid)),
        "spectral_bandwidth": float(np.mean(spectral_bandwidth)),
        "spectral_rolloff": float(np.mean(spectral_rolloff)),
        "mfccs_mean": mfccs_mean.tolist(),
        "chroma_mean": chroma_mean.tolist(),
    }


In [4]:
extract_all_audio_features('Riptide.wav')

  "class": algorithms.Blowfish,


{'tempo': 102.27272727272727,
 'energy': 0.22227999567985535,
 'danceability': 0.5,
 'acousticness': 0.9351751061410035,
 'valence': 0.419386625289917,
 'instrumentalness': 0.7891563798048146,
 'speechiness': 0.06482489385899645,
 'zcr_mean': 0.06482489385899645,
 'spectral_centroid': 3254.024554556327,
 'spectral_bandwidth': 3741.2585790992925,
 'spectral_rolloff': 6853.425066572681,
 'mfccs_mean': [-124.12195587158203,
  133.31143188476562,
  -15.86912727355957,
  36.422176361083984,
  -10.803810119628906,
  23.340024948120117,
  -7.594180583953857,
  11.352689743041992,
  -0.581663191318512,
  -0.6595330834388733,
  -6.25246000289917,
  1.587208867073059,
  0.06509604305028915],
 'chroma_mean': [0.42346349358558655,
  0.4400040805339813,
  0.41401582956314087,
  0.45718392729759216,
  0.5416852235794067,
  0.5843068361282349,
  0.37975695729255676,
  0.35351577401161194,
  0.39304840564727783,
  0.3431132733821869,
  0.34801459312438965,
  0.35453128814697266]}