Use librosa and numpy

The following features are commonly used in beat detection algorithms:
1. Tempogram: A time-tempo representation (the local autocorrelation of the onset strength envelope) that captures how the local tempo (rate of beats) of a signal varies over time.
2. Onset strength: The strength of onsets (the beginning of a note or percussive event) in the audio signal. This is useful for detecting the location of beats.

Once we have the features, we can process them to detect beats. There are various algorithms for beat detection, such as dynamic programming-based methods, spectral methods, or autocorrelation methods. One simple approach is to use `librosa.beat.beat_track`, a dynamic programming-based beat tracker, to estimate the tempo and detect beats from the onset strength envelope:

This script uses the onset strength to estimate the tempo (in BPM) and detect the beat times (in seconds). 

Keep in mind that this is a basic example of beat detection. You may need to experiment with different features and algorithms to achieve the desired accuracy and performance for your specific application.

In [None]:
import librosa
import numpy as np

def extract_features(audio_file):
    """Load an audio file and compute its tempogram and onset strength envelope.

    Args:
        audio_file: Path to an audio file readable by ``librosa.load``.

    Returns:
        A ``(tempogram, onset_strength)`` tuple holding the results of
        ``librosa.feature.tempogram`` and ``librosa.onset.onset_strength``.
    """
    # Loaded with librosa's defaults (no explicit sample rate is requested).
    y, sr = librosa.load(audio_file)

    # Calculate the onset strength envelope
    onset_strength = librosa.onset.onset_strength(y=y, sr=sr)

    # Calculate the tempogram from the envelope computed above. Passing `y`
    # instead would make librosa recompute the same envelope internally.
    tempogram = librosa.feature.tempogram(onset_envelope=onset_strength, sr=sr)

    return tempogram, onset_strength

# Placeholder path: point this at a real audio file before running the cell.
audio_file = "path/to/your/audio.wav"
# Compute both features for the file in a single call.
tempogram, onset_strength = extract_features(audio_file)

In [None]:
def detect_beats(onset_strength, sr):
    """Estimate the tempo and beat times from an onset strength envelope.

    Args:
        onset_strength: Onset strength envelope, e.g. the output of
            ``librosa.onset.onset_strength``.
        sr: Sampling rate of the audio the envelope was computed from.

    Returns:
        A ``(tempo, beat_times)`` tuple: the tempo estimate from
        ``librosa.beat.beat_track`` and the beat positions in seconds.
    """
    # The envelope must be passed by keyword: the first parameter of
    # beat_track is the raw waveform `y`, and recent librosa releases accept
    # keyword arguments only.
    tempo, beat_frames = librosa.beat.beat_track(
        onset_envelope=onset_strength, sr=sr
    )
    # beat_track reports beats as frame indices; convert them to seconds.
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    return tempo, beat_times

# `sr` only exists as a local inside extract_features, so it has to be defined
# here. librosa.load() resamples to 22050 Hz by default, which is the rate the
# onset strength envelope was computed at.
sr = 22050
tempo, beat_times = detect_beats(onset_strength, sr)
print(f"Estimated tempo: {tempo} BPM")
print("Beat times (seconds):", beat_times)

Use essentia

In [None]:
import essentia.standard as es

def detect_beats_essentia(audio_file):
    """Estimate the tempo and beat times of an audio file with Essentia.

    Args:
        audio_file: Path to the audio file to analyse.

    Returns:
        A ``(bpm, beat_times)`` tuple: the estimated tempo in BPM and the
        beat positions in seconds as a plain Python list.
    """
    # Load audio file
    loader = es.MonoLoader(filename=audio_file)
    audio = loader()

    # Calculate beat positions
    rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
    # RhythmExtractor2013 returns five outputs (bpm, ticks, confidence,
    # estimates, bpmIntervals); only the first two are needed here.
    bpm, beats, *_ = rhythm_extractor(audio)
    beat_times = beats.tolist()

    return bpm, beat_times

# Placeholder path: point this at a real audio file before running the cell.
audio_file = "path/to/your/audio.wav"
# Run the Essentia-based detector and report its results.
bpm, beat_times = detect_beats_essentia(audio_file)
print(f"Essentia - Estimated tempo: {bpm} BPM")
print("Essentia - Beat times (seconds):", beat_times)

Use madmom

In [None]:
import madmom

def detect_beats_madmom(audio_file):
    """Estimate the tempo and beat times of an audio file with madmom.

    Args:
        audio_file: Path to the audio file to analyse.

    Returns:
        A ``(tempo, beat_times)`` tuple: the dominant tempo in BPM as a float
        and the beat positions in seconds as a numpy array.
    """
    # Frame rate shared by the beat tracker and the tempo estimator; it must
    # match the rate of the RNN beat activation function (100 frames/second).
    fps = 100

    # Compute the beat activation function once and reuse it below.
    act = madmom.features.beats.RNNBeatProcessor()(audio_file)

    # Calculate beat positions. The DBN tracker already returns positions in
    # seconds, so no frame-to-time conversion is applied.
    proc = madmom.features.beats.DBNBeatTrackingProcessor(fps=fps)
    beat_times = proc(act)

    # Estimate the tempo from the same activations. The processor returns
    # (tempo, strength) pairs ordered by strength, so the very first value is
    # the dominant tempo.
    tempo_proc = madmom.features.tempo.TempoEstimationProcessor(fps=fps)
    tempi = tempo_proc(act)
    tempo = float(tempi.ravel()[0])

    return tempo, beat_times

# Placeholder path: point this at a real audio file before running the cell.
audio_file = "path/to/your/audio.wav"
# Run the madmom-based detector and report its results.
tempo, beat_times = detect_beats_madmom(audio_file)
print(f"Madmom - Estimated tempo: {tempo} BPM")
print("Madmom - Beat times (seconds):", beat_times)