In [1]:
from google.colab import drive
import shutil
import os
# Mount Google Drive at /content/drive; the inference cell reads its audio
# folder and model checkpoint from /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# prompt: git clone

!git clone https://github.com/LindaRahaoui/cnn-onset-detection.git


fatal: destination path 'cnn-onset-detection' already exists and is not an empty directory.


In [4]:
%cd /cnnsteph/cnn-onset-detection

/content/cnn-onset-detection


In [2]:
# prompt: run gen_songlist.py

!python /cnnsteph/cnn-onset-detection/gen_songlist.py


python: can't open file 'c:\\cnnsteph\\cnn-onset-detection\\gen_songlist.py': [Errno 2] No such file or directory


In [None]:
!python /cnnsteph/cnn-onset-detection/get_data_stats.py

In [None]:
!python /cnnsteph/cnn-onset-detection/split.py

In [None]:
!python/cnnsteph/cnn-onset-detection/gen_data.py

In [10]:
!python /cnnsteph/cnn-onset-detection/train.py 0

data
model
Cross-va
Fraction of positive examples: 0.028494
Generator done
Training...
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/usr/lib/python3.10/selectors.py", line 416, in select
    fd_event_list = self._selector.poll(timeout)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/cnn-onset-detection/train.py", line 142, in <module>
    fold = int(sys.argv[1])  # cmd line argument
  File "/content/cnn-onset-detection/train.py", line 84, in main
    n_train = 0
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 631, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1329, in _next_data
    idx, data = self._get_data()
  File "/usr/local/lib/python3.10/dist-packages/torch/uti

# Inference


In [None]:
# Switch to the cloned repository; presumably required so that
# `from utils import onsetCNN` in the next cell resolves — TODO confirm.
%cd /content/cnn-onset-detection

In [None]:
import os
import torch
import librosa
import numpy as np
import matplotlib.pyplot as plt
from utils import onsetCNN

def load_model(model_path, device):
    """Build an ``onsetCNN`` and restore its weights from a checkpoint.

    Args:
        model_path: Path of the saved state dict, handed to ``torch.load``.
        device: Device the network is moved to; also used as ``map_location``
            so the checkpoint tensors are loaded onto that device.

    Returns:
        The network in double precision, switched to evaluation mode.
    """
    network = onsetCNN().double().to(device)
    state_dict = torch.load(model_path, map_location=device)
    network.load_state_dict(state_dict)
    network.eval()
    return network

def preprocess_audio(audio_path, sr=44100, n_fft=1024, hop_length=441, n_mels=80, fmin=27.5, fmax=16000):
    """Load an audio file and compute three dB-scaled mel spectrograms.

    The three spectrograms share the hop length, number of mel bands and
    frequency range, and differ only in FFT size: ``n_fft`` for the first,
    then 2048 and 4096.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sample rate the audio is loaded at.
        n_fft: FFT size of the first spectrogram only.
        hop_length: Hop between successive frames, in samples.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank (applied to all three).
        fmax: Highest frequency of the mel filter bank (applied to all three).

    Returns:
        A tuple of three arrays, one per FFT size, each converted with
        ``librosa.power_to_db`` relative to that spectrogram's own maximum.

    NOTE(review): confirm that this dB scaling matches the features the
    model was trained on.
    """
    print("preprocessing")
    y, sr = librosa.load(audio_path, sr=sr)

    # Previously the second and third spectrograms hard-coded fmax (and the
    # third fmin as well), silently ignoring the arguments; every spectrogram
    # now honours the same fmin/fmax.
    fft_sizes = (n_fft, 2048, 4096)
    mel_spectrograms = [
        librosa.feature.melspectrogram(
            y=y, sr=sr, n_fft=fft_size, hop_length=hop_length,
            n_mels=n_mels, fmin=fmin, fmax=fmax,
        )
        for fft_size in fft_sizes
    ]

    # ref=np.max makes each spectrogram's peak 0 dB.
    mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db = (
        librosa.power_to_db(mel, ref=np.max) for mel in mel_spectrograms
    )
    return mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db

def predict_onsets(model, mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db, device, hop_length=441, sr=44100):
    """Slide a 15-frame window over the spectrograms and binarise the scores.

    For every window position the three spectrogram slices are stacked as
    channels, given a leading batch dimension and passed to ``model``. The
    resulting score is added to *every* frame covered by that window, so
    overlapping windows accumulate.

    Args:
        model: Callable applied to a float64 tensor of shape
            (1, 3, n_bands, 15); its output is squeezed to a scalar score.
        mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db:
            2-D arrays indexed as [band, frame]. The frame count is taken
            from the first one.
        device: Device the input tensor is moved to.
        hop_length: Unused here; kept for interface compatibility.
        sr: Unused here; kept for interface compatibility.

    Returns:
        Integer array with one entry per frame: 1 where the accumulated score
        times 10 exceeds 0.02, else 0. All zeros if there are fewer frames
        than one window.
    """
    half_context = 7  # frames on each side of the window centre
    window = 2 * half_context + 1
    n_frames = mel_spectrogram1_db.shape[1]
    spectrograms = (mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db)

    accumulated = np.zeros(n_frames)

    with torch.no_grad():
        for start in range(n_frames - window + 1):
            stop = start + window
            channels = np.stack([spec[:, start:stop] for spec in spectrograms], axis=0)
            # Leading dimension of size one acts as the batch axis.
            batch = torch.tensor(channels, dtype=torch.float64).unsqueeze(0).to(device)
            score = model(batch).squeeze().cpu().numpy()
            accumulated[start:stop] += score

    scaled = accumulated * 10
    return np.where(scaled > 0.02, 1, 0)

def plot_onsets(onsets, audio_path, sr=44100, hop_length=441):
    """Show the waveform and the per-frame onset values in two stacked panels.

    Args:
        onsets: One value per spectrogram frame.
        audio_path: Audio file to reload for the waveform panel.
        sr: Sample rate used to load the audio and to convert to seconds.
        hop_length: Samples per frame, used to place ``onsets`` on a time axis.
    """
    signal, sr = librosa.load(audio_path, sr=sr)
    sample_times = np.arange(len(signal)) / sr
    frame_times = np.arange(len(onsets)) * hop_length / sr

    fig, (wave_ax, onset_ax) = plt.subplots(2, 1, figsize=(14, 8))

    # Top panel: raw waveform against time.
    wave_ax.plot(sample_times, signal, label='Waveform')
    wave_ax.set_title('Audio Waveform')
    wave_ax.set_xlabel('Time (s)')
    wave_ax.set_ylabel('Amplitude')
    wave_ax.legend()

    # Bottom panel: onset values, one per frame.
    onset_ax.plot(frame_times, onsets, label='Onset Predictions')
    onset_ax.set_title('Onset Predictions')
    onset_ax.set_xlabel('Time (s)')
    onset_ax.set_ylabel('Onset Confidence')
    onset_ax.legend()

    fig.tight_layout()
    plt.show()

def main(audio_path, model_path, plot=False, onsets_dir=None):
    """Detect onsets in one audio file and save them as a text file.

    Args:
        audio_path: Path of the audio file to analyse.
        model_path: Path of the model checkpoint to load.
        plot: When truthy, also display the waveform and the onsets.
        onsets_dir: Folder the result is written to. Defaults to an
            ``Onsets`` folder next to the audio file; it is created if needed.

    The output file is named after the audio file, with its extension
    replaced by ``_onsets.txt``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = load_model(model_path, device)
    mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db = preprocess_audio(audio_path)
    print("preprocessed")
    onsets = predict_onsets(model, mel_spectrogram1_db, mel_spectrogram2_db, mel_spectrogram3_db, device)
    print("Onsets detection done")

    # Derive the output location from the arguments instead of relying on
    # module-level variables, so the function works for any caller.
    if onsets_dir is None:
        onsets_dir = os.path.join(os.path.dirname(audio_path), 'Onsets')
    os.makedirs(onsets_dir, exist_ok=True)

    # splitext only strips the final extension, unlike str.replace which
    # would rewrite every '.wav' occurring in the name.
    stem, _ = os.path.splitext(os.path.basename(audio_path))
    save_path = os.path.join(onsets_dir, stem + '_onsets.txt')
    print("Onsets save to : " , save_path)
    np.savetxt(save_path, onsets)

    if plot:
        plot_onsets(onsets, audio_path)


if __name__ == "__main__":
    audio_dir = '/content/drive/MyDrive/Rakotozafi'
    model_path = '/content/drive/MyDrive/wav/saved_model_0_49.pt'

    # Create the Onsets output folder if it does not exist yet; exist_ok
    # avoids the separate existence check.
    onsets_dir = os.path.join(audio_dir, 'Onsets')
    os.makedirs(onsets_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so files are
    # processed in the same order on every run.
    for file_name in sorted(os.listdir(audio_dir)):
        if file_name.endswith('.wav'):
            print(file_name)
            audio_path = os.path.join(audio_dir, file_name)
            main(audio_path, model_path)
