## Script for automated video data collection and processing

#### Downloading raw video data (around 100 advertising videos):

In [None]:
# Mount Google Drive at /content/drive; the archives and JSON files copied
# in the following cells are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load IPython's autoreload extension; mode 2 re-imports modified modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
# !rm -rf /content/video_data

In [None]:
# Unpack the three archives stored on Drive (raw videos, key-moment data and
# MIDI/WAV data) into /content/.
! unzip  /content/drive/MyDrive/video_data.zip -d /content/
! unzip  /content/drive/MyDrive/key_moments_base_velocity_8_max_velocity_24.zip -d /content/
! unzip  /content/drive/MyDrive/mid_wav_data.zip -d /content/

In [None]:
# Copy the JSON metadata files (tempo, velocity and key results) from Drive
# into /content so later cells can read them from local disk.
! cp /content/drive/MyDrive/wav_to_tempo_orig.json /content
! cp /content/drive/MyDrive/video_key_results_new.json /content
! cp /content/drive/MyDrive/mid_to_tempo_orig.json /content
! cp /content/drive/MyDrive/mid_to_vel_orig.json /content
! cp /content/drive/MyDrive/mid_to_vel_gen.json /content

In [None]:
# Copy the "_new" variant of the WAV tempo file from Drive as well.
! cp /content/drive/MyDrive/wav_to_tempo_orig_new.json /content

In [None]:
# The extraction leaves video_to_midi and video_to_wav under a nested
# /content/content directory (presumably the archive stored "content/..."
# paths -- TODO confirm); move both up one level and delete the leftover.
! mv /content/content/video_to_midi /content
! mv /content/content/video_to_wav /content
! rm -rf /content/content

#### From MP4 to MP3 format:

In [None]:
!pip install pillow os-sys moviepy

In [None]:
from PIL import Image, ImageTk
import moviepy
import moviepy.editor
import os

def MP4toMP3(filename=''):
    """Extract the audio track of a video file and save it as an MP3.

    The MP3 is written to the current working directory. Its name is the
    video's base name truncated at the first '.', e.g.
    '/data/video_ad1.mp4' -> 'video_ad1.mp3'.

    Args:
        filename: Path to the input video file.

    Raises:
        ValueError: If the video contains no audio track.
    """
    # Cut at the FIRST dot (not the last one) so the output name stays
    # identical to what this function has always produced.
    aud_fname = os.path.basename(filename).split('.', 1)[0]

    video = moviepy.editor.VideoFileClip(filename)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the failure surfaces as an opaque
            # AttributeError on None.
            raise ValueError(f'No audio track found in {filename!r}')
        audio.write_audiofile(f'{aud_fname}.mp3')
    finally:
        # Always release the resources held by the clip, even when writing
        # the audio fails; otherwise they pile up over a long batch run.
        video.close()


In [None]:
# Try the converter on a single video; the MP3 (video_ad1.mp3) is written to
# the current working directory.
MP4toMP3('/content/video_data/video_ad1.mp4')

MoviePy - Writing audio in video_ad1.mp3




MoviePy - Done.


#### Excluding voice from mp3 data (leaving only the background music):

In [None]:
# Download release v5.0.2 of tsurumeso/vocal-remover and unpack it into the
# current working directory. Later cells expect the result at
# /content/vocal-remover, so this presumably has to run from /content -- confirm.
! wget https://github.com/tsurumeso/vocal-remover/releases/download/v5.0.2/vocal-remover-v5.0.2.zip
! unzip /content/vocal-remover-v5.0.2.zip

In [None]:
# Make /content the working directory so the relative `%cd vocal-remover`
# in the next cell resolves to /content/vocal-remover.
%cd /content

/content


In [None]:
# Switch into the vocal-remover checkout: the cells below invoke
# `inference.py` by relative path, so this must be the working directory.
%cd vocal-remover
# !pip install -r requirements.txt

/content/vocal-remover


In [None]:
# Run vocal-remover on a single video. The MIDI step below reads the resulting
# instrumental track from /content/vocal-remover/video_ad1_Instruments.wav.
!python inference.py --input /content/video_data/video_ad1.mp4

#### From WAV (separated instrumental track) -> MIDI format:

In [None]:
# Install the `basic-pitch` command-line tool used below for audio-to-MIDI transcription.
! pip install basic-pitch

In [None]:
# basic-pitch takes the OUTPUT directory first and the input audio second;
# the MIDI is saved as /content/video_ad1_Instruments_basic_pitch.mid.
! basic-pitch /content /content/vocal-remover/video_ad1_Instruments.wav


✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad1_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_ad1_Instruments_basic_pitch.mid

✨ Done ✨



In [None]:
! mkdir /content/video_to_midi

In [None]:
! mkdir /content/video_to_wav

#### Automated MP4 to MID pipeline:

In [None]:
def video_to_mid(basepath, filename):
  MP4toMP3(str(basepath+filename))
  path_to_mp3 = '/content/vocal-remover/' + filename.split('.')[0] + '.mp3'
  !python inference.py --input {path_to_mp3}
  path_to_music_mp3 = '/content/vocal-remover/'+filename.split('.')[0]+'_Instruments.wav'
  path_to_vocals_mp3 = '/content/vocal-remover/'+filename.split('.')[0]+'_Vocals.wav'
  ! rm -rf {path_to_mp3}
  ! rm -rf {path_to_vocals_mp3}
  ! basic-pitch /content/video_to_midi {path_to_music_mp3}
  old_name = '/content/video_to_midi/'+filename.split('.')[0]+'_Instruments_basic_pitch'+'.mid'
  new_mid_name = '/content/video_to_midi/'+'music_orig_'+filename.split('.')[0].split('_')[1]+'.mid'
  new_wav_name = '/content/video_to_wav/'+'music_orig_'+filename.split('.')[0].split('_')[1]+'.wav'
  ! mv {old_name} {new_mid_name}
  ! mv {path_to_music_mp3} {new_wav_name}


In [None]:
# Convert video_ad1.mp4 through video_ad84.mp4 (the range end is exclusive).
failed_videos = []  # (file name, exception) for every video that could not be converted
for num in range(1, 85):
  video_name = 'video_ad' + str(num) + '.mp4'
  try:
    video_to_mid('/content/video_data/', video_name)
  except Exception as err:
    # Best effort: one broken video must not abort the batch, but the failure
    # is recorded and reported instead of silently dropped. Catching Exception
    # rather than using a bare `except` keeps kernel interrupts working.
    failed_videos.append((video_name, err))
    print(f'[skipped] {video_name}: {err!r}')

if failed_videos:
  print(f'{len(failed_videos)} video(s) could not be processed.')

MoviePy - Writing audio in video_ad1.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 9/9 [02:30<00:00, 16.70s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad1_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad1_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad2.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:26<00:00, 17.24s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad2_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad2_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad3.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:24<00:00, 16.95s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad3_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad3_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad4.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:55<00:00, 18.51s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad4_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad4_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad5.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:49<00:00, 16.64s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad5_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad5_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad6.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:27<00:00, 13.51s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad6_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad6_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad7.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:29<00:00, 17.91s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad7_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad7_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad8.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 4/4 [01:12<00:00, 18.13s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad8_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad8_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad9.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:22<00:00, 16.40s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad9_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad9_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad10.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:33<00:00, 16.54s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad10_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad10_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad11.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:35<00:00, 15.91s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad11_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad11_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad12.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:31<00:00, 15.51s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad12_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad12_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad13.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:50<00:00, 16.74s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad13_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad13_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad14.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:25<00:00, 17.03s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad14_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad14_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad15.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.45s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad15_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad15_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad16.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [01:02<00:00, 20.71s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad16_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad16_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad17.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:49<00:00, 18.27s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad17_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad17_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad18.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:38<00:00, 16.39s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad18_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad18_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad19.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:38<00:00, 16.48s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad19_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad19_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad20.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:54<00:00, 18.28s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad20_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad20_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad21.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 10/10 [02:56<00:00, 17.69s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad21_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad21_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad22.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:49<00:00, 16.54s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad22_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad22_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad23.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:25<00:00, 12.73s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad23_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad23_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad24.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:33<00:00, 15.63s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad24_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad24_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad25.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:24<00:00, 16.94s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad25_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad25_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad26.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 4/4 [01:02<00:00, 15.72s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad26_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad26_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad27.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 1/1 [00:14<00:00, 14.63s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad27_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad27_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad28.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:42<00:00, 17.02s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad28_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad28_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad29.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:43<00:00, 14.48s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad29_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad29_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad30.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:23<00:00, 11.53s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad30_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad30_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad31.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:31<00:00, 15.99s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad31_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad31_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad32.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:51<00:00, 18.62s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad32_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad32_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad33.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:51<00:00, 17.28s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad33_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad33_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad34.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:53<00:00, 17.86s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad34_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad34_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad35.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 1/1 [00:19<00:00, 19.50s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad35_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad35_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad36.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:42<00:00, 17.01s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad36_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad36_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad37.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:51<00:00, 17.11s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad37_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad37_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad38.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:19<00:00, 15.91s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad38_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad38_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad39.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:26<00:00, 13.05s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad39_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad39_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad40.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:49<00:00, 16.52s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad40_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad40_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad41.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:37<00:00, 16.25s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad41_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad41_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad42.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:37<00:00, 16.29s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad42_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad42_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad43.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:52<00:00, 17.36s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad43_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad43_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad44.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.07s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad44_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad44_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad45.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:51<00:00, 17.25s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad45_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad45_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad46.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:51<00:00, 17.09s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad46_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad46_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad47.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:29<00:00, 14.53s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad47_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad47_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad48.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:52<00:00, 17.49s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad48_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad48_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad49.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 4/4 [01:09<00:00, 17.47s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad49_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad49_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad50.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:29<00:00, 14.54s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad50_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad50_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad51.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:29<00:00, 14.84s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad51_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad51_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad52.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 1/1 [00:11<00:00, 11.17s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad52_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad52_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad53.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:30<00:00, 15.24s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad53_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad53_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad54.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:26<00:00, 13.19s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad54_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad54_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad55.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:27<00:00, 13.62s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad55_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad55_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad56.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:52<00:00, 17.47s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad56_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad56_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad57.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:27<00:00, 13.61s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad57_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad57_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad58.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:27<00:00, 13.61s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad58_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad58_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad59.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:50<00:00, 16.85s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad59_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad59_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad60.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 5/5 [01:18<00:00, 15.75s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad60_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad60_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad61.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:33<00:00, 16.96s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad61_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad61_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad62.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.08s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad62_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad62_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad63.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:52<00:00, 17.66s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad63_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad63_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad64.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.35s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad64_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad64_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad65.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.21s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad65_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad65_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad66.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.26s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad66_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad66_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad67.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:33<00:00, 16.70s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad67_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad67_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad68.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:53<00:00, 17.93s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad68_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad68_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad69.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:54<00:00, 18.07s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad69_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad69_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad70.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 1/1 [00:17<00:00, 17.84s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad70_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad70_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad71.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:31<00:00, 15.52s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad71_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad71_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad72.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:45<00:00, 15.23s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad72_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad72_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad73.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:50<00:00, 16.70s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad73_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad73_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad74.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:33<00:00, 16.60s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad74_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad74_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad75.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.24s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad75_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad75_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad76.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:27<00:00, 13.98s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad76_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad76_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad77.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 6/6 [01:39<00:00, 16.54s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad77_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad77_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad78.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:51<00:00, 17.32s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad78_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad78_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad79.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.19s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad79_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad79_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad80.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 1/1 [00:18<00:00, 18.61s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad80_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad80_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad81.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:23<00:00, 11.97s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad81_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad81_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad82.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 2/2 [00:28<00:00, 14.36s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad82_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad82_Instruments_basic_pitch.mid

✨ Done ✨

MoviePy - Writing audio in video_ad83.mp3




MoviePy - Done.
loading model... done
loading wave source... done
stft of wave source... done
100% 3/3 [00:42<00:00, 14.20s/it]
validating output directory... done
inverse stft of instruments... done
inverse stft of vocals... done

✨✨✨✨✨✨✨✨✨
✨ Basic Pitch  ✨
✨✨✨✨✨✨✨✨✨

Importing Tensorflow (this may take a few seconds)...

Predicting MIDI for /content/vocal-remover/video_ad83_Instruments.wav...


  Creating midi...
  💅 Saved to /content/video_to_midi/video_ad83_Instruments_basic_pitch.mid

✨ Done ✨



In [None]:
! zip -r mid_wav_data.zip /content/video_to_midi /content/video_to_wav
! mv mid_wav_data.zip /content/drive/MyDrive

#### Extracting the timings of key events from the raw videos (a list of timings for each video):

*Method №1: CNN for action recognition*

In [None]:
!git clone https://github.com/microsoft/computervision-recipes.git
!pip install decord einops
import torch
# Regular Python libraries
import sys
from collections import deque #
import io
import requests
import os
from time import sleep, time
from threading import Thread
from IPython.display import Video

# Third party tools
import decord #
import IPython.display #
# from ipywebrtc import CameraStream, ImageRecorder
from ipywidgets import HBox, HTML, Layout, VBox, Widget, Label
import numpy as np
from PIL import Image
import torch
import torch.cuda as cuda
import torch.nn as nn
from torchvision.transforms import Compose

# utils_cv
sys.path.append("/content/computervision-recipes")
from utils_cv.action_recognition.data import KINETICS, Urls
from utils_cv.action_recognition.dataset import get_transforms
from utils_cv.action_recognition.model import VideoLearner
from utils_cv.action_recognition.references import transforms_video as transforms
from utils_cv.common.gpu import system_info, torch_device
from utils_cv.common.data import data_path

Cloning into 'computervision-recipes'...
remote: Enumerating objects: 6255, done.[K
remote: Counting objects: 100% (486/486), done.[K
remote: Compressing objects: 100% (175/175), done.[K
remote: Total 6255 (delta 359), reused 406 (delta 307), pack-reused 5769[K
Receiving objects: 100% (6255/6255), 260.94 MiB | 16.48 MiB/s, done.
Resolving deltas: 100% (3896/3896), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting decord
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m103.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: einops, decord
Successfully installed decord-0.6.0 einops-0.6.1


In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
# Additions to the code of predict_frames in /content/computervision-recipes/utils_cv/action_recognition/model.py:
# top5 = None
# ...
# return top5[0][0] if top5 and len(top5) else ''
# or return (top5[0][0], set([el[0] for el in top5])) if top5 and len(top5) else ('', set())


In [None]:
from collections import defaultdict
import cv2

# Configuration and shared state for the action-recognition key-moment detector.
LABELS = KINETICS.class_names
NUM_FRAMES = 8  # 8 or 32.
# NOTE(review): IM_SCALE and INPUT_SIZE are not referenced by any code visible in this notebook section.
IM_SCALE = 128  # resize then crop
INPUT_SIZE = 112  # input clip size: 3 x NUM_FRAMES x 112 x 112
SCORE_THRESHOLD = 0.16
# NOTE(review): VideoToMoments passes a literal 30 to predict_frames (presumably the
# averaging size) instead of this constant — confirm which value is intended.
AVERAGING_SIZE = 5

# Video model wrapper; its predict_frames method is what VideoToMoments calls per window.
learner = VideoLearner(
    base_model="kinetics",
    sample_length=NUM_FRAMES,
)

# NOTE(review): this rebinds the name `transforms`, which the import cell bound to the
# `transforms_video` module; VideoToMoments calls get_transforms() itself and does not read it.
transforms = get_transforms(train=False)
# d_caption = IPython.display.display("Preparing...", display_id=2)
# NOTE(review): `l` is not used by the code visible in this cell.
l = len(LABELS)
# def update_println(println):
#   d_caption.update(IPython.display.HTML(println))

# Global result store: video file name -> list of key-moment timestamps (filled by VideoToMoments).
key_video_moments = defaultdict(list)

def VideoToMoments(video, SCORE_THRESHOLD=0.16):
  """Detect the moments of a video at which a new action starts.

  Frames are decoded one by one and pushed into a sliding window. Every time
  the window holds NUM_FRAMES frames it is scored with
  ``learner.predict_frames``. A key moment is recorded when the predicted
  action is non-empty, is not among the labels remembered from the most
  recently recorded moments, and lies more than 0.5 s after the previously
  recorded moment.

  The results are stored in the global ``key_video_moments`` under the video's
  file name. The entry is reset at the start of every call, so re-running the
  function on the same video does not append duplicate timestamps.

  Args:
    video: Path to the video file (string).
    SCORE_THRESHOLD: Score threshold forwarded to ``learner.predict_frames``.

  Returns:
    The list of key-moment timestamps in seconds (rounded to 2 decimals);
    the same list object that is stored in ``key_video_moments``.
  """
  # OpenCV is only used to read the frame rate; release the handle right away.
  cap = cv2.VideoCapture(video)
  try:
    fps_exact = cap.get(cv2.CAP_PROP_FPS)
  finally:
    cap.release()
  FPS = int(fps_exact)
  print('Video: ', video, 'with fps:', FPS)

  video_name = video.split('/')[-1]
  key_video_moments[video_name] = []
  moments = key_video_moments[video_name]
  if fps_exact <= 0:
    # Without a frame rate frame indices cannot be converted to seconds.
    print('Cannot read the frame rate of', video, '- no key moments extracted')
    return moments

  video_reader = decord.VideoReader(video)
  window = deque()
  scores_cache = deque()
  scores_sum = np.zeros(len(LABELS))
  w_text = HTML(layout=Layout(padding="0 0 0 100px"))

  def update_println(println):
    w_text.value = println

  # Loop-invariant: build the evaluation transforms once instead of per frame.
  clip_transforms = get_transforms(train=False)

  cur_frame = 0
  prev_moment_sec = 0
  prev_sets = []  # label sets of the moments recorded so far
  num = 1
  while True:
    try:
      frame = video_reader.next().asnumpy()
    except StopIteration:
      # Regular end of the video.
      break
    except Exception as err:
      # Keep the best-effort behaviour for decoding problems, but report them.
      print('Stopped reading', video, 'at frame', cur_frame, ':', err)
      break
    window.append(frame)

    if len(window) == NUM_FRAMES:
      # Relies on the locally patched predict_frames (see the note cell above
      # this one) returning (top_label, set_of_top_labels).
      a, set_a = learner.predict_frames(
          window,
          scores_cache,
          scores_sum,
          None,
          30,
          SCORE_THRESHOLD,
          LABELS,
          LABELS,
          clip_transforms,
          update_println,
      )
      # Real position in seconds. The previous formula
      # (cur_frame//FPS + (cur_frame%FPS)/100) produced "seconds.frame-index",
      # which is not a time value and broke the 0.5 s gap check below.
      cur_frame_sec = cur_frame / fps_exact
      if len(prev_sets) > FPS:
        prev_cumm_set = set().union(*prev_sets[-FPS:])
      else:
        prev_cumm_set = set().union(*prev_sets)
      # NOTE(review): `cur_frame == 0` can never hold here because the window
      # needs NUM_FRAMES frames before this branch runs — confirm the intent.
      if cur_frame == 0 or (a and a not in prev_cumm_set and cur_frame_sec - prev_moment_sec > 0.5):
        moments.append(float("{:.2f}".format(cur_frame_sec)))
        print(num, ': Секунда: ', "{:.2f}".format(cur_frame_sec), ', началось действие: ', a)
        num += 1
        prev_moment_sec = cur_frame_sec
        prev_sets.append(set_a)
    else:
      w_text.value = "Preparing..."
    cur_frame += 1
  return moments


Loading r2plus1d_34_8_kinetics model


Using cache found in /root/.cache/torch/hub/moabitcoin_ig65m-pytorch_master


In [None]:
video = '/content/video_data/video_ad6.mp4'
VideoToMoments(video, 0.08)
print(key_video_moments)

Video:  /content/video_data/video_ad6.mp4 with fps: 25
1 : Секунда:  2.13 , началось действие:  blowing glass
2 : Секунда:  3.20 , началось действие:  belly dancing
3 : Секунда:  4.08 , началось действие:  contact juggling
4 : Секунда:  6.15 , началось действие:  bobsledding
5 : Секунда:  7.09 , началось действие:  yoga
6 : Секунда:  8.19 , началось действие:  robot dancing
7 : Секунда:  11.13 , началось действие:  brush painting
defaultdict(<class 'list'>, {'video_ad27.mp4': [1.13, 2.0, 3.2, 4.12], 'video_ad6.mp4': [2.15, 3.2, 4.08, 6.21, 7.09, 8.21, 2.13, 3.2, 4.08, 6.15, 7.09, 8.19, 11.13]})


In [None]:
# import cv2
# cap = cv2.VideoCapture('/content/video_data/video_ad28.mp4')
# FPS = int(cap.get(cv2.CAP_PROP_FPS))
# print("The total number of frames in this video is ", framespersecond)

The total number of frames in this video is  30


In [None]:
from pathlib import Path

# Start from an empty result mapping and process every .mp4 in the data folder.
key_video_moments = defaultdict(list)
all_paths_vid = list(Path('/content/video_data').absolute().glob('*.mp4'))

for i, path in enumerate(all_paths_vid):
  print('Number ', i, ':')
  video = str(path)
  video_name = video.split('/')[-1]

  # First pass with the default threshold for the batch run.
  VideoToMoments(video, 0.1)

  # Too few moments -> retry with a lower threshold; too many -> with a higher one.
  found = len(key_video_moments[video_name])
  if found < 7:
    retry_threshold = 0.08
  elif found > 20:
    retry_threshold = 0.23
  else:
    retry_threshold = None

  if retry_threshold is not None:
    key_video_moments[video_name] = []
    VideoToMoments(video, retry_threshold)

Number  0 :
Video:  /content/video_data/video_ad24.mp4 with fps: 24
1 : Секунда:  1.12 , началось действие:  riding mountain bike
2 : Секунда:  7.01 , началось действие:  biking through snow
3 : Секунда:  10.18 , началось действие:  abseiling
4 : Секунда:  11.11 , началось действие:  cleaning windows
5 : Секунда:  13.04 , началось действие:  riding unicycle
6 : Секунда:  15.11 , началось действие:  pushing wheelchair
7 : Секунда:  24.09 , началось действие:  tai chi
8 : Секунда:  25.00 , началось действие:  playing violin
9 : Секунда:  32.03 , началось действие:  motorcycling
10 : Секунда:  43.13 , началось действие:  texting
Number  1 :
Video:  /content/video_data/video_ad23.mp4 with fps: 23
1 : Секунда:  1.13 , началось действие:  driving car
2 : Секунда:  8.09 , началось действие:  paragliding
3 : Секунда:  9.03 , началось действие:  riding mountain bike
Video:  /content/video_data/video_ad23.mp4 with fps: 23
1 : Секунда:  1.13 , началось действие:  driving car
2 : Секунда:  8.09 , 

In [None]:
import json

with open("/content/video_key_results_new.json", "w") as outfile:
    json.dump(key_video_moments, outfile, indent=4, sort_keys=False)
!mv /content/video_key_results.json /content/drive/MyDrive

In [None]:
!mv /content/video_key_results_new.json /content/drive/MyDrive

*Method №2: Key Frame Detector*

In [None]:
!git clone https://github.com/joelibaceta/video-keyframe-detector.git
%cd /content/video-keyframe-detector
!pip install -v -e .
!rm -rv /content/res/keyFrames

In [None]:
%cd ..

/content


In [None]:
# Addition to the code: replace with timeSpans.append(i/30) and verbose=True

In [None]:
!python /content/video-keyframe-detector/cli.py -s /content/video_data/video_ad1.mp4 -d /content/res  -t 0.9

58.266666666666666
60.2
63.1
75.06666666666666
75.36666666666666
81.7
83.7
92.46666666666667
92.53333333333333


#### From raw video to music genre:

In [None]:
GENRES_LIST = (
    'pop', 'jazz', 'rock', 'blues', 'classical', 'country',
    'soul', 'rap', 'latin', 'folk', 'electro'
)

In [None]:
# ! git clone https://github.com/ruohoruotsi/LSTM-Music-Genre-Classification.git
! git clone https://github.com/cetinsamet/music-genre-classification.git

Cloning into 'music-genre-classification'...
remote: Enumerating objects: 135, done.[K
remote: Total 135 (delta 0), reused 0 (delta 0), pack-reused 135[K
Receiving objects: 100% (135/135), 34.11 MiB | 7.67 MiB/s, done.
Resolving deltas: 100% (64/64), done.


In [None]:
# %cd /content/music-genre-classification/src

!python3 get_genre.py /content/video_to_wav/music_orig_ad1.wav

      rock: 	77.78	%
       pop: 	11.11	%
     blues: 	5.56	%
 classical: 	5.56	%


In [None]:
from IPython.display import Audio, display

display(Audio('/content/video_to_wav/music_orig_ad1.wav', autoplay=True))

#### Using annotation algorithms on MIDI data to collect lists of key moments by dynamics, tempo, register changes, and long pauses:

Velocity:

In [None]:
import os
import pandas as pd
import json
from pathlib import Path
from music21 import converter, corpus, instrument, midi, note, chord, pitch
import tqdm
from collections import defaultdict

def open_midi(midi_path, remove_drums):
    """Read a MIDI file and translate it into a music21 stream.

    Args:
        midi_path: Path of the MIDI file (string).
        remove_drums: When truthy, events on channel 10 are dropped from every
            track before translation (channel 10 is presumably the percussion
            channel in music21's numbering — TODO confirm).

    Returns:
        The result of ``midi.translate.midiFileToStream`` for the parsed file.
    """
    mf = midi.MidiFile()
    mf.open(midi_path)
    try:
        mf.read()
    finally:
        # Always release the file handle, even when parsing fails.
        mf.close()

    if remove_drums:
        for track in mf.tracks:
            track.events = [ev for ev in track.events if ev.channel != 10]

    return midi.translate.midiFileToStream(mf)

mid_vel_orig = defaultdict(list)
def path2vel(path_to_midi, max_vel=0.65):
    """Collect the offsets at which loud passages of a MIDI file start and end.

    A passage opens at the first note whose realized volume exceeds
    ``max_vel`` and is extended by every later loud note. It closes when a
    quiet note appears more than ``waitTime`` offset units after the last loud
    note. A closed passage contributes its start offset and, when different,
    its end offset. A passage that is still open when the notes run out is
    recorded as well (previously it was silently dropped, which could turn a
    lower threshold into an empty result).

    The result is also stored in the global ``mid_vel_orig`` under the file
    name. A later cell redefines ``path2vel`` for the generated MIDI files.

    NOTE(review): offsets are taken from ``el.offset`` as reported by music21,
    presumably quarter lengths rather than seconds — confirm before comparing
    them with video timings.

    Args:
        path_to_midi: ``pathlib.Path`` of the MIDI file.
        max_vel: Relative-velocity threshold above which a note counts as loud.

    Returns:
        Tuple ``(absolute_path, offsets)``.
    """
    path = path_to_midi.absolute()
    curr = open_midi(str(path), True)

    volsRTime = []
    isOpen = False
    waitTime = 2.0
    st, end = 0, 0

    def record_passage(start, stop):
        # A passage consisting of a single offset is stored only once.
        volsRTime.append(start)
        if not (stop == start or stop == 0):
            volsRTime.append(stop)

    for el in curr.flat.notes:
        offset = round(float(el.offset), 2)
        is_loud = el.volume.getRealized() > max_vel  # relative velocity
        if is_loud:
            if not isOpen:
                st = offset
                isOpen = True
            end = offset
        elif isOpen and offset - end > waitTime:
            record_passage(st, end)
            isOpen = False
            st, end = 0, 0

    if isOpen:
        # The piece ended while a loud passage was still running.
        record_passage(st, end)

    mid_vel_orig[path_to_midi.name] = volsRTime

    print(path_to_midi.name, ' : ', volsRTime)
    return path, volsRTime

# Extract loud-passage moments for every original track.
for idx in range(1, 84):
    midi_path = Path('/content/video_to_midi/music_orig_ad'+str(idx)+'.mid')
    _, res = path2vel(midi_path, max_vel=0.6)
    if len(res) < 4:
      # Too few moments: retry with a lower loudness threshold.
      _, retry_res = path2vel(midi_path, max_vel=0.4)
      if len(retry_res) < len(res):
        # The retry found even fewer moments (path2vel has already stored
        # them), so restore the better first result.
        mid_vel_orig['music_orig_ad'+str(idx)+'.mid'] = res


with open("/content/mid_to_vel_orig.json", "w") as outfile:
    json.dump(mid_vel_orig, outfile, indent=4, sort_keys=False)
!cp /content/mid_to_vel_orig.json /content/drive/MyDrive

music_orig_ad1.mid  :  [9.0, 26.0, 40.0, 109.75, 110.75, 117.25, 119.67, 124.33]
music_orig_ad2.mid  :  [9.25, 13.25, 19.75, 23.75, 29.67, 40.25, 42.75, 55.25, 57.25, 65.25, 70.67, 73.5, 76.0, 81.5, 97.67, 101.75, 108.25]
music_orig_ad3.mid  :  [2.25, 17.5, 36.0, 40.75, 43.0, 60.0, 62.5, 64.75, 67.67, 69.75, 72.67, 75.0, 78.75, 86.5, 90.5, 101.0, 103.67, 108.5]
music_orig_ad4.mid  :  []
music_orig_ad4.mid  :  [46.25, 62.33]
music_orig_ad5.mid  :  [33.33, 57.33]
music_orig_ad5.mid  :  [7.0, 11.75, 16.33, 28.33, 32.25, 36.5, 39.0, 45.25]
music_orig_ad6.mid  :  [3.75, 6.25, 26.0]
music_orig_ad6.mid  :  []
music_orig_ad7.mid  :  [1.5, 4.0, 10.33, 10.67, 15.67, 16.0, 20.5, 21.25, 30.67, 32.0, 37.75, 40.67, 44.0, 50.33, 53.0, 62.67, 63.0, 67.33, 69.75, 75.5, 78.0, 82.75, 89.0, 93.5, 96.25, 100.5]
music_orig_ad8.mid  :  [0.25, 9.75, 14.75, 16.0, 18.33, 39.25, 50.5, 52.25, 66.33, 73.5, 74.33, 85.33, 85.67]
music_orig_ad9.mid  :  [30.25, 35.25, 38.33, 54.25, 59.0, 75.25, 75.5]
music_orig_ad10.m

In [None]:
mid_vel_gen = defaultdict(list)
def path2vel(path_to_midi, max_vel=0.65):
    """Collect loud-passage offsets for a generated MIDI file.

    Same detection as the ``path2vel`` defined earlier for the original
    tracks (this definition replaces it): a passage opens at the first note
    whose realized volume exceeds ``max_vel``, is extended by later loud
    notes, and closes when a quiet note appears more than ``waitTime`` offset
    units after the last loud note. A passage still open when the notes run
    out is recorded as well (previously it was silently dropped).

    The result is stored in the global ``mid_vel_gen`` under the key
    ``music_gen_<second '_'-separated part of the file name>.midi``.

    NOTE(review): offsets come from ``el.offset`` as reported by music21,
    presumably quarter lengths rather than seconds — confirm.

    Args:
        path_to_midi: ``pathlib.Path`` of the generated MIDI file; its name is
            expected to contain at least one underscore.
        max_vel: Relative-velocity threshold above which a note counts as loud.

    Returns:
        Tuple ``(absolute_path, offsets)``.
    """
    path = path_to_midi.absolute()
    curr = open_midi(str(path), True)

    volsRTime = []
    isOpen = False
    waitTime = 2.0
    st, end = 0, 0

    def record_passage(start, stop):
        # A passage consisting of a single offset is stored only once.
        volsRTime.append(start)
        if not (stop == start or stop == 0):
            volsRTime.append(stop)

    for el in curr.flat.notes:
        offset = round(float(el.offset), 2)
        is_loud = el.volume.getRealized() > max_vel  # relative velocity
        if is_loud:
            if not isOpen:
                st = offset
                isOpen = True
            end = offset
        elif isOpen and offset - end > waitTime:
            record_passage(st, end)
            isOpen = False
            st, end = 0, 0

    if isOpen:
        # The piece ended while a loud passage was still running.
        record_passage(st, end)

    new_name = 'music_gen_'+path_to_midi.name.split('_')[1]+'.midi'
    mid_vel_gen[new_name] = volsRTime

    print(new_name, ' : ', volsRTime)
    return path, volsRTime

# Extract loud-passage moments for every generated track; some indices have no
# generated file, so a failing track is reported and skipped.
for idx in range(1, 51):
    gen_path = Path('/content/base_velocity_8_max_velocity_24/video_ad'+str(idx)+'_genre_pop_sentiment_1.midi')
    try:
      _, res = path2vel(gen_path, max_vel=0.6)
      if len(res) < 4:
        # Too few moments: retry with a lower loudness threshold. (The old code
        # reset the unrelated key 'music_orig_ad<idx>.mid' here, which left
        # spurious empty entries in mid_vel_gen.)
        _, retry_res = path2vel(gen_path, max_vel=0.4)
        if len(retry_res) < len(res):
          # The retry found even fewer moments, so restore the first result.
          mid_vel_gen['music_gen_ad'+str(idx)+'.midi'] = res
    except Exception as err:
      print('music_gen_ad'+str(idx)+'.midi', ' : skipped -', err)

with open("/content/mid_to_vel_gen.json", "w") as outfile:
    json.dump(mid_vel_gen, outfile, indent=4, sort_keys=False)
!cp /content/mid_to_vel_gen.json /content/drive/MyDrive

music_gen_ad1.midi  :  [2.0, 52.25, 80.42, 92.92, 95.33, 99.92, 106.42]
music_gen_ad2.midi  :  [2.33, 4.0, 17.0, 24.5, 84.75, 87.0, 89.67, 91.67, 94.0, 103.5, 111.0, 129.58, 148.08, 151.08, 155.08, 170.5, 180.75, 183.42, 186.0, 193.5, 198.42, 198.75]
music_gen_ad3.midi  :  [11.67, 16.25, 20.25, 21.92, 26.25, 28.83, 33.33, 35.17, 44.83, 58.17, 219.5, 405.42, 417.08, 420.08, 457.42, 460.17, 488.5, 491.83, 494.67, 518.25, 538.75, 542.75]
music_gen_ad5.midi  :  [2.0, 2.67, 5.0, 14.75, 18.0, 32.0, 34.75, 39.0, 44.0, 47.5, 58.5, 61.0]
music_gen_ad6.midi  :  [0.0, 4.33, 6.25, 8.33, 38.58, 60.42, 87.42, 100.67, 127.0, 130.0]
music_gen_ad8.midi  :  [1.75, 8.5, 11.67, 17.0, 25.25, 31.33, 43.5, 47.0, 51.0, 53.33, 56.25, 64.5, 93.5, 97.0, 101.0, 103.5, 105.75, 112.67, 113.75]
music_gen_ad9.midi  :  [1.0, 3.5, 9.0, 9.25, 31.33, 35.58, 41.08, 42.08, 47.0, 49.67, 52.67, 57.33, 72.42, 72.92, 126.92, 137.17, 146.17, 192.0, 196.33, 197.83, 234.83, 253.08, 264.67, 268.33, 271.0, 273.83, 275.83, 286.33]
m

Tempo:

In [None]:
! git clone  https://github.com/CPJKU/madmom.git
%cd madmom
! git submodule update --init --remote
! python setup.py develop --user

In [None]:
%cd madmom

/content/madmom


In [None]:
%cd ..

/content


In [None]:
from madmom.features.beats import RNNBeatProcessor, BeatTrackingProcessor, DBNBeatTrackingProcessor
from madmom.features.tempo import TempoEstimationProcessor
import numpy as np
import json

from collections import defaultdict

wav_to_tempo_orig = defaultdict(list)
def wav_to_tempo(basepath, audio_name, threshold = 0.07):
  """Detect tempo-change moments in one audio file and record them.

  Beat activations come from madmom's RNNBeatProcessor and are decoded into
  beat positions by DBNBeatTrackingProcessor (fps=100). A per-interval tempo
  is derived as 60 / (difference between consecutive beat positions).

  A beat position is reported as a tempo change when the tempo of the interval
  starting at that beat differs from the previous interval's tempo by more
  than `threshold` (relative to the previous tempo) and the beat lies more
  than 1 after the last reported change (same unit as the beat positions,
  presumably seconds - confirm against madmom).

  Args:
    basepath: directory prefix; concatenated directly with `audio_name`, so
      it must already end with a path separator.
    audio_name: file name of the audio track; also used as the result key.
    threshold: minimum relative tempo change that counts as a change.

  Side effects:
    Prints the result and stores it in the module-level `wav_to_tempo_orig`
    under `audio_name`. Nothing is returned.
  """
  activations = RNNBeatProcessor()(basepath + audio_name)
  tracker = DBNBeatTrackingProcessor(fps=100)
  beats = tracker(activations)
  tempos = 60. / np.diff(beats)

  tempo_changes = []
  for k in range(1, len(tempos)):
    previous_tempo = tempos[k - 1]
    relative_change = np.abs((tempos[k] - previous_tempo) / previous_tempo)
    # Report the beat only if it is the first change or far enough from the
    # previously reported one.
    if relative_change > threshold and (not tempo_changes or beats[k] - tempo_changes[-1] > 1):
      tempo_changes.append(beats[k])

  print(audio_name, ' : ', tempo_changes)
  wav_to_tempo_orig[audio_name] = tempo_changes

# Run the audio-based tempo detector over the original soundtracks of ads 1..83.
# A track that yields fewer than four tempo changes with the default threshold
# is analysed again with the more sensitive threshold 0.05.
for idx in range(1, 84):
    wav_name = 'music_orig_ad'+str(idx)+'.wav'
    wav_to_tempo('/content/video_to_wav/', wav_name)
    found_too_few = len(wav_to_tempo_orig[wav_name]) < 4
    if found_too_few:
        wav_to_tempo_orig[wav_name] = []
        wav_to_tempo('/content/video_to_wav/', wav_name, 0.05)


# Persist the collected tempo moments.
with open("/content/wav_to_tempo_orig_new.json", "w") as outfile:
    json.dump(wav_to_tempo_orig, outfile, indent=4, sort_keys=False)
!cp /content/wav_to_tempo_orig_new.json /content/drive/MyDrive

Tempo from the previous algorithm, based on MIDI-format features:

In [None]:
! pip install mido

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mido
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.10


In [None]:
import mido

def get_tempo_changes(midifile, threshold=30):
    """Collect positions in a MIDI file where the estimated tempo jumps.

    Every track of `midifile` is walked message by message while a running BPM
    estimate (initially 120) is maintained:

    * `set_tempo` messages provide the BPM directly via `mido.tempo2bpm`.
    * `note_on` / `note_off` messages contribute a scaled duration (ticks since
      the previous counted message / ticks_per_beat / a divisor). Once at
      least ten durations have been collected, a BPM value is derived from the
      mean of the ten most recent ones.

    Messages whose scaled duration is zero are ignored entirely. The duration
    list, the BPM estimate and the result list are shared across tracks; only
    the tick counters restart for each track.

    Args:
        midifile: object exposing `ticks_per_beat` and `tracks` (iterables of
            messages with `type`, `time` and, for `set_tempo`, `tempo`), as
            `mido.MidiFile` does.
        threshold: minimum absolute BPM difference that counts as a change.

    Returns:
        list of floats. For `set_tempo` triggers the value is that message's
        own scaled duration; for note triggers it is the sum of all collected
        durations except the latest ten, rounded to two decimals. A note
        trigger is recorded only if the list is empty or that sum exceeds the
        last recorded value by more than 1.
    """
    ticks_per_beat = midifile.ticks_per_beat
    # NOTE(review): the previous version also had an
    # `elif ticks_per_beat == 192: c = 4` arm placed after the `<= 384` test.
    # It could never run (192 <= 384), so the effective behaviour - kept here -
    # is 2 up to 384 ticks per beat and 1.2 above. Confirm whether 4 was
    # really intended for 192.
    note_divisor = 2 if ticks_per_beat <= 384 else 1.2

    tempo_changes = []
    notes_duration = []
    current_bpm = 120

    for track in midifile.tracks:
        time_ticks = 0
        last_tick = 0
        for msg in track:
            time_ticks += msg.time

            is_tempo_msg = msg.type == 'set_tempo'
            if not is_tempo_msg and msg.type not in ('note_on', 'note_off'):
                # Other message types only advance the tick counter.
                continue

            divisor = 2 if is_tempo_msg else note_divisor
            duration = (time_ticks - last_tick) / ticks_per_beat / divisor
            if duration == 0:
                continue
            notes_duration.append(duration)

            if is_tempo_msg:
                new_bpm = mido.tempo2bpm(msg.tempo)
                if abs(new_bpm - current_bpm) > threshold:
                    tempo_changes.append(duration)
                current_bpm = new_bpm
            elif len(notes_duration) >= 10:
                avg_note_duration = sum(notes_duration[-10:]) / 10
                new_bpm = mido.tempo2bpm(60*1000 / avg_note_duration)
                if abs(new_bpm - current_bpm) > threshold:
                    # Position = everything collected before the ten-item window.
                    position = sum(notes_duration[:-10])
                    if not tempo_changes or position - tempo_changes[-1] > 1:
                        tempo_changes.append(float("%.2f" % position))
                current_bpm = new_bpm

            last_tick = time_ticks
    return tempo_changes

# path = '/content/video_to_midi/music_orig_ad10.mid'
# mid = mido.MidiFile(path)
# tempo_changes = get_tempo_changes(mid, 55)
# # print(f'Duration (pretty midi method): {pretty_midi.PrettyMIDI(path).get_end_time():.2f}')

# for i, (tick, bpm, speed) in enumerate(tempo_changes):
#     print(f"{i}: Tempo changed at second {tick:.2f} with new tempo {bpm:.2f}, became {speed}")

# Tempo-change moments for the original MIDI tracks, keyed by file name.
mid_to_tempo_orig = defaultdict(list)

for idx in range(1, 84):
    track_name = 'music_orig_ad'+str(idx)+'.mid'
    path = '/content/video_to_midi/' + track_name
    mid = mido.MidiFile(path)

    # Start with threshold 55; raise it to 65 when more than 30 changes are
    # found, then lower it to 35 when the (possibly recomputed) result has
    # fewer than 5.
    tempo_changes = get_tempo_changes(mid, 55)
    if len(tempo_changes) > 30:
      tempo_changes = get_tempo_changes(mid, 65)
    if len(tempo_changes) < 5:
      tempo_changes = get_tempo_changes(mid, 35)

    print(track_name,' : ', tempo_changes)
    mid_to_tempo_orig[track_name] = tempo_changes

with open("/content/mid_to_tempo_orig.json", "w") as outfile:
    json.dump(mid_to_tempo_orig, outfile, indent=4, sort_keys=False)
!cp /content/mid_to_tempo_orig.json /content/drive/MyDrive

music_orig_ad1.mid  :  [0.0, 4.5, 6.05, 17.63, 35.19, 36.92, 39.77, 41.12, 46.2, 48.25, 50.95, 61.64, 70.01, 72.1, 74.82, 79.14, 81.7, 82.98, 85.62, 91.58, 95.29, 97.66]
music_orig_ad2.mid  :  [2.29, 4.6, 7.77, 11.93, 22.17, 24.32, 26.72, 30.25, 32.47, 40.05, 42.42, 44.57, 53.7, 55.69]
music_orig_ad3.mid  :  [28.98, 31.24, 33.54, 41.25, 42.65, 43.88, 46.52, 48.5, 51.76, 52.89]
music_orig_ad4.mid  :  [0.0, 4.42, 6.9, 8.42, 10.66, 15.75, 17.24, 18.48, 23.12, 26.65, 29.44]
music_orig_ad5.mid  :  [0.0, 3.29, 5.94, 7.97, 10.41, 13.2, 21.46, 24.15, 25.42]
music_orig_ad6.mid  :  [0.0, 6.03, 9.1, 11.45, 12.94]
music_orig_ad7.mid  :  [51.15]
music_orig_ad8.mid  :  [0.0, 1.08, 2.18, 3.23, 34.25]
music_orig_ad9.mid  :  [0.0, 3.0, 42.62, 44.24, 45.4]
music_orig_ad10.mid  :  [0.0, 3.44, 6.17, 8.22, 14.1, 15.44]
music_orig_ad11.mid  :  [11.43, 12.57, 15.95, 17.8, 19.12, 24.76, 26.77, 28.79, 34.36, 36.48, 38.75, 39.75, 41.51, 44.53]
music_orig_ad12.mid  :  [0.67, 3.13, 4.81, 11.15, 12.21]
music_orig_

### Generated MIDI with velocity accents to MP3:


In [None]:
! unzip /content/drive/MyDrive/key_moments_base_velocity_8_max_velocity_24.zip -d /content/

In [None]:
! pip install midi2audio

In [None]:
!sudo apt-get install fluidsynth

Processing triggers for man-db (2.9.1-1) ...
Processing triggers for mime-support (3.64ubuntu1) ...


In [None]:
!mkdir music_vel

Only piano version:

In [None]:
from midi2audio import FluidSynth
from pathlib import Path

# Render every generated MIDI file whose name ends in '_1' to WAV, using
# FluidSynth() with no arguments. Output goes to /content/music_vel/.
all_paths_mid = list(Path('/content/base_velocity_8_max_velocity_24').absolute().glob('*.midi'))

for path in all_paths_mid:
  # File names look like 'video_ad1_genre_pop_sentiment_1.midi': the second
  # '_'-separated token is the ad id, the last one the sentiment flag.
  items = str(path).split('/')[-1].split('.')[0].split('_')
  vid_num, isSentiment = items[1], items[-1]
  if isSentiment != '1':
    continue
  audio_file = '/content/music_vel/' + 'music_vel_' + vid_num + '.wav'
  fs = FluidSynth()
  fs.midi_to_audio(path, audio_file)

Converting from midi but with several instruments playing:

In [None]:
# ! pip install pyfluidsynth

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyfluidsynth
  Downloading pyFluidSynth-1.3.2-py3-none-any.whl (19 kB)
Installing collected packages: pyfluidsynth
Successfully installed pyfluidsynth-1.3.2


In [None]:
# ! python setup.py install

python3: can't open file '/content/setup.py': [Errno 2] No such file or directory


In [None]:
! unzip /content/drive/MyDrive/FluidR3_GM.zip -d /content

In [None]:
! pip install midi2audio

In [None]:
from midi2audio import FluidSynth
# import fluidsynth
from pathlib import Path

# Experiment: render only the first globbed MIDI file ([:1]) with a custom
# soundfont and write it to /content/ with the 'Gmusic_vel_' prefix.
all_paths_mid = list(Path('/content/base_velocity_8_max_velocity_24').absolute().glob('*.midi'))[:1]

for path in all_paths_mid:
  items = str(path).split('/')[-1].split('.')[0].split('_')
  vid_num, isSentiment = items[1], items[-1]
  if isSentiment != '1':
    continue
  audio_file = '/content/' + 'Gmusic_vel_' + vid_num + '.wav'
  # Other soundfont choices that were tried:
  #   fs = FluidSynth('/content/derive/MyDrive/SGM-v2.01-NicePianosGuitarsBass-V1.2.sf2')
  #   fs = FluidSynth()
  fs = FluidSynth('/content/Naturally Decaying B-Guitars.sf2')
  fs.midi_to_audio(path, audio_file)

  # Abandoned pyfluidsynth variant, kept for reference:
  #   fs = fluidsynth.Synth()
  #   fs.start(driver = 'dsound')  # use DirectSound driver
  #   sfid = fs.sfload('/content/FluidR3_GM.sf2')
  #   fs.program_select(0, sfid, 0, 0)
  #   fs.midi_to_audio(path, audio_file)

In [None]:
from IPython.display import Audio, display

# Play the custom-soundfont render ('Gmusic_vel_' prefix) for ad6.
# NOTE(review): the conversion cell only renders the first globbed MIDI file,
# so this assumes that file was ad6 - confirm.
display(Audio('/content/Gmusic_vel_ad6.wav', autoplay=True))

In [None]:
# Play the piano-only render for ad6. The piano-only conversion cell writes its
# WAV files into /content/music_vel/, so the file has to be read from there.
display(Audio('/content/music_vel/music_vel_ad6.wav', autoplay=True))

### Evaluating generated data:


1. Comparing midis / audio (original, generated with MT with additional features, generated with PMT - input similar to REMI format)
- metric evaluation
- evaluation by listening and comparing

2. Comparing lists of key events in video and lists of events in corresponding midi (and list of events in generated midi) with my metric


JSON files with labeled data:
- video_key_results_manual.json (moments from video, manually detected)
- video_key_results_new.json (moments from video)
- mid_to_vel_orig.json (velocity moments from original music)
- mid_to_vel_gen.json (velocity moments from generated music)
- wav_to_tempo_orig.json (tempo moments from original music)
- mid_to_tempo_orig.json (tempo moments from original music with mid algo)
- wav_to_tempo_gen.json (tempo moments from generated music)


In [None]:
from collections import defaultdict

def compliance_score(video_moments, music_moments, alpha=1.0, offset=0.2, eps=0.001, version='MAX', symmetric=False):
  """Score how well music event times line up with video event times.

  Both sequences are walked with two cursors, which assumes they are sorted in
  ascending order. A music moment `m` matches the current video moment `v`
  when `v - offset <= m <= v + eps`.

  Three counters are accumulated:
    * matched    - music moments that fell inside a video window;
    * music_only - music moments that fell inside no window;
    * video_only - video moments that attracted no music moment.

  Args:
    video_moments: sequence of video event times.
    music_moments: sequence of music event times.
    alpha: weight of `video_only`; `music_only` is weighted by 1/alpha.
    offset: how far before a video moment a music moment may lie.
    eps: how far after a video moment a music moment may lie.
    version: 'SUM' or 'MAX' - how the two weighted miss counts are combined.
    symmetric: if True a video moment is consumed by its first match
      (one-to-one pairing); otherwise several music moments may match the
      same video moment.

  Returns:
    matched / (alpha*video_only + music_only/alpha + matched) for 'SUM',
    matched / (max(alpha*video_only, music_only/alpha) + matched) for 'MAX',
    0.0 when nothing was counted at all (both inputs empty), and the string
    'not correct version' for any other `version` (kept for compatibility).
  """
  n_video = len(video_moments)
  n_music = len(music_moments)
  video_matched = [False] * n_video

  matched = 0
  video_only = 0
  music_only = 0

  i, j = 0, 0
  while i < n_video and j < n_music:
    window_start = video_moments[i] - offset
    window_end = video_moments[i] + eps
    if window_start <= music_moments[j] <= window_end:
      matched += 1
      video_matched[i] = True
      j += 1
      if symmetric:
        i += 1
    elif window_start > music_moments[j]:
      # Music moment lies before the current window: nothing can match it.
      music_only += 1
      j += 1
    else:
      # Music moment lies after the window: close the current video moment.
      if not video_matched[i]:
        video_only += 1
      i += 1

  # Whatever the walk did not reach is unmatched. Music moments from index j
  # on were never consumed. Video moments from index i on are unmatched too,
  # except that the current one may already have a match in non-symmetric
  # mode. (The previous `len - idx - 1` formulas dropped one unmatched moment
  # and could leave the denominator at zero.)
  music_only += n_music - j
  if i < n_video:
    video_only += n_video - i - (1 if video_matched[i] else 0)

  if version == 'SUM':
    penalty = alpha * video_only + (1/alpha) * music_only
  elif version == 'MAX':
    penalty = max(alpha * video_only, (1/alpha) * music_only)
  else:
    return 'not correct version'

  denominator = penalty + matched
  if denominator == 0:
    # Only possible when both sequences are empty.
    return 0.0
  return matched / denominator


In [None]:
import json

def _read_json(path):
    """Return the parsed content of the JSON file at `path`."""
    with open(path) as json_file:
        return json.load(json_file)

# Event lists produced by the earlier stages of the notebook.
video_key_results = _read_json('/content/video_key_results_new.json')
mid_to_vel_orig = _read_json('/content/mid_to_vel_orig.json')
mid_to_vel_gen = _read_json('/content/mid_to_vel_gen.json')
wav_to_tempo_orig = _read_json('/content/wav_to_tempo_orig_new.json')
mid_to_tempo_orig = _read_json('/content/mid_to_tempo_orig.json')

Comparing the key events found manually in the video (for a small part of the dataset) with those selected by the final algorithm (based on action recognition using a CNN):

In [None]:
# Manual labelling: score each manually labelled event list (vid_real) against
# the matching list in vid_my, with a symmetric +/-1.9 tolerance window.
score_settings = dict(alpha=1.0, offset=1.9, eps=1.9, symmetric=True)
labelled_pairs = list(zip(vid_real, vid_my))

res_sum = [compliance_score(r, m, version='SUM', **score_settings) for r, m in labelled_pairs]
res_max = [compliance_score(r, m, version='MAX', **score_settings) for r, m in labelled_pairs]

print(res_sum)
print(res_max)
print(sum(res_sum)/len(res_sum), sum(res_max)/len(res_max))

In [None]:
# Same comparison for vid_pixel (presumably a pixel-based detector - confirm),
# with a tighter symmetric +/-0.9 tolerance window.
score_settings = dict(alpha=1.0, offset=0.9, eps=0.9, symmetric=True)
labelled_pairs = list(zip(vid_real, vid_pixel))

res_sum = [compliance_score(r, m, version='SUM', **score_settings) for r, m in labelled_pairs]
res_max = [compliance_score(r, m, version='MAX', **score_settings) for r, m in labelled_pairs]

print(res_sum)
print(res_max)
print(sum(res_sum)/len(res_sum), sum(res_max)/len(res_max))

Comparing velocity events from the original background tones and detected key events from the video and searching for the value of the hyperparameter:

In [None]:
# video_key_results, mid_to_vel_orig
CM_sum = []
CM_max = []

for video_name in video_key_results:
  music_name = 'music_orig_'+video_name.split('.')[0].split('_')[-1]+'.mid'
  if music_name in mid_to_vel_orig:
    cm_value_sum = compliance_score(video_key_results[video_name],
                                    mid_to_vel_orig[music_name],
                                    alpha=1.7,
                                    offset=2.5,
                                    eps=0.15,
                                    version='SUM')
    cm_value_max = compliance_score(video_key_results[video_name],
                                    mid_to_vel_orig[music_name],
                                    alpha=1.4,
                                    offset=2.0,
                                    eps=0.15,
                                    version='MAX')
    CM_sum.append(cm_value_sum)
    CM_max.append(cm_value_max)

print('Average CM (SUM) for Video moments VS Velocity moments (original): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Video moments VS Velocity moments (original): ', sum(CM_max)/len(CM_max))

Comparing velocity events from generated songs and detected key events from the video:

In [None]:
# video_key_results, mid_to_vel_gen
CM_sum = []
CM_max = []

for video_name in video_key_results:
  music_name = 'music_gen_'+video_name.split('.')[0].split('_')[-1]+'.midi'
  if music_name in mid_to_vel_gen:
    cm_value_sum = compliance_score(video_key_results[video_name],
                                    mid_to_vel_gen[music_name],
                                    alpha=1.3,
                                    offset=3.5,
                                    eps=0.15,
                                    version='SUM')
    cm_value_max = compliance_score(video_key_results[video_name],
                                    mid_to_vel_gen[music_name],
                                    alpha=1.4,
                                    offset=2.0,
                                    eps=0.05,
                                    version='MAX')
    print(cm_value_sum)
    CM_sum.append(cm_value_sum)
    CM_max.append(cm_value_max)

print('Average CM (SUM) for Video moments VS Velocity moments (generated): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Video moments VS Velocity moments (generated): ', sum(CM_max)/len(CM_max))

Comparing velocity events from the original background tones and generated songs:

In [None]:
# mid_to_vel_orig, mid_to_vel_gen
CM_sum = []
CM_max = []

for name in mid_to_vel_orig:
  music_name = 'music_gen_'+name.split('_')[-1]+'i'
  if music_name in mid_to_vel_gen:
    cm_value_sum = compliance_score(mid_to_vel_orig[name],
                                    mid_to_vel_gen[music_name],
                                    alpha=1,
                                    offset=2.0,
                                    eps=0.05,
                                    version='SUM', symmetric= True)
    cm_value_max = compliance_score(mid_to_vel_orig[name],
                                    mid_to_vel_gen[music_name],
                                    alpha=1,
                                    offset=2.0,
                                    eps=0.05,
                                    version='MAX')
    CM_sum.append(cm_value_sum)
    CM_max.append(cm_value_max)

print('Average CM (SUM) for Velocity moments (original) VS Velocity moments (generated): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Velocity moments (original) VS Velocity moments (generated): ', sum(CM_max)/len(CM_max))

Comparing tempo events from the original background tones and detected key events from the video and searching for the value of the hyperparameter:

In [None]:
# video_key_results, wav_to_tempo_orig
CM_sum = []
CM_max = []

for video_name in video_key_results:
  music_name = 'music_orig_'+video_name.split('.')[0].split('_')[-1]+'.wav'
  if music_name in wav_to_tempo_orig:
    cm_value_sum = compliance_score(video_key_results[video_name],
                                    wav_to_tempo_orig[music_name],
                                    alpha=1.5,
                                    offset=2.5,
                                    eps=1.5,
                                    version='SUM')
    cm_value_max = compliance_score(video_key_results[video_name],
                                    wav_to_tempo_orig[music_name],
                                    alpha=1.4,
                                    offset=2.0,
                                    eps=0.05,
                                    version='MAX')
    print(cm_value_sum)
    if cm_value_sum != 0:
      CM_sum.append(cm_value_sum)
      CM_max.append(cm_value_max)

print('Average CM (SUM) for Video moments VS Tempo moments (original, dl algo): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Video moments VS Tempo moments (original, dl algo): ', sum(CM_max)/len(CM_max))

In [None]:
# video_key_results, wav_to_tempo_orig
# здесь посчитаны с последней версией алгоритма
# Author's note (translated): computed here with the latest version of the algorithm.
CM_sum = []
CM_max = []

for video_name, video_moments in video_key_results.items():
  ad_id = video_name.split('.')[0].split('_')[-1]
  music_name = 'music_orig_' + ad_id + '.wav'
  if music_name not in wav_to_tempo_orig:
    continue
  music_moments = wav_to_tempo_orig[music_name]

  CM_sum.append(compliance_score(video_moments, music_moments,
                                 alpha=1.2, offset=2.5, eps=0.5, version='SUM'))
  CM_max.append(compliance_score(video_moments, music_moments,
                                 alpha=1.4, offset=2.0, eps=0.05, version='MAX'))

print('Average CM (SUM) for Video moments VS Tempo moments (original, dl algo): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Video moments VS Tempo moments (original, dl algo): ', sum(CM_max)/len(CM_max))

In [None]:
# key_video_moments = [1.11, 2.21, 5.15, 9.06, 11.12, 13.03, 14.17, 16.23, 20.0, 23.18, 25.24, 30.14, 34.06, 35.03, 39.07, 40.17, 41.16, 43.06]
# key_music_moments_tempo = [0.63, 0.99, 1.38, 2.19, 2.97, 4.45, 6.31, 17.59, 19.46, 28.81, 32.19, 39.67, 43.81, 48.29, 53.44]

# print(compliance_score(key_video_moments, key_music_moments_tempo, alpha=1.4, offset=2.0, eps=0.05, version='SUM'))
# print(compliance_score(key_video_moments, key_music_moments_tempo, alpha=1.4, offset=2.0, eps=0.05, version='MAX'))


Comparing tempo events from the original background tones and detected key events from the video:

In [None]:
# video_key_results, mid_to_tempo_orig
CM_sum = []
CM_max = []

for video_name in video_key_results:
  music_name = 'music_orig_'+video_name.split('.')[0].split('_')[-1]+'.mid'
  if music_name in mid_to_tempo_orig:
    cm_value_sum = compliance_score(video_key_results[video_name],
                                    mid_to_tempo_orig[music_name],
                                    alpha=1.2,
                                    offset=2.5,
                                    eps=0.5,
                                    version='SUM')
    cm_value_max = compliance_score(video_key_results[video_name],
                                    mid_to_tempo_orig[music_name],
                                    alpha=1.4,
                                    offset=2.0,
                                    eps=0.05,
                                    version='MAX')
    CM_sum.append(cm_value_sum)
    CM_max.append(cm_value_max)

print('Average CM (SUM) for Video moments VS Tempo moments (original, midi algo): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Video moments VS Tempo moments (original, midi algo): ', sum(CM_max)/len(CM_max))

Comparing tempo events detected from original background tones with different algorithms:

In [None]:
# wav_to_tempo_orig, mid_to_tempo_orig
CM_sum = []
CM_max = []

for name in wav_to_tempo_orig:
  music_name = name.split('.')[0]+'.mid'
  if music_name in mid_to_tempo_orig:
    cm_value_sum = compliance_score(wav_to_tempo_orig[name],
                                    mid_to_tempo_orig[music_name],
                                    alpha=1,
                                    offset=3.5,
                                    eps=3.5,
                                    version='MAX', symmetric=True)
    cm_value_max = compliance_score(wav_to_tempo_orig[name],
                                    mid_to_tempo_orig[music_name],
                                    alpha=1,
                                    offset=2.0,
                                    eps=0.05,
                                    version='MAX')
    CM_sum.append(cm_value_sum)
    CM_max.append(cm_value_max)

print('Average CM (SUM) for Tempo moments (original, dl algo) VS Tempo moments (original, midi algo): ', sum(CM_sum)/len(CM_sum))
print('Average CM (MAX) for Tempo moments (original, dl algo) VS Tempo moments (original, midi algo): ', sum(CM_max)/len(CM_max))