In [17]:
%pip install librosa

Collecting librosa
  Downloading librosa-0.10.0.post2-py3-none-any.whl (253 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting audioread>=2.1.9
  Downloading audioread-3.0.0.tar.gz (377 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.0/377.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting numba>=0.51.0
  Downloading numba-0.56.4-cp37-cp37m-macosx_10_14_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting soundfile>=0.12.1
  Downloading soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting typing-

In [1]:
from os import listdir
from os.path import isfile, join

def get_directory_files(dir, with_extenton=False):
    """List the regular files directly inside ``dir``.

    Sub-directories are skipped; the listing is not recursive and is returned
    in whatever order ``listdir`` yields it.

    Args:
        dir: Path of the directory to list.
        with_extenton: When True, return the file names unchanged. When False
            (the default), strip only the final extension from each name, so
            ``"take.2.wav"`` becomes ``"take.2"``.

    Returns:
        A list of file names (not full paths).
    """
    # Local import: the cell's import line only brings in isfile and join.
    from os.path import splitext

    directory_files = [f for f in listdir(dir) if isfile(join(dir, f))]

    if not with_extenton:
        # splitext removes just the last suffix; splitting on the first "."
        # would truncate names that contain additional dots.
        directory_files = [splitext(f)[0] for f in directory_files]

    return directory_files

In [2]:
import re

def note_to_midi(note):
    """Turn a sample name such as ``"F#3v1"`` into this notebook's key number.

    The name must start with a note letter ``A``-``G`` (optionally followed by
    ``#``), a single octave digit, the letter ``v`` and a version number. Only
    version ``1`` is accepted.

    Returns:
        ``semitone_within_octave + 12 * octave`` (so ``C0`` is 0 and ``A0`` is
        9), or ``None`` when the name does not match the pattern or the
        version is not 1.
    """
    parsed = re.match(r'([A-G]#?)(\d)v(\d+)', note)
    if parsed is None:
        return None

    pitch_class, octave_digit, layer = parsed.groups()
    if layer != '1':
        # Other versions of the same note are deliberately ignored.
        return None

    chromatic_scale = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    semitone_of = {name: index for index, name in enumerate(chromatic_scale)}
    return semitone_of[pitch_class] + 12 * int(octave_digit)

In [3]:
import librosa
import soundfile as sf

def transpose_audio_file(filepath, semitones):
    """Pitch-shift an audio file and write the result next to the original.

    The output name is the input name with ``_transposed<semitones>`` inserted
    before the file extension, e.g. ``key-009.wav`` -> ``key-009_transposed1.wav``.
    Only the final extension is touched, so directory names containing
    ``.wav`` are left alone and inputs whose extension is not a lowercase
    ``.wav`` are never overwritten.

    Args:
        filepath: Path of the audio file to read.
        semitones: Number of semitones to shift by, passed to librosa as
            ``n_steps``.

    Returns:
        The path of the newly written file.
    """
    # Local import: this cell only imports librosa and soundfile.
    import os

    # sr=None is passed so librosa does not substitute its default rate.
    y, sr = librosa.load(filepath, sr=None)

    y_transposed = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=semitones)

    # Insert the suffix before the real extension instead of string-replacing
    # '.wav' anywhere in the path. Fall back to '.wav' for extension-less
    # inputs so the output name always carries a format suffix.
    root, ext = os.path.splitext(filepath)
    new_filepath = f'{root}_transposed{semitones}{ext or ".wav"}'
    sf.write(new_filepath, y_transposed, sr)

    return new_filepath



In [7]:
import shutil
import os

def create_lib(original_files, new_dir='lib', first_midi=9, last_midi=96):
    """Build a directory containing one ``key-NNN.wav`` file per key number.

    Each ``(path, midi_num)`` pair in ``original_files`` is copied to
    ``<new_dir>/key-<midi_num, zero-padded to 3 digits>.wav``. Every number
    from ``first_midi`` to ``last_midi`` (inclusive) that still has no file is
    then generated with ``transpose_audio_file``:

    * upward, by shifting the file one number below by +1 semitone;
    * for a gap at the bottom of the range (nothing below to shift up),
      downward, by shifting the file one number above by -1 semitone.

    Args:
        original_files: Iterable of ``(source_path, midi_num)`` pairs.
        new_dir: Output directory; created if missing.
        first_midi: Lowest key number to guarantee a file for.
        last_midi: Highest key number to guarantee a file for.

    Raises:
        FileNotFoundError: If a gap cannot be filled because no file exists
            to transpose from (e.g. ``original_files`` is empty and
            ``new_dir`` holds no samples).
    """
    os.makedirs(new_dir, exist_ok=True)

    def key_path(midi_num):
        # Single place that defines the library's file naming scheme.
        return os.path.join(new_dir, f'key-{midi_num:03d}.wav')

    # Save existing files in the new directory with the key number as filename.
    for file, midi_num in original_files:
        shutil.copyfile(file, key_path(midi_num))

    # Upward pass: fill each missing number from the file just below it.
    # Numbers with nothing below them are remembered for the downward pass.
    leading_gap = []
    for midi_num in range(first_midi, last_midi + 1):
        filepath = key_path(midi_num)
        if os.path.exists(filepath):
            continue

        previous_filepath = key_path(midi_num - 1)
        if os.path.exists(previous_filepath):
            new_filepath = transpose_audio_file(previous_filepath, 1)
            os.rename(new_filepath, filepath)  # give it the canonical name
        else:
            leading_gap.append(midi_num)

    if not leading_gap:
        return

    # Downward pass: the gap is contiguous from first_midi, so walk it from
    # the top, shifting each already-present upper neighbour down a semitone.
    if not os.path.exists(key_path(leading_gap[-1] + 1)):
        raise FileNotFoundError(
            f'no sample available in {new_dir!r} to fill keys '
            f'{leading_gap[0]}-{leading_gap[-1]}'
        )
    for midi_num in reversed(leading_gap):
        new_filepath = transpose_audio_file(key_path(midi_num + 1), -1)
        os.rename(new_filepath, key_path(midi_num))

In [8]:
# Gather the raw samples, keep those whose file name parses to a key number,
# order them by that number, and build the library from the result.
raw_dir = '44.1khz16bit/'
files = sorted(
    (
        (raw_dir + filename, midi_num)
        for filename, midi_num in (
            (entry, note_to_midi(entry))
            for entry in get_directory_files(raw_dir, with_extenton=True)
        )
        if midi_num is not None
    ),
    key=lambda pair: pair[1],
)
create_lib(files)