## Configure instruments, tempo factors, and key shifts

In [1]:
from music21 import instrument

# Instrument list: display label -> a fresh music21 instrument object.
# Every label is identical to the class name in `music21.instrument`, so the
# mapping is built by looking each class up by name and instantiating it.
instruments = {
    label: getattr(instrument, label)()
    for label in (
        "Piano",
        "Harpsichord",
        "Glockenspiel",
        "Xylophone",
        "ElectricOrgan",
        "Harmonica",
        "AcousticGuitar",
        "ElectricGuitar",
        "ElectricBass",
        "Violin",
        "Viola",
        "Violoncello",
        "Harp",
        "Timpani",
        "Trombone",
        "Tuba",
        "SopranoSaxophone",
        "Flute",
        "Koto",
        "Soprano",
    )
}

# Tempo list: label used in output file names -> numeric scale factor.
tmpo_fctr = dict(original=1.00, x125=1.25, x150=1.50, x075=0.75, x050=0.50)

# Key: transposition amounts (in half steps) applied to every rendition.
keys = list(range(-2, 3))

# Major/minor conversions: tonic name -> signed half-step offset.
# Going by the values, each offset moves the tonic to C (majors) or to
# A (minors); "-" after a letter is music21's spelling for a flat.
majors = {
    "A-": 4, "A": 3, "B-": 2, "B": 1, "C": 0, "D-": -1,
    "D": -2, "E-": -3, "E": -4, "F": -5, "G-": 6, "G": 5,
}
minors = {
    "A-": 1, "A": 0, "B-": -1, "B": -2, "C": -3, "D-": -4,
    "D": -5, "E-": 6, "E": 5, "F": 4, "G-": 3, "G": 2,
}

## Synthesize MIDI to audio (mp3)

In [2]:
from midi2audio import FluidSynth


def midi_to_audio(input_midi, output_audio):
    '''
    Render the MIDI file `input_midi` into the audio file `output_audio`.

    A new FluidSynth wrapper is built with no arguments on every call, so
    midi2audio's default sound font and sample rate (44100 Hz) are used.
    Nothing is returned; the result is the file written at `output_audio`.
    '''
    synth = FluidSynth()
    synth.midi_to_audio(input_midi, output_audio)


## Generate single instrument soundtrack

In [3]:
from music21 import converter
from tqdm import tqdm
from pathlib import Path


# Folder layout and file-name pieces for the generated material.
_TARGET_AUDIO_PATH = 'audio'            # folder holding the source solo MIDI
_MIDI_PATH = 'audio/tmp'                # intermediate MIDI files, one per variant
_AUDIO_PATH = 'audio/instruments'       # synthesized audio, one file per variant
_MID = '.midi'
_MP3 = '.mp3'
_MIDI_SOLO_CHOIR = 'HighwayToHell_solo_choir'

# Both output folders must exist before anything is written into them.
for _out_dir in (_MIDI_PATH, _AUDIO_PATH):
    Path(_out_dir).mkdir(parents=True, exist_ok=True)

score = converter.parse(Path(_TARGET_AUDIO_PATH, f'{_MIDI_SOLO_CHOIR}{_MID}'))
solo_part = score.parts[0]

# One rendition per (instrument, tempo factor, key shift) combination.
for inst_name, inst_obj in tqdm(instruments.items()):
    # Drop every instrument already attached to the part (the one parsed from
    # the source file and the one inserted on the previous pass). Without this
    # the instruments pile up at offset 0 and the program that ends up in the
    # exported MIDI is no longer guaranteed to be `inst_obj`.
    stale = list(solo_part.recurse().getElementsByClass('Instrument'))
    if stale:
        solo_part.remove(stale, recurse=True)
    solo_part.insert(0, inst_obj)

    # With different tempo: scale every offset and duration by the factor.
    # NOTE(review): a factor above 1 lengthens the notes, so 'x125' would play
    # slower than 'original' unless something else compensates - confirm that
    # this matches the intended meaning of the labels.
    for tmpo_name, tempo in tmpo_fctr.items():
        mod_score = score.scaleOffsets(tempo).scaleDurations(tempo)

        # With different key
        for halfstep in keys:
            if halfstep == 0 and tmpo_name == 'original':
                # Untouched rendition: no key suffix in the file name.
                variant = mod_score
                stem = f'HighwayToHell_solo_{inst_name}_{tmpo_name}'
            else:
                variant = mod_score.transpose(halfstep)
                key = variant.analyze('key')
                stem = (f'HighwayToHell_solo_{inst_name}_{tmpo_name}'
                        f'_{key.tonic}{key.mode}')

            # Derive both file names from one stem rather than slicing the
            # extension off by a hard-coded length.
            _SOLO_MIDI = f'{stem}{_MID}'
            midi_file = Path(_MIDI_PATH, _SOLO_MIDI)
            variant.write('midi', midi_file)
            midi_to_audio(midi_file, Path(_AUDIO_PATH, f'{stem}{_MP3}'))


100%|██████████| 20/20 [08:26<00:00, 25.34s/it]


## Load a trained neural network model

In [4]:
import tensorflow as tf
from musicnn_keras.tagger import top_tags


# Load the Keras checkpoint shipped under ./musicnn_keras/keras_checkpoints
# (relative to the notebook's working directory) into `musicnn`.
# NOTE(review): the tagging cell visible in this notebook does not use this
# object - it calls top_tags(..., model='MTT_musicnn') by name instead.
# Confirm whether this load is still needed.
musicnn = tf.keras.models.load_model(
    './musicnn_keras/keras_checkpoints/MSD_musicnn.h5')



In [31]:
import json

_MP3_PATH = 'audio/instruments'

# Materialize the listing once. Path.glob() returns a one-shot iterator, and
# the original appended an unevaluated generator expression to `audio_list`,
# so the list never held the file names. Sorting also makes the processing
# order (and the key order in result.json) deterministic.
audios = sorted(Path(_MP3_PATH).glob('*.mp3'))
audio_list = [af.name for af in audios]
audio_json = dict()

for af in tqdm(audios):
    # Expected names, underscore separated:
    #   HighwayToHell_solo_<instrument>_original
    #   HighwayToHell_solo_<instrument>_<tempo>_<key>
    audio_name = af.name
    track_id = audio_name.split('.')[0]
    meta_data = track_id.split('_')

    # Validate before the expensive tagging call so an unrelated file does not
    # abort the whole run with an IndexError.
    is_original = meta_data[-1] == 'original'
    if len(meta_data) < 3 or (not is_original and len(meta_data) < 5):
        print(f'Skipping {audio_name}: unexpected file name layout')
        continue

    # Get Tags
    audio_dict = dict()
    audio_dict['tags'] = top_tags(str(af),
                                  model='MTT_musicnn',
                                  topN=10,
                                  print_tags=False)

    # Save result
    # NOTE: the misspelled key 'instrumrnt' is kept on purpose, because
    # renaming it would change the schema of result.json for existing readers.
    audio_dict['instrumrnt'] = meta_data[2]
    if is_original:
        audio_dict['tempo'] = 'original'
        audio_dict['key'] = 'original'
    else:
        audio_dict['tempo'] = meta_data[3]
        audio_dict['key'] = meta_data[4]
    audio_json[track_id] = audio_dict

with open('result.json', 'w', encoding='utf-8') as f:
    json.dump(audio_json, f, ensure_ascii=False, indent=4)


0it [00:00, ?it/s]



1it [00:03,  3.97s/it]



2it [00:09,  4.75s/it]



3it [00:17,  6.17s/it]



4it [00:20,  4.89s/it]



5it [00:22,  4.11s/it]



6it [00:26,  3.89s/it]



7it [00:28,  3.49s/it]



8it [00:31,  3.08s/it]



9it [00:34,  3.03s/it]



10it [00:36,  3.00s/it]



11it [00:39,  2.76s/it]



12it [00:42,  2.92s/it]



13it [00:44,  2.71s/it]



14it [00:46,  2.57s/it]



15it [00:49,  2.58s/it]



16it [00:52,  2.80s/it]



17it [00:54,  2.53s/it]



18it [00:56,  2.44s/it]



19it [01:00,  2.70s/it]



20it [01:03,  2.77s/it]



21it [01:06,  2.95s/it]



22it [01:09,  2.86s/it]



23it [01:12,  2.88s/it]



24it [01:15,  2.91s/it]



25it [01:18,  2.90s/it]



26it [01:20,  2.81s/it]



27it [01:23,  2.84s/it]



28it [01:25,  2.50s/it]



29it [01:29,  3.05s/it]



30it [01:31,  2.71s/it]



31it [01:34,  2.91s/it]



32it [01:37,  2.94s/it]



33it [01:41,  3.13s/it]



34it [01:43,  2.81s/it]



35it [01:47,  3.13s/it]



36it [01:49,  2.74s/it]



37it [01:51,  2.70s/it]



38it [01:53,  2.49s/it]



39it [01:57,  2.79s/it]



40it [01:59,  2.63s/it]



41it [02:02,  2.68s/it]



42it [02:06,  3.03s/it]



43it [02:09,  3.04s/it]



44it [02:12,  3.04s/it]



45it [02:16,  3.32s/it]



46it [02:18,  2.91s/it]



47it [02:21,  3.14s/it]



48it [02:27,  3.75s/it]



49it [02:31,  3.85s/it]



50it [02:33,  3.47s/it]



51it [02:36,  3.15s/it]



52it [02:39,  3.14s/it]



53it [02:43,  3.33s/it]



54it [02:46,  3.22s/it]



55it [02:49,  3.34s/it]



56it [02:52,  3.19s/it]



57it [02:57,  3.85s/it]



58it [03:00,  3.34s/it]



59it [03:06,  4.36s/it]



60it [03:12,  4.65s/it]



61it [03:14,  4.07s/it]



62it [03:16,  3.49s/it]



63it [03:20,  3.59s/it]



64it [03:23,  3.28s/it]



65it [03:26,  3.29s/it]



66it [03:30,  3.50s/it]



67it [03:34,  3.68s/it]



68it [03:41,  4.46s/it]



69it [03:45,  4.47s/it]



70it [03:49,  4.30s/it]



71it [03:52,  3.83s/it]



72it [03:57,  4.19s/it]



73it [04:02,  4.42s/it]



74it [04:05,  4.06s/it]



75it [04:09,  4.23s/it]



76it [04:13,  4.04s/it]



77it [04:16,  3.75s/it]



78it [04:19,  3.50s/it]



79it [04:22,  3.35s/it]



80it [04:26,  3.49s/it]



81it [04:29,  3.28s/it]



82it [04:32,  3.27s/it]



83it [04:34,  2.97s/it]



84it [04:37,  2.80s/it]



85it [04:40,  3.09s/it]



86it [04:43,  2.89s/it]



87it [04:47,  3.20s/it]



88it [04:50,  3.36s/it]



89it [04:54,  3.40s/it]



90it [04:58,  3.62s/it]



91it [05:02,  3.81s/it]



92it [05:06,  3.65s/it]



93it [05:08,  3.32s/it]



94it [05:11,  3.30s/it]



95it [05:14,  3.14s/it]



96it [05:17,  3.14s/it]



97it [05:23,  3.84s/it]



98it [05:26,  3.57s/it]



99it [05:30,  3.78s/it]



100it [05:34,  3.72s/it]



101it [05:37,  3.49s/it]



102it [05:41,  3.68s/it]



103it [05:45,  3.76s/it]



104it [05:47,  3.22s/it]



105it [05:49,  2.93s/it]



106it [05:53,  3.42s/it]



107it [05:58,  3.75s/it]



108it [06:03,  4.02s/it]



109it [06:11,  5.39s/it]



110it [06:15,  5.00s/it]



111it [06:18,  4.21s/it]



112it [06:20,  3.60s/it]



113it [06:30,  5.48s/it]



114it [06:35,  5.46s/it]



115it [06:37,  4.49s/it]



116it [06:42,  4.57s/it]



117it [06:47,  4.58s/it]



118it [06:49,  3.86s/it]



119it [06:52,  3.57s/it]



120it [06:54,  3.28s/it]



121it [06:57,  3.06s/it]



122it [07:00,  3.22s/it]



123it [07:04,  3.28s/it]



124it [07:08,  3.39s/it]



125it [07:11,  3.40s/it]



126it [07:14,  3.20s/it]



127it [07:18,  3.45s/it]



128it [07:22,  3.67s/it]



129it [07:25,  3.54s/it]



130it [07:27,  3.16s/it]



131it [07:32,  3.59s/it]



132it [07:36,  3.65s/it]



133it [07:38,  3.32s/it]



134it [07:41,  3.11s/it]



135it [07:43,  2.93s/it]



136it [07:49,  3.79s/it]



137it [07:53,  3.63s/it]



138it [07:55,  3.35s/it]



139it [07:59,  3.46s/it]



140it [08:02,  3.44s/it]



141it [08:05,  3.11s/it]



142it [08:06,  2.72s/it]



143it [08:10,  2.95s/it]



144it [08:13,  3.07s/it]



145it [08:18,  3.52s/it]



146it [08:22,  3.59s/it]



147it [08:24,  3.34s/it]



148it [08:27,  2.98s/it]



149it [08:30,  3.11s/it]



150it [08:35,  3.54s/it]



151it [08:39,  3.85s/it]



152it [08:43,  3.82s/it]



153it [08:47,  3.97s/it]



154it [08:50,  3.72s/it]



155it [08:53,  3.47s/it]



156it [08:56,  3.17s/it]



157it [08:59,  3.18s/it]



158it [09:04,  3.64s/it]



159it [09:08,  3.78s/it]



160it [09:12,  3.81s/it]



161it [09:16,  3.96s/it]



162it [09:18,  3.31s/it]



163it [09:21,  3.27s/it]



164it [09:29,  4.80s/it]



165it [09:34,  4.70s/it]



166it [09:37,  4.36s/it]



167it [09:42,  4.43s/it]



168it [09:48,  5.06s/it]



169it [09:52,  4.78s/it]



170it [09:56,  4.26s/it]



171it [09:58,  3.60s/it]



172it [10:00,  3.38s/it]



173it [10:03,  3.21s/it]



174it [10:06,  3.07s/it]



175it [10:08,  2.62s/it]



176it [10:09,  2.29s/it]



177it [10:12,  2.40s/it]



178it [10:14,  2.31s/it]



179it [10:17,  2.42s/it]



180it [10:23,  3.57s/it]



181it [10:32,  5.20s/it]



182it [10:39,  5.66s/it]



183it [10:41,  4.63s/it]



184it [10:46,  4.84s/it]



185it [10:49,  4.32s/it]



186it [10:53,  4.16s/it]



187it [10:58,  4.30s/it]



188it [11:04,  4.88s/it]



189it [11:07,  4.31s/it]



190it [11:10,  4.02s/it]



191it [11:14,  3.97s/it]



192it [11:16,  3.45s/it]



193it [11:21,  3.80s/it]



194it [11:29,  4.96s/it]



195it [11:39,  6.74s/it]



196it [11:45,  6.31s/it]



197it [11:48,  5.51s/it]



198it [11:56,  6.24s/it]



199it [12:06,  7.26s/it]



200it [12:13,  7.30s/it]



201it [12:19,  6.65s/it]



202it [12:23,  5.98s/it]



203it [12:30,  6.27s/it]



204it [12:36,  6.13s/it]



205it [12:42,  6.12s/it]



206it [12:46,  5.46s/it]



207it [12:47,  4.35s/it]



208it [12:51,  4.07s/it]



209it [12:56,  4.45s/it]



210it [12:59,  3.89s/it]



211it [13:02,  3.66s/it]



212it [13:05,  3.38s/it]



213it [13:07,  3.02s/it]



214it [13:09,  2.86s/it]



215it [13:16,  4.05s/it]



216it [13:28,  6.51s/it]



217it [13:32,  5.51s/it]



218it [13:46,  8.22s/it]



219it [13:58,  9.29s/it]



220it [14:03,  7.91s/it]



221it [14:09,  7.43s/it]



222it [14:14,  6.63s/it]



223it [14:16,  5.47s/it]



224it [14:20,  4.85s/it]



225it [14:24,  4.54s/it]



226it [14:30,  5.13s/it]



227it [14:33,  4.35s/it]



228it [14:39,  4.91s/it]



229it [14:46,  5.45s/it]



230it [14:50,  5.10s/it]



231it [14:53,  4.54s/it]



232it [14:58,  4.76s/it]



233it [15:03,  4.63s/it]



234it [15:10,  5.40s/it]



235it [15:12,  4.47s/it]



236it [15:16,  4.13s/it]



237it [15:19,  3.90s/it]



238it [15:27,  5.05s/it]



239it [15:31,  4.91s/it]



240it [15:36,  4.73s/it]



241it [15:39,  4.44s/it]



242it [15:42,  4.05s/it]



243it [15:46,  4.04s/it]



244it [15:51,  4.11s/it]



245it [15:56,  4.43s/it]



246it [15:59,  4.01s/it]



247it [16:01,  3.50s/it]



248it [16:04,  3.42s/it]



249it [16:09,  3.61s/it]



250it [16:12,  3.58s/it]



251it [16:16,  3.84s/it]



252it [16:26,  5.42s/it]



253it [16:30,  5.07s/it]



254it [16:35,  4.98s/it]



255it [16:39,  4.70s/it]



256it [16:50,  6.68s/it]



257it [17:02,  8.15s/it]



258it [17:10,  8.28s/it]



259it [17:21,  9.07s/it]



260it [17:24,  7.30s/it]



261it [17:34,  7.99s/it]



262it [17:44,  8.78s/it]



263it [17:52,  8.56s/it]



264it [17:55,  6.69s/it]



265it [17:57,  5.46s/it]



266it [18:01,  4.94s/it]



267it [18:05,  4.76s/it]



268it [18:26,  9.60s/it]



269it [18:34,  9.10s/it]



270it [18:37,  7.17s/it]



271it [18:41,  6.26s/it]



272it [18:46,  5.98s/it]



273it [18:49,  4.97s/it]



274it [19:02,  7.26s/it]



275it [19:09,  7.43s/it]



276it [19:14,  6.47s/it]



277it [19:18,  5.74s/it]



278it [19:22,  5.42s/it]



279it [19:25,  4.69s/it]



280it [19:29,  4.37s/it]



281it [19:35,  4.97s/it]



282it [19:38,  4.38s/it]



283it [19:40,  3.66s/it]



284it [19:43,  3.46s/it]



285it [19:46,  3.07s/it]



286it [19:51,  3.87s/it]



287it [19:55,  3.89s/it]



288it [19:59,  3.75s/it]



289it [20:02,  3.74s/it]



290it [20:05,  3.31s/it]



291it [20:08,  3.34s/it]



292it [20:14,  4.15s/it]



293it [20:16,  3.49s/it]



294it [20:21,  3.87s/it]



295it [20:25,  3.84s/it]



296it [20:29,  4.07s/it]



297it [20:37,  5.31s/it]



298it [20:43,  5.45s/it]



299it [20:47,  4.88s/it]



300it [20:51,  4.62s/it]



301it [20:58,  5.38s/it]



302it [21:01,  4.79s/it]



303it [21:08,  5.47s/it]



304it [21:13,  5.11s/it]



305it [21:15,  4.31s/it]



306it [21:17,  3.67s/it]



307it [21:27,  5.41s/it]



308it [21:34,  5.94s/it]



309it [21:38,  5.36s/it]



310it [21:42,  4.88s/it]



311it [21:45,  4.45s/it]



312it [21:48,  4.04s/it]



313it [21:54,  4.68s/it]



314it [21:58,  4.42s/it]



315it [22:00,  3.69s/it]



316it [22:02,  3.21s/it]



317it [22:05,  3.18s/it]



318it [22:09,  3.46s/it]



319it [22:13,  3.46s/it]



320it [22:15,  3.02s/it]



321it [22:17,  2.74s/it]



322it [22:19,  2.65s/it]



323it [22:23,  3.03s/it]



324it [22:26,  2.90s/it]



325it [22:29,  3.07s/it]



326it [22:32,  3.08s/it]



327it [22:34,  2.73s/it]



328it [22:39,  3.23s/it]



329it [22:43,  3.52s/it]



330it [22:47,  3.54s/it]



331it [22:49,  3.20s/it]



332it [22:52,  3.09s/it]



333it [22:54,  2.78s/it]



334it [23:00,  3.72s/it]



335it [23:03,  3.63s/it]



336it [23:06,  3.24s/it]



337it [23:09,  3.36s/it]



338it [23:13,  3.41s/it]



339it [23:17,  3.63s/it]



340it [23:19,  3.22s/it]



341it [23:22,  3.15s/it]



342it [23:25,  2.93s/it]



343it [23:28,  3.04s/it]



344it [23:31,  3.16s/it]



345it [23:34,  3.17s/it]



346it [23:38,  3.43s/it]



347it [23:43,  3.64s/it]



348it [23:45,  3.28s/it]



349it [23:48,  3.13s/it]



350it [23:51,  3.00s/it]



351it [23:54,  3.02s/it]



352it [23:58,  3.43s/it]



353it [24:02,  3.63s/it]



354it [24:06,  3.63s/it]



355it [24:09,  3.41s/it]



356it [24:12,  3.27s/it]



357it [24:14,  3.16s/it]



358it [24:17,  2.98s/it]



359it [24:20,  3.08s/it]



360it [24:24,  3.12s/it]



361it [24:29,  3.75s/it]



362it [24:32,  3.53s/it]



363it [24:36,  3.67s/it]



364it [24:39,  3.54s/it]



365it [24:42,  3.52s/it]



366it [24:46,  3.46s/it]



367it [24:48,  3.17s/it]



368it [24:50,  2.81s/it]



369it [24:53,  2.82s/it]



370it [24:59,  3.62s/it]



371it [25:01,  3.37s/it]



372it [25:05,  3.36s/it]



373it [25:09,  3.52s/it]



374it [25:13,  3.64s/it]



375it [25:15,  3.25s/it]



376it [25:18,  3.23s/it]



377it [25:21,  2.99s/it]



378it [25:25,  3.43s/it]



379it [25:29,  3.58s/it]



380it [25:32,  3.37s/it]



381it [25:34,  2.95s/it]



382it [25:36,  2.79s/it]



383it [25:39,  2.78s/it]



384it [25:43,  3.12s/it]



385it [25:46,  3.12s/it]



386it [25:48,  2.90s/it]



387it [25:51,  2.75s/it]



388it [25:55,  3.13s/it]



389it [25:57,  2.89s/it]



390it [26:00,  2.78s/it]



391it [26:06,  4.00s/it]



392it [26:10,  3.92s/it]



393it [26:13,  3.64s/it]



394it [26:17,  3.68s/it]



395it [26:20,  3.40s/it]



396it [26:23,  3.27s/it]



397it [26:26,  3.22s/it]



398it [26:29,  3.22s/it]



399it [26:32,  3.09s/it]



400it [26:35,  3.05s/it]



401it [26:37,  2.76s/it]



402it [26:40,  2.90s/it]



403it [26:44,  3.09s/it]



404it [26:46,  3.02s/it]



405it [26:50,  3.05s/it]



406it [26:53,  3.25s/it]



407it [26:57,  3.42s/it]



408it [27:00,  3.24s/it]



409it [27:03,  3.22s/it]



410it [27:06,  3.09s/it]



411it [27:09,  3.11s/it]



412it [27:12,  2.93s/it]



413it [27:17,  3.65s/it]



414it [27:20,  3.58s/it]



415it [27:23,  3.23s/it]



416it [27:26,  3.22s/it]



417it [27:28,  2.84s/it]



418it [27:30,  2.70s/it]



419it [27:33,  2.62s/it]



420it [27:35,  2.55s/it]



421it [27:38,  2.64s/it]



422it [27:41,  2.81s/it]



423it [27:44,  2.93s/it]



424it [27:48,  3.12s/it]



425it [27:51,  3.16s/it]



426it [27:54,  2.94s/it]



427it [27:56,  2.80s/it]



428it [27:59,  2.72s/it]



429it [28:02,  3.06s/it]



430it [28:04,  2.71s/it]



431it [28:08,  3.05s/it]



432it [28:10,  2.82s/it]



433it [28:13,  2.88s/it]



434it [28:17,  2.96s/it]



435it [28:20,  3.09s/it]



436it [28:23,  3.21s/it]



437it [28:27,  3.35s/it]



438it [28:32,  3.93s/it]



439it [28:35,  3.64s/it]



440it [28:37,  3.10s/it]



441it [28:40,  3.14s/it]



442it [28:45,  3.61s/it]



443it [28:49,  3.60s/it]



444it [28:52,  3.60s/it]



445it [28:56,  3.51s/it]



446it [29:00,  3.78s/it]



447it [29:06,  4.52s/it]



448it [29:13,  5.24s/it]



449it [29:19,  5.48s/it]



450it [29:26,  5.79s/it]



451it [29:33,  6.20s/it]



452it [29:42,  7.02s/it]



453it [29:55,  8.83s/it]



454it [29:58,  7.25s/it]



455it [30:05,  6.89s/it]



456it [30:08,  5.90s/it]



457it [30:12,  5.23s/it]



458it [30:15,  4.68s/it]



459it [30:25,  6.13s/it]



460it [30:32,  6.40s/it]



461it [30:34,  5.14s/it]



462it [30:37,  4.42s/it]



463it [30:41,  4.46s/it]



464it [30:44,  4.09s/it]



465it [30:48,  4.06s/it]



466it [30:51,  3.52s/it]



467it [30:53,  3.07s/it]



468it [30:56,  3.14s/it]



469it [30:58,  2.91s/it]



470it [31:02,  3.11s/it]



471it [31:06,  3.33s/it]



472it [31:08,  2.94s/it]



473it [31:11,  3.10s/it]



474it [31:14,  3.08s/it]



475it [31:19,  3.48s/it]



476it [31:21,  3.24s/it]



477it [31:25,  3.46s/it]



478it [31:32,  4.26s/it]



479it [31:35,  4.15s/it]



480it [31:38,  3.55s/it]



481it [31:40,  3.18s/it]



482it [31:43,  3.18s/it]



483it [31:47,  3.50s/it]



484it [31:53,  4.14s/it]



485it [31:58,  4.52s/it]



486it [32:03,  4.54s/it]



487it [32:07,  4.33s/it]



488it [32:15,  5.51s/it]



489it [32:18,  4.87s/it]



490it [32:23,  4.84s/it]



491it [32:35,  7.05s/it]



492it [32:44,  7.37s/it]



493it [32:46,  5.80s/it]



494it [32:50,  5.31s/it]



495it [32:52,  4.36s/it]



496it [32:57,  4.52s/it]



497it [33:02,  4.71s/it]



498it [33:11,  6.09s/it]



499it [33:15,  5.39s/it]



500it [33:18,  4.00s/it]
