In [None]:
import sys
sys.path.append("..")
import json
import random
import dadagp
import os
from model_ead import TransformerXL
import make_loops as loops
import guitarpro
import pickle
import torch
import yaml

In [None]:
#Generates a folder of inference attempts and runs the loop extraction algorithm on each of them. Also reports loop count and density statistics.

In [None]:
#root folder of the loops dataset
ROOT_PATH = "D:\\Documents\\DATA\\DadaGP-4-only-lps-3-dens-per-inst"
#JSON file listing the token files that make up the loops dataset
FILELIST_PATH = "D:\\Documents\\DATA\\DadaGP-4-only-lps-3-dens-per-inst\\file_list_loops.json"
#folder that receives the generation results
OUTPUT_PATH = "D:\\Documents\\DATA\\dadagp-generation-test"

#length, in measures, that every loop listed in FILELIST_PATH is assumed to have
LOOP_LENGTH = 4
#number of excerpts to generate
NUM_SAMPLES = 25

In [None]:
#read the JSON index of the loops dataset and report how many entries it holds
with open(FILELIST_PATH, "r") as list_handle:
    file_list = json.load(list_handle)

num_files = len(file_list)
print("{} files".format(num_files))

In [None]:
#draw NUM_SAMPLES distinct file indices at random (no repeats) from the training data
candidate_idxes = range(num_files)
sampled_idxes = random.sample(candidate_idxes, k=NUM_SAMPLES)

In [None]:
#For every sampled file: pick one random LOOP_LENGTH-measure segment, save it as the
#"human" reference (token file + GuitarPro file) and build a generation primer from it.
human_sample_data = {} #sample number -> (chosen segment index, file path as listed in file_list)
primers = [] #one primer (list of tokens) per sampled file, in sample order

#the token files and decoded GuitarPro files are written here, so it has to exist
human_dir = os.path.join(OUTPUT_PATH, "human")
os.makedirs(human_dir, exist_ok=True)

for sample_id, idx in enumerate(sampled_idxes):
    filepath = file_list[idx]
    token_file = os.path.join(ROOT_PATH, filepath)

    with open(token_file, "r") as f:
        list_words = f.read().split("\n")

    #the first four tokens are kept as the header, everything after is the song body
    header_data = list_words[:4]
    main_data = list_words[4:]

    #calculate the number of loops in the song
    measure_idx = [i for i, token in enumerate(main_data) if token == "new_measure"]
    num_measures = len(measure_idx)
    num_segments = num_measures // LOOP_LENGTH
    measure_idx.append(len(main_data)) #sentinel marking where the last measure stops

    if num_segments < 1:
        #random.randint would fail with an obscure "empty range" error; name the file instead
        raise ValueError("{} has {} measures, fewer than LOOP_LENGTH={}".format(filepath, num_measures, LOOP_LENGTH))

    #choose a random loop in the song
    rand_segment = random.randint(0, num_segments - 1)
    human_sample_data[sample_id] = rand_segment, filepath
    measure_start = rand_segment * LOOP_LENGTH
    start_idx = measure_idx[measure_start]
    #a segment spans LOOP_LENGTH measures (previously hard-coded as 4)
    end_idx = measure_idx[measure_start + LOOP_LENGTH]

    final_loop = header_data + main_data[start_idx:end_idx]
    if final_loop[-1] != "end":
        final_loop.append("end")

    #the primer is every token up to and including the first "wait:" token,
    #with the artist token replaced by a neutral placeholder
    primer = []
    for token in final_loop:
        if "artist:" in token:
            primer.append("artist:unknown_artist")
        else:
            primer.append(token)
        if "wait:" in token:
            break
    primers.append(primer)

    #save the loop we took the primer from
    token_path = os.path.join(human_dir, "ex_" + str(sample_id) + ".txt")
    dadagp_path = os.path.join(human_dir, "ex_" + str(sample_id) + ".gp5")
    with open(token_path, "w") as file_out:
        file_out.write("\n".join(final_loop))
    dadagp.dadagp_decode(token_path, dadagp_path)

#save which segment of which file each primer came from
path_json = os.path.join(OUTPUT_PATH, "sampled_loops_info.json")
with open(path_json, 'w') as f:
    json.dump(human_sample_data, f)


In [None]:
#load the model
#inference settings (GPU id, vocabulary paths, cuda flag) are read from the project config
with open("../full-data-config_5_lat1024.yml", 'r') as f:
    cfg = yaml.full_load(f)
inferenceConfig = cfg['INFERENCE']

#os.environ only accepts strings; str() leaves the value untouched if it already is one
os.environ['CUDA_VISIBLE_DEVICES'] = str(inferenceConfig['gpuID'])

CHECKPOINT_FOLDER = "D:\\Documents\\Queen Mary\\dev\\msc_thesis\\dadaGP-generation\\model-weights"
EPOCH = 40
NAME = "ep_40" #name of the sub-folder of OUTPUT_PATH that receives this checkpoint's generations
model_path = os.path.join(CHECKPOINT_FOLDER, 'ep_{}.pth.tar'.format(str(EPOCH)))

#the model hyper-parameters come from the config stored next to the checkpoint
with open(os.path.join("..", CHECKPOINT_FOLDER,"full-data-config.yml"), 'r') as f:
    pretrainCfg = yaml.full_load(f)
modelConfig = pretrainCfg['MODEL']

#NOTE: pickle can execute arbitrary code while loading; only point these paths at trusted vocabulary files
with open(os.path.join("..", inferenceConfig['vocab_data_path']), 'rb') as f:
    event2word = pickle.load(f)
with open(os.path.join("..", inferenceConfig['rev_vocab_data_path']), 'rb') as f:
    word2event = pickle.load(f)

#in this cell `device` is only reported; the model itself is handed the configured gpuID
device = torch.device("cuda" if not inferenceConfig["no_cuda"] and torch.cuda.is_available() else "cpu")
print('Device to generate:', device)

model =  TransformerXL(
        modelConfig,
        inferenceConfig['gpuID'],
        event2word=event2word, 
        word2event=word2event, 
        is_training=False)

In [None]:
#calculate average number of notes per instrument in each measure
def calc_density(token_list):
    """Return the average number of notes per instrument per measure.

    A token counts as a note when it contains the substring "note"; the text
    before its first ":" is used as the instrument name. The number of measures
    is the number of "new_measure" tokens.

    Args:
        token_list: iterable of token strings.

    Returns:
        (total notes / number of distinct instruments) / number of measures,
        as a float. Returns 0.0 when the tokens contain no notes or no
        measures, instead of raising ZeroDivisionError.
    """
    num_meas = 0
    num_notes = {} #instrument name -> number of note tokens seen
    for t in token_list:
        if "note" in t:
            instrument = t.split(":")[0]
            num_notes[instrument] = num_notes.get(instrument, 0) + 1
        if t == "new_measure":
            num_meas += 1

    #nothing to average over: report zero density rather than dividing by zero
    if not num_notes or num_meas == 0:
        return 0.0

    total_notes = sum(num_notes.values())
    curr_density = total_notes * 1.0 / len(num_notes)

    return curr_density / num_meas


In [None]:
#loop extraction parameters
LOOP_SIZE = 4
MIN_LEN = 4
MIN_REP_BEATS = 2.0
DENSITY = 1

#generate NUM_BARS from each primer, keeping track of number of loops extracted and density
NUM_BARS = 16
total_segments = 0
total_density = 0

#loop-invariant beat counts: only loops of exactly LOOP_SIZE bars are accepted
beats_per_bar = 4 #inference forces 4/4
min_beats = beats_per_bar * LOOP_SIZE
max_beats = beats_per_bar * LOOP_SIZE

#guitarpro.write needs the destination folder to exist
generation_dir = os.path.join(OUTPUT_PATH, NAME)
os.makedirs(generation_dir, exist_ok=True)

for idx, primer in enumerate(primers):
    print(idx, primer)
    generated = model.inference_single_from_primer(
        os.path.join("..",model_path),
        ['temperature', 'nucleus'],
        {'t': 1.2 ,'p': 0.9, 'num_bars': NUM_BARS},
        primer)

    #save raw generation as GuitarPro file
    song = dadagp.tokens2guitarpro(generated, verbose=False)
    song.artist = generated[0]
    song.album = 'Generated by DadaGP'
    song.title = "untitled"
    dadagp_path = os.path.join(generation_dir, "ex_" + str(idx) + "_full" + ".gp5")
    guitarpro.write(song, dadagp_path)

    #extract loops
    track_list, time_signatures = loops.create_track_list(song)
    lead_mat, lead_dur, melody_seq = loops.calc_correlation(track_list, 0) 
    _, loop_endpoints = loops.get_valid_loops(melody_seq, lead_mat, lead_dur, min_len=MIN_LEN, min_beats=min_beats, max_beats=max_beats, min_rep_beats=MIN_REP_BEATS)
    token_list = loops.unify_loops(generated, loop_endpoints, density=DENSITY)
    token_list_repeats = loops.get_repeats(generated, min_meas=LOOP_SIZE, max_meas=LOOP_SIZE, density=DENSITY)
    token_list = token_list + token_list_repeats
    #guard the empty case so token_list[-1] cannot raise IndexError
    if not token_list or token_list[-1] != "end":
        token_list.append("end")

    loops_length = len(token_list)
    #NOTE(review): 10 looks like a "nothing useful was extracted" threshold - confirm
    if loops_length > 10:
        header_data = token_list[:4]
        main_data = token_list[4:]

        #cut the body into chunks of LOOP_SIZE measures each
        #NOTE(review): splitting stops at the first "end" token; if unify_loops already
        #terminates its output with "end", the tokens from get_repeats are never reached - confirm
        num_measures = 0
        split_loops = []
        current_loop = []
        for token in main_data:
            if token == "new_measure":
                if num_measures > 0 and num_measures % LOOP_SIZE == 0: #end of a loop
                    split_loops.append(current_loop)
                    current_loop = []
                num_measures += 1
            if token == "end":
                split_loops.append(current_loop)
                break
            current_loop.append(token)

        #keep only the first occurrence of each distinct loop
        token_list = list(header_data) #copy so header_data itself is left untouched
        seen_loops = set()
        num_segments = 0
        for loop in split_loops:
            loop_key = " ".join(loop)
            if loop_key in seen_loops:
                continue
            seen_loops.add(loop_key)
            token_list += loop
            num_segments += 1
        token_list.append("end")

        density = calc_density(token_list)
        total_density += density
        print("FOUND {} loops in ex_{}, density {}".format(num_segments, idx, density))

        #save extracted loops
        song = dadagp.tokens2guitarpro(token_list, verbose=False)
        song.artist = generated[0]
        song.album = 'Generated by DadaGP'
        song.title = "untitled"
        dadagp_path = os.path.join(generation_dir, "ex_" + str(idx) + "_loops" + ".gp5")
        guitarpro.write(song, dadagp_path)

        total_segments += num_segments

#averages are taken over all primers, including those that produced no loops
num_primers = len(primers)
avg_segments = total_segments * 1.0 / num_primers if num_primers else 0.0
avg_density = total_density / num_primers if num_primers else 0.0
print("{} total loops {} avg loops {} avg density from {} primers".format(total_segments, avg_segments, avg_density, num_primers))
    