In [1]:
import guitarpro
import dadagp as dada
import numpy as np
import matplotlib.pyplot as plt
import math
import os
import json

In [2]:
# Sample file locations for the scratch cells in this notebook.
# presumably a DadaGP token (text) file; it is not read by any cell visible here — TODO confirm
DADAGP_PATH = "acdc_ex.txt"
# Guitar Pro file that the time-signature cell parses with guitarpro.parse
GP_PATH = "../util/test_acdc.gp5"
# presumably an output-path prefix for extracted loops; unused in the cells visible here — TODO confirm
PREFIX = "./test_loops_out/acdc_test"

In [3]:
# Snippet: tally the time signatures used by the first ten measures of the first track.
# Goal: check whether one time signature is used throughout, then derive
# min/max beat lengths from it.

song = guitarpro.parse(GP_PATH)
timeSignatures = {}  # (numerator, denominator) -> number of measures seen with it
for measure in song.tracks[0].measures[:10]:
    numerator = measure.timeSignature.numerator
    denominator = measure.timeSignature.denominator.value
    signature = (numerator, denominator)
    timeSignatures[signature] = timeSignatures.get(signature, 0) + 1
    print(numerator, denominator)
    # print(measure.keySignature)  # doesn't seem accurate, it just defaults to CMaj. Could we auto-extract the key?
# Open question: what to do if the time signature changes? Use the most prominent one, or split up the song?

4 4
4 4
4 4
4 4
4 4
4 4
4 4
4 4
4 4
4 4


In [None]:
# Create the JSON index that lists every loop token file (.txt) under the dataset root.
# NOTE(review): the cells that consume an index open "file_list.json" in the same
# directory, not the filename written here — confirm which name is intended.

# Raw string: "\D" is not a valid escape sequence and triggers a warning in a normal literal.
root_dir = r"D:\Documents\DATA\DadaGP-Loops"
file_list = []
for subdir, dirs, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".txt"):
            # Store each path relative to root_dir. relpath works with any path
            # separator and also handles files sitting directly in root_dir, which
            # the previous prefix-stripping approach left as absolute paths.
            full_path = os.path.relpath(os.path.join(subdir, file), root_dir)
            file_list.append(full_path)

# Save the file list as JSON next to the data.
path_json = os.path.join(root_dir, "_DadaGP_LOOPS_all_filenames.json")
with open(path_json, 'w') as f:
    json.dump(file_list, f)

In [7]:
#helper function for counting the completely empty tracks in a Guitar Pro file

def calc_empty_tracks(test_file):
    """Count the tracks of a Guitar Pro file that contain no sounding notes.

    A measure counts as empty when none of the notes in its first voice
    (``measure.voices[0]``) has a type other than ``guitarpro.NoteType.rest``;
    other voices are not inspected. A track counts as empty when every one of
    its measures is empty (a track without measures therefore counts as empty).

    Args:
        test_file: path handed to ``guitarpro.parse``.

    Returns:
        Tuple ``(empty_tracks, total_tracks)``.
    """
    parsed = guitarpro.parse(test_file)

    def _sounding_notes(bar):
        # Number of non-rest notes in the first voice of this measure.
        return sum(
            1
            for beat in bar.voices[0].beats
            for note in beat.notes
            if note.type != guitarpro.NoteType.rest
        )

    silent_tracks = 0
    for trk in parsed.tracks:
        silent_bars = [bar for bar in trk.measures if _sounding_notes(bar) == 0]
        if len(silent_bars) == len(trk.measures):
            silent_tracks += 1
    return silent_tracks, len(parsed.tracks)

In [17]:
# Report how many tracks across all loop files are completely empty.

# Raw string: "\D" is not a valid escape sequence and triggers a warning in a normal literal.
root_path = r"D:\Documents\DATA\DadaGP-Loops"
allfiles_path = os.path.join(root_path, "file_list.json")
with open(allfiles_path, "r") as f:
    allfiles = json.load(f)

n_files = len(allfiles)
total_tracks = 0
empty_tracks = 0
for fn in allfiles:
    # The index lists .txt files; the Guitar Pro file analysed here has the same
    # name with a .gp5 extension. splitext swaps only the final extension, whereas
    # str.replace would also rewrite a ".txt" appearing earlier in the path.
    file = os.path.join(root_path, os.path.splitext(fn)[0] + ".gp5")
    empty, total = calc_empty_tracks(file)
    total_tracks += total
    empty_tracks += empty

print("{} empty tracks out of {} total tracks".format(empty_tracks, total_tracks))

0 empty tracks out of 140179 total tracks


In [9]:
# Share of empty tracks in the original loop dataset, as a fraction rather than a
# percentage (multiply by 100 for percent).
# presumably 35003 empty tracks out of 176703 total — TODO confirm where these counts came from
35003.0 / 176703.0

0.19808944952830457

In [19]:
# Get the maximum and the average number of tokens per file in the dataset.
# Each line of a listed file is counted as one token.

# Raw string: "\D" is not a valid escape sequence and triggers a warning in a normal literal.
root_path = r"D:\Documents\DATA\DadaGP-Loops"
allfiles_path = os.path.join(root_path, "file_list.json")
with open(allfiles_path, "r") as f:
    allfiles = json.load(f)

n_files = len(allfiles)
max_lines = 0
total_lines = 0
max_song = None  # index entry of the longest file seen so far
for fn in allfiles:
    file = os.path.join(root_path, fn)
    with open(file) as f:
        # Stream the file instead of materialising every line just to count them.
        num_lines = sum(1 for _ in f)
    total_lines += num_lines
    if num_lines > max_lines:
        max_lines = num_lines
        max_song = fn

# An empty index would otherwise raise ZeroDivisionError.
average_lines = total_lines / n_files if n_files else 0.0
print("{} maximum token length from {}".format(max_lines, max_song))
print("{} average tokens per song".format(average_lines))

19629 maximum token length from R/Ride/Ride - Drive Blind.gp4.tokens_loop2.txt
1839.2832630410655 average tokens per song


In [3]:
# Average number of tracks per loop file.

# Raw string: "\D" is not a valid escape sequence and triggers a warning in a normal literal.
root_path = r"D:\Documents\DATA\DadaGP-Loops"
allfiles_path = os.path.join(root_path, "file_list.json")
with open(allfiles_path, "r") as f:
    allfiles = json.load(f)

n_files = len(allfiles)
total_tracks = 0
for fn in allfiles:
    # The index lists .txt files; parse the Guitar Pro file with the same name and
    # a .gp5 extension. splitext swaps only the final extension, whereas str.replace
    # would also rewrite a ".txt" appearing earlier in the path.
    file = os.path.join(root_path, os.path.splitext(fn)[0] + ".gp5")
    song = guitarpro.parse(file)
    total_tracks += len(song.tracks)

# An empty index would otherwise raise ZeroDivisionError.
average_tracks = total_tracks / n_files if n_files else 0.0
print("{} average tracks per file".format(average_tracks))

3.1116315205327414 average tracks per file


In [7]:
# Print all the repeat markers in a test song.
# isRepeatOpen is True on the measure where a repeat begins. repeatClose is an int
# set on the measure that closes the repeat; per the original note it is the number
# of repeats, not counting the original play-through.
test_file = "D:\\Documents\\DATA\\DadaGP-v1.1\\1\\1349\\1349 - Deathmarch.gp4"
song = guitarpro.parse(test_file)
for i, measure in enumerate(song.tracks[0].measures):
    header = measure.header
    # Independent checks rather than if/elif: a one-measure repeat opens and closes
    # on the same measure, and an elif would hide the close.
    if header.isRepeatOpen:
        print("repeat open at {}".format(i))
    if header.repeatClose > -1:
        print("repeat close {}x at {}".format(header.repeatClose, i))


repeat open at 0
repeat close 2x at 1
repeat open at 4
repeat close 2x at 5


In [6]:
# Analyze a test file that contains alternate endings.

test_file = "D:\\Documents\\DATA\\DadaGP-v1.1\\3\\3 Inches Of Blood\\3 Inches Of Blood - Balls Of Ice.gp4"
song = guitarpro.parse(test_file)
for i, measure in enumerate(song.tracks[0].measures):
    header = measure.header
    # Independent checks rather than an if/elif chain: one measure can carry several
    # markers at once (e.g. a one-bar ending that also closes the repeat), and an
    # elif chain would report only the first of them.
    if header.isRepeatOpen:
        print("repeat open at {}".format(i))
    if header.repeatClose > -1:
        print("repeat close {}x at {}".format(header.repeatClose, i))
    # NOTE(review): repeatAlternative looks like a bitmask of ending numbers rather
    # than a plain ending index — confirm against the PyGuitarPro docs.
    if header.repeatAlternative > 0:
        print("repeat alt {} at {}".format(header.repeatAlternative, i))

repeat open at 0
repeat alt 1 at 6
repeat close 1x at 7
repeat alt 2 at 8
repeat open at 10
repeat close 3x at 13
repeat open at 14
repeat close 3x at 17
repeat open at 18
repeat close 1x at 25
repeat open at 26
repeat close 3x at 29
repeat open at 50
repeat alt 3 at 52
repeat close 1x at 53
repeat alt 4 at 54
repeat open at 56
repeat close 3x at 59
repeat open at 60
repeat close 3x at 63
repeat open at 64
repeat close 1x at 71
repeat open at 72
repeat close 3x at 75


In [12]:
# Reformat a checkpoint saved from a DataParallel model so that its state_dict
# keys no longer carry the `module.` prefix, then save it under a new name.
import torch
from collections import OrderedDict

prefix = "module."

# Original checkpoint saved with DataParallel.
# NOTE(review): torch.load unpickles the file — only use it on trusted checkpoints.
model = torch.load('../model-weights/parallel_ep_205.pth.tar')
# Build a new OrderedDict whose keys do not contain `module.`.
new_state_dict = OrderedDict()
for k, v in model['state_dict'].items():
    # Strip the prefix only when it is present; slicing a fixed 7 characters
    # would corrupt any key that was saved without it.
    name = k[len(prefix):] if k.startswith(prefix) else k
    new_state_dict[name] = v
# Swap in the cleaned parameters and write the checkpoint back out.
model['state_dict'] = new_state_dict
torch.save(model, "../model-weights/ep_205.pth.tar")

In [None]:
#extract 25 "real" loops following genre distribution
#each loop should be 4 bars long (should we restrict to 4/4 time signature?)
#TODO: how to get the genre distribution? we have it as JSON files but #2 genre is "unknown." Should we filter out and only use top few genres? and do this in training too?
#for generation, was thinking that for each real example we generate an artificial one with the same tempo and starting beat (since a primer is needed)
#should we include genre/artist tags in the artificial generation and does this matter? 
#try prompting with an open repeat symbol, could make the training dataset have the repeat
#training only on guitar parts?
#average number of instruments per track? Could be predominantly only guitar or something
#try training on different checkpoints other than 200
#try changing the group size