In [1]:
from model_functions import analyze_token_sequence, predict, write_midi
import json
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from transformers import GPT2Config, GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling, TrainingArguments, Trainer

# Output locations: raw generated token sequences and their MIDI renderings
# are written below a common "predictions" folder.
PATH_PRED = "predictions"
PATH_TOKENS = "predictions/tokens"
PATH_MIDI = "predictions/midi"

# exist_ok=True keeps this cell safe to re-run and avoids the race between a
# separate existence check and the directory creation.
for _path in (PATH_PRED, PATH_TOKENS, PATH_MIDI):
    os.makedirs(_path, exist_ok=True)

## Tokenizer

In [2]:
# Build the GPT-2 tokenizer from the local vocabulary and merges files, then
# register the PAD / BOS / EOS special tokens on it.
special_tokens = {"pad_token": "PAD", "bos_token": "BOS", "eos_token": "EOS"}

tokenizer = GPT2Tokenizer(vocab_file="vocab.json", merges_file="merges.txt")
tokenizer.add_special_tokens(special_tokens)

# Display the vocabulary size reported by the tokenizer.
tokenizer.vocab_size

120

## Get Vocabulary

In [3]:
# Load the vocabulary file used by the tokenizer. The encoding is pinned to
# UTF-8 so the file is decoded the same way on every platform instead of
# depending on the locale default.
with open('vocab.json', 'r', encoding='utf-8') as fp:
    vocab = json.load(fp)

# Invert the vocabulary mapping (word -> token) into a token -> word lookup.
token2word = {token: word for word, token in vocab.items()}

## Make Predictions

In [4]:
# Load the table of trained models (hyperparameters, training statistics and
# the note-statistics columns filled in further below), indexed by the
# spreadsheet's unnamed first column.
model_df = pd.read_excel(
    io="model_stats.xlsx",
    index_col="Unnamed: 0",
)
model_df

Unnamed: 0,name,max_length,emb_dim,attention_heads,layers,dropout,learning_rate,epochs,batch_size,ran,runtime,runtime_min,min_loss,at_epoch,incorrect_notes,correct_notes,correct_rate
0,1_short_small_50,256,128,2,3,0.01,0.001,50,4,yes,185.8307,3.1,1.162286,46,,,
1,2_short_medium_50,256,256,4,6,0.01,0.001,50,4,yes,304.2706,5.07,1.208278,30,,,
2,3_short_large_50,256,512,8,12,0.01,0.001,50,4,yes,559.9953,9.33,1.841974,48,,,
3,4_middle_small_50,1024,128,2,3,0.01,0.001,50,4,yes,241.443,4.02,1.09383,50,,,
4,5_middle_medium_50,1024,256,4,6,0.01,0.001,50,4,yes,614.6323,10.24,1.120662,37,,,
5,6_middle_large_50,1024,512,8,12,0.01,0.001,50,4,yes,2237.4356,37.29,1.883723,49,,,
6,7_long_small_50,2048,128,2,3,0.01,0.001,50,4,yes,482.1712,8.04,1.120337,49,,,
7,8_long_medium_50,2048,256,4,6,0.01,0.001,50,4,yes,1494.6219,24.91,1.156429,35,,,
8,9_long_large_50,2048,512,8,12,0.01,0.001,50,4,yes,too big - cuda error,too big - cuda error,too big - cuda error,too big - cuda error,,,


In [5]:
# For every trained model that has no note statistics yet: generate samples,
# store the raw token sequences as JSON, render each sample to MIDI, and record
# the per-sample averages of correct / incorrect notes in model_df.

N_SAMPLES = 5  # number of sequences generated per model

# Keys of the analyze_token_sequence() result that are summed as "correct" notes.
CORRECT_NOTE_KEYS = (
    "start-pos-pitch-duration",
    "start-pos-pitch-duration-dtriole",
    "start-pos-ptriole-pitch-duration",
    "start-pos-ptriole-pitch-duration-dtriole",
)

for index, row in model_df.iterrows():

    # Skip models that did not run, hit the CUDA error during training, or
    # already have statistics recorded (so re-running the cell is cheap).
    if row["ran"] == "no" or pd.notnull(row["incorrect_notes"]) or row["runtime"] == "too big - cuda error":
        continue

    model_name = row["name"]
    midi_dir = f"{PATH_MIDI}/{model_name}"
    os.makedirs(midi_dir, exist_ok=True)

    # Load the final checkpoint of this model.
    model = GPT2LMHeadModel.from_pretrained(f"out/{model_name}/end_version")

    # Generate samples and persist the raw token sequences.
    # max_length is cast to a plain int because it comes out of a DataFrame row.
    output = predict(model, tokenizer, samples=N_SAMPLES, max_length=int(row["max_length"]))
    with open(f"{PATH_TOKENS}/{model_name}.json", "w") as fp:
        json.dump({"data": output}, fp)

    # Analyze every generated sequence and write it out as a MIDI file.
    # write_midi's return value is accumulated as the number of incorrect notes.
    correct_notes = 0
    incorrect_notes = 0
    for idx, pred in enumerate(output):
        an = analyze_token_sequence(pred)
        correct_notes += sum(an[key] for key in CORRECT_NOTE_KEYS)
        incorrect_notes += write_midi(pred, token2word, f"{midi_dir}/generated_midi_{idx}.midi")

    # Average over the samples actually returned rather than a hard-coded count.
    n_samples = len(output)
    if n_samples == 0:
        # Nothing was generated: leave the statistics empty so the model is
        # picked up again on the next run.
        continue

    model_df.at[index, "correct_notes"] = round(correct_notes / n_samples, 2)
    model_df.at[index, "incorrect_notes"] = round(incorrect_notes / n_samples, 2)

    # Guard against a division by zero when no notes were counted at all;
    # correct_rate then stays empty.
    total_notes = correct_notes + incorrect_notes
    if total_notes > 0:
        model_df.at[index, "correct_rate"] = round(correct_notes / total_notes, 2)

model_df

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.


midi saved in predictions/midi/1_short_small_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/1_short_small_50/generated_midi_1.midi
Number of incorrect notes: 1
midi saved in predictions/midi/1_short_small_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/1_short_small_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/1_short_small_50/generated_midi_4.midi
Number of incorrect notes: 0
midi saved in predictions/midi/2_short_medium_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/2_short_medium_50/generated_midi_1.midi
Number of incorrect notes: 0
midi saved in predictions/midi/2_short_medium_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/2_short_medium_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/2_short_medium_50/generated_midi_4.midi
Number of incorrect notes: 0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.


midi saved in predictions/midi/3_short_large_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/3_short_large_50/generated_midi_1.midi
Number of incorrect notes: 0
midi saved in predictions/midi/3_short_large_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/3_short_large_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/3_short_large_50/generated_midi_4.midi
Number of incorrect notes: 0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.


midi saved in predictions/midi/4_middle_small_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/4_middle_small_50/generated_midi_1.midi
Number of incorrect notes: 0
midi saved in predictions/midi/4_middle_small_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/4_middle_small_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/4_middle_small_50/generated_midi_4.midi
Number of incorrect notes: 2
midi saved in predictions/midi/5_middle_medium_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/5_middle_medium_50/generated_midi_1.midi
Number of incorrect notes: 1
midi saved in predictions/midi/5_middle_medium_50/generated_midi_2.midi
Number of incorrect notes: 1
midi saved in predictions/midi/5_middle_medium_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/5_middle_medium_50/generated_midi_4.midi
Number of incorrect note

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.


midi saved in predictions/midi/6_middle_large_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/6_middle_large_50/generated_midi_1.midi
Number of incorrect notes: 0
midi saved in predictions/midi/6_middle_large_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/6_middle_large_50/generated_midi_3.midi
Number of incorrect notes: 1
midi saved in predictions/midi/6_middle_large_50/generated_midi_4.midi
Number of incorrect notes: 1
midi saved in predictions/midi/7_long_small_50/generated_midi_0.midi
Number of incorrect notes: 3
midi saved in predictions/midi/7_long_small_50/generated_midi_1.midi
Number of incorrect notes: 1
midi saved in predictions/midi/7_long_small_50/generated_midi_2.midi
Number of incorrect notes: 0
midi saved in predictions/midi/7_long_small_50/generated_midi_3.midi
Number of incorrect notes: 1
midi saved in predictions/midi/7_long_small_50/generated_midi_4.midi
Number of incorrect notes: 0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:122 for open-end generation.


midi saved in predictions/midi/8_long_medium_50/generated_midi_0.midi
Number of incorrect notes: 0
midi saved in predictions/midi/8_long_medium_50/generated_midi_1.midi
Number of incorrect notes: 0
midi saved in predictions/midi/8_long_medium_50/generated_midi_2.midi
Number of incorrect notes: 2
midi saved in predictions/midi/8_long_medium_50/generated_midi_3.midi
Number of incorrect notes: 0
midi saved in predictions/midi/8_long_medium_50/generated_midi_4.midi
Number of incorrect notes: 1


Unnamed: 0,name,max_length,emb_dim,attention_heads,layers,dropout,learning_rate,epochs,batch_size,ran,runtime,runtime_min,min_loss,at_epoch,incorrect_notes,correct_notes,correct_rate
0,1_short_small_50,256,128,2,3,0.01,0.001,50,4,yes,185.8307,3.1,1.162286,46,0.2,68.4,1.0
1,2_short_medium_50,256,256,4,6,0.01,0.001,50,4,yes,304.2706,5.07,1.208278,30,0.0,67.8,1.0
2,3_short_large_50,256,512,8,12,0.01,0.001,50,4,yes,559.9953,9.33,1.841974,48,0.0,74.2,1.0
3,4_middle_small_50,1024,128,2,3,0.01,0.001,50,4,yes,241.443,4.02,1.09383,50,0.4,278.2,1.0
4,5_middle_medium_50,1024,256,4,6,0.01,0.001,50,4,yes,614.6323,10.24,1.120662,37,0.4,253.8,1.0
5,6_middle_large_50,1024,512,8,12,0.01,0.001,50,4,yes,2237.4356,37.29,1.883723,49,0.4,287.8,1.0
6,7_long_small_50,2048,128,2,3,0.01,0.001,50,4,yes,482.1712,8.04,1.120337,49,1.0,538.2,1.0
7,8_long_medium_50,2048,256,4,6,0.01,0.001,50,4,yes,1494.6219,24.91,1.156429,35,0.6,635.0,1.0
8,9_long_large_50,2048,512,8,12,0.01,0.001,50,4,yes,too big - cuda error,too big - cuda error,too big - cuda error,too big - cuda error,,,


In [6]:
# Write the updated statistics table back to the spreadsheet.
model_df.to_excel(excel_writer="model_stats.xlsx")

Run `tar chvfz predictions_midi.tar.gz *` in a terminal from inside the `predictions/midi` folder to pack the generated MIDI files into a gzipped tar archive, which can then be downloaded.