In [1]:
from model_functions import analyze_token_sequence, predict, write_midi
import json
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from transformers import GPT2Config, GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling, TrainingArguments, Trainer

# Project data locations, relative to the notebook's working directory.
PATH_VOCAB = "../0_data/5_vocabs"
PATH_MODELS = "../0_data/7_models"
PATH_MODELS_CONFIG = "../0_data/7_models/config"
PATH_PRED = "../0_data/8_predictions"
PATH_TOKENS = "../0_data/8_predictions/tokens"
PATH_MIDI = "../0_data/8_predictions/midi"

# Create the prediction output folders when they are missing.
# exist_ok=True replaces the separate os.path.exists() check, so re-running
# the cell is a no-op and there is no gap between "check" and "create".
for path in [PATH_PRED, PATH_TOKENS, PATH_MIDI]:
    os.makedirs(path, exist_ok=True)

## Tokenizer

In [2]:
# Build the GPT-2 tokenizer from the project's vocabulary and merges files.
vocab_path = f"{PATH_VOCAB}/vocab_d.json"
merges_path = f"{PATH_VOCAB}/merges.txt"
tokenizer = GPT2Tokenizer(vocab_file=vocab_path, merges_file=merges_path)

# Register the padding, begin-of-sequence and end-of-sequence markers.
special_tokens = {'pad_token': 'PAD', 'bos_token': 'BOS', 'eos_token': 'EOS'}
tokenizer.add_special_tokens(special_tokens)

# Display the tokenizer's reported vocabulary size.
tokenizer.vocab_size

88

## Get Vocabulary

In [3]:
# Load the vocabulary file that the tokenizer was built from.
# The encoding is given explicitly so the JSON is decoded as UTF-8 on every
# platform instead of with the locale's default encoding.
with open(f"{PATH_VOCAB}/vocab_d.json", "r", encoding="utf-8") as fp:
    vocab = json.load(fp)

# Invert the mapping: the values of `vocab` become the keys of `token2word`.
token2word = {token: word for word, token in vocab.items()}

## Make Predictions

In [4]:
# Read the per-model statistics table; the first spreadsheet column
# ("Unnamed: 0") holds the row index.
stats_file = f"{PATH_MODELS}/model_stats.xlsx"
model_df = pd.read_excel(stats_file, index_col="Unnamed: 0")
model_df

Unnamed: 0,name,max_length,emb_dim,attention_heads,layers,dropout,learning_rate,epochs,batch_size,ran,runtime,runtime_min,min_loss,at_epoch,incorrect_notes,correct_notes,correct_rate
0,1_short_small_50,256,128,2,3,0.01,0.001,50,4,yes,185.6872,3.09,1.163706,50,0.0,65.8,1.0
1,2_short_medium_50,256,256,4,6,0.01,0.001,50,4,yes,286.7629,4.78,1.251897,24,0.2,70.4,1.0
2,3_short_large_50,256,512,8,12,0.01,0.001,50,4,yes,568.0452,9.47,1.973589,50,0.0,74.6,1.0
3,4_middle_small_50,1024,128,2,3,0.01,0.001,50,4,yes,248.7667,4.15,1.087204,50,0.0,264.8,1.0
4,5_middle_medium_50,1024,256,4,6,0.01,0.001,50,4,yes,628.1603,10.47,1.120312,40,0.2,265.6,1.0
5,6_middle_large_50,1024,512,8,12,0.01,0.001,50,4,yes,2247.7578,37.46,1.87243,48,0.4,289.8,1.0
6,7_long_small_50,2048,128,2,3,0.01,0.001,50,4,yes,491.8461,8.2,1.202734,50,1.6,546.8,1.0
7,8_long_medium_50,2048,256,4,6,0.01,0.001,50,4,yes,1506.3536,25.11,1.154148,37,2.8,526.8,0.99
8,9_long_large_50,2048,512,8,12,0.01,0.001,50,4,yes,5933.9392,98.9,1.284094,50,0.2,552.2,1.0


In [5]:
TICKS_PER_BEAT = 1024
TICKS_PER_MIN_DURATION = TICKS_PER_BEAT*4/32
# Number of sequences generated per model; the note counts stored in model_df
# are averages over these samples.
N_SAMPLES = 5

# Boundary values handed to analyze_token_sequence. They do not depend on the
# model, so they are built once instead of on every loop iteration.
# NOTE(review): presumably token ids of the vocabulary - confirm against model_functions.
token_flags = {
    "start_pitch_token": 1,
    "end_pitch_token": 36,
    "start_duration_token": 37,
    "end_duration_token": 68,
    "duration_triole": 69,
    "start_position_token": 70,
    "end_position_token": 85,
    "position_triole_1": 86,
    "position_triole_2": 87,
}

# Duration grid handed to write_midi: `duration_steps` multiples of
# TICKS_PER_MIN_DURATION, also model-independent.
duration_steps = 64
duration_bins = np.arange(TICKS_PER_MIN_DURATION, (TICKS_PER_MIN_DURATION*duration_steps)+1, TICKS_PER_MIN_DURATION, dtype=int)

# Keys of the analyze_token_sequence result whose values are summed up as correct notes.
correct_note_keys = (
    "start-pos-pitch-duration",
    "start-pos-pitch-duration-dtriole",
    "start-pos-ptriole-pitch-duration",
    "start-pos-ptriole-pitch-duration-dtriole",
)

for index, row in model_df.iterrows():

    # Skip models that did not run, and models that already have an
    # "incorrect_notes" value, so only rows without results are processed.
    if row["ran"] == "no" or pd.notnull(row["incorrect_notes"]) or row["runtime"] == "too big - cuda error":
        continue

    model_name = row["name"]

    # exist_ok=True makes the separate existence check unnecessary
    os.makedirs(f"{PATH_MIDI}/{model_name}", exist_ok=True)

    # load model
    model = GPT2LMHeadModel.from_pretrained(f"{PATH_MODELS_CONFIG}/{model_name}/end_version")

    # generate token sequences and save them
    output = predict(model, tokenizer, samples=N_SAMPLES, max_length=row["max_length"])
    data_generated = {"data": output}

    with open(f"{PATH_TOKENS}/{model_name}.json", "w") as fp:
        json.dump(data_generated, fp)

    # analyze tokens and save as midi files
    correct_notes = 0
    incorrect_notes = 0
    for idx, pred in enumerate(output):
        an = analyze_token_sequence(pred, token_flags)
        correct_notes += sum(an[key] for key in correct_note_keys)
        # the value returned by write_midi is accumulated as the incorrect-note count
        incorrect_notes += write_midi(pred, token2word, duration_bins, f"{PATH_MIDI}/{model_name}/generated_midi_{idx}.midi")

    total_notes = correct_notes + incorrect_notes
    model_df.at[index,"correct_notes"] = round(correct_notes/N_SAMPLES, 2)
    model_df.at[index,"incorrect_notes"] = round(incorrect_notes/N_SAMPLES, 2)
    # With no notes at all the rate is undefined: store NaN instead of
    # raising ZeroDivisionError and aborting the remaining models.
    model_df.at[index,"correct_rate"] = round(correct_notes/total_notes, 2) if total_notes else np.nan

model_df

Unnamed: 0,name,max_length,emb_dim,attention_heads,layers,dropout,learning_rate,epochs,batch_size,ran,runtime,runtime_min,min_loss,at_epoch,incorrect_notes,correct_notes,correct_rate
0,1_short_small_50,256,128,2,3,0.01,0.001,50,4,yes,185.6872,3.09,1.163706,50,0.0,65.8,1.0
1,2_short_medium_50,256,256,4,6,0.01,0.001,50,4,yes,286.7629,4.78,1.251897,24,0.2,70.4,1.0
2,3_short_large_50,256,512,8,12,0.01,0.001,50,4,yes,568.0452,9.47,1.973589,50,0.0,74.6,1.0
3,4_middle_small_50,1024,128,2,3,0.01,0.001,50,4,yes,248.7667,4.15,1.087204,50,0.0,264.8,1.0
4,5_middle_medium_50,1024,256,4,6,0.01,0.001,50,4,yes,628.1603,10.47,1.120312,40,0.2,265.6,1.0
5,6_middle_large_50,1024,512,8,12,0.01,0.001,50,4,yes,2247.7578,37.46,1.87243,48,0.4,289.8,1.0
6,7_long_small_50,2048,128,2,3,0.01,0.001,50,4,yes,491.8461,8.2,1.202734,50,1.6,546.8,1.0
7,8_long_medium_50,2048,256,4,6,0.01,0.001,50,4,yes,1506.3536,25.11,1.154148,37,2.8,526.8,0.99
8,9_long_large_50,2048,512,8,12,0.01,0.001,50,4,yes,5933.9392,98.9,1.284094,50,0.2,552.2,1.0


In [6]:
# Write the updated statistics back to the spreadsheet they were loaded from.
stats_path = f"{PATH_MODELS}/model_stats.xlsx"
model_df.to_excel(stats_path)

Run `tar chvfz predictions_midi.tar.gz *` in a terminal inside the MIDI folder to create a compressed archive (`.tar.gz`) of the generated files for download.

In [12]:
######## Alternative variant ########

In [13]:
# Run a single forward pass and display the size of the returned logits.
# NOTE(review): `inputs` is not defined in any visible cell and `model` is only
# assigned inside the prediction loop - this cell relies on existing kernel
# state; TODO define both explicitly before this cell.
outputs = model(inputs)
outputs.logits.size()

torch.Size([1, 1, 120])

In [14]:
# Display the raw logits returned by the forward pass.
outputs.logits

tensor([[[ 1.1162, -2.1350, -3.5313, -1.9780, -3.5414, -1.5776, -1.8704,
          -3.5441, -1.1535, -2.6672, -0.8764, -2.2812, -1.6335, -0.8708,
          -2.7905, -1.4720, -2.4843, -0.5825, -2.0775, -2.7744, -1.5724,
          -3.7235, -1.8095, -2.4802, -2.3428, -2.3748, -3.2765, -2.9800,
          -3.1671, -2.6769, -3.6342, -3.3965, -2.9433, -3.3021, -3.6102,
          -3.8760, -3.8078, -2.8941, -0.2965, -3.4592, -0.6409, -3.7882,
          -1.4499, -3.6240, -0.8940, -3.3144, -1.9948, -3.0505, -2.1582,
          -3.2950, -2.2917, -3.4952, -1.5516, -3.7166, -2.3756, -3.3413,
          -2.5331, -3.7758, -2.4212, -3.4614, -2.8825, -3.7335, -2.9825,
          -2.8619, -2.9172, -3.3198, -3.0439, -2.9400, -1.7772, -3.4107,
          -3.0528, -3.1332, -3.2200, -3.2291, -2.9769, -3.1939, -1.9817,
          -2.9937, -2.8389, -3.1996, -2.9903, -2.9885, -3.7754, -3.6936,
          -3.1062, -3.3149, -3.9010, -3.4983, -3.7158, -3.4427, -3.5639,
          -3.5325, -1.8639, -3.7709, -3.4870, -3.22

In [15]:
# Sampling temperature: dividing the logits by a value > 1 flattens the
# resulting distribution before sampling.
temperature = 13

# Convert logits to probabilities using softmax with temperature.
# torch.softmax is used because `torch` is imported at the top of the
# notebook, whereas the alias `F` (torch.nn.functional) never is.
probs = torch.softmax(outputs.logits / temperature, dim=-1)

# Sample one token per position: flatten all leading dimensions, draw from
# each row's distribution, then restore the leading shape.
predicted_tokens = torch.multinomial(probs.view(-1, probs.shape[-1]), num_samples=1).view(*probs.shape[:-1])
predicted_tokens

tensor([[24]], device='cuda:0')

In [16]:
# Turn the first row of sampled token ids back into text, keeping special tokens.
first_prediction = predicted_tokens[0]
tokenizer.decode(first_prediction, skip_special_tokens=False)

'Note-On_83'