In [4]:
import pandas as pd
import pretty_midi
import numpy as np
from sklearn.preprocessing import MinMaxScaler,OneHotEncoder

In [5]:
meta=pd.read_csv('commu_meta.csv')

In [6]:
meta.head(10)

Unnamed: 0.1,Unnamed: 0,audio_key,chord_progressions,pitch_range,num_measures,bpm,genre,track_role,inst,sample_rhythm,time_signature,min_velocity,max_velocity,split_data,id
0,0,aminor,"[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'A...",mid,8,120,cinematic,main_melody,string_ensemble,standard,4/4,101,102,train,commu00001
1,1,cmajor,"[['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'Dm'...",mid_low,8,80,newage,accompaniment,acoustic_piano,standard,4/4,23,30,train,commu00002
2,2,aminor,"[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'A...",mid_high,8,150,cinematic,riff,string_violin,standard,4/4,123,127,train,commu00003
3,3,cmajor,"[['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'Gm'...",mid,8,110,cinematic,pad,choir,standard,4/4,45,46,train,commu00004
4,4,aminor,"[['Am', 'Am', 'Am', 'Am', 'Em', 'Em', 'Em', 'E...",mid_low,4,60,cinematic,pad,acoustic_piano,standard,4/4,21,22,train,commu00005
5,5,aminor,"[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'A...",mid_high,8,120,cinematic,riff,string_ensemble,standard,4/4,121,122,train,commu00006
6,6,aminor,"[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'A...",mid_low,8,110,cinematic,accompaniment,string_ensemble,standard,4/4,87,96,train,commu00007
7,7,cmajor,"[['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'Dm'...",mid,4,120,cinematic,sub_melody,brass_ensemble,standard,4/4,68,92,train,commu00008
8,8,cmajor,"[['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'Dm'...",mid_high,8,120,cinematic,riff,string_violin,standard,4/4,117,126,train,commu00009
9,9,aminor,"[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'A...",mid_low,8,150,cinematic,accompaniment,string_ensemble,standard,4/4,113,122,train,commu00010


In [7]:
MAJOR_KEY = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
MINOR_KEY = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

KEY_MAP = {
    "cmajor": 0,
    "c#major": 1,
    "dbmajor": 1,
    "dmajor": 2,
    "d#major": 3,
    "ebmajor": 3,
    "emajor": 4,
    "fmajor": 5,
    "f#major": 6,
    "gbmajor": 6,
    "gmajor": 7,
    "g#major": 8,
    "abmajor": 8,
    "amajor": 9,
    "a#major": 10,
    "bbmajor": 10,
    "bmajor": 11,
    "cminor": 12,
    "c#minor": 13,
    "dbminor": 13,
    "dminor": 14,
    "d#minor": 15,
    "ebminor": 15,
    "eminor": 16,
    "fminor": 17,
    "f#minor": 18,
    "gbminor": 18,
    "gminor": 19,
    "g#minor": 20,
    "abminor": 20,
    "aminor": 21,
    "a#minor": 22,
    "bbminor": 22,
    "bminor": 23,
}

TIME_SIG_MAP = {
    "4/4": 0,
    "3/4": 1,
    "6/8": 2,
    "12/8": 3,
}
PITCH_RANGE_MAP = {
    "very_low": 0,
    "low": 1,
    "mid_low": 2,
    "mid": 3,
    "mid_high": 4,
    "high": 5,
    "very_high": 6,
}
INST_MAP = {
    "accordion": 1,
    "acoustic_bass": 3,
    "acoustic_guitar": 3,
    "acoustic_piano": 0,
    "banjo": 3,
    "bassoon": 5,
    "bell": 2,
    "brass_ensemble": 5,
    "celesta": 2,
    "choir": 7,
    "clarinet": 5,
    "drums_full": 6,
    "drums_tops": 6,
    "electric_bass": 3,
    "electric_guitar_clean": 3,
    "electric_guitar_distortion": 3,
    "electric_piano": 0,
    "fiddle": 4,
    "flute": 5,
    "glockenspiel": 2,
    "harp": 3,
    "harpsichord": 0,
    "horn": 5,
    "keyboard": 0,
    "mandolin": 3,
    "marimba": 2,
    "nylon_guitar": 3,
    "oboe": 5,
    "organ": 0,
    "oud": 3,
    "pad_synth": 4,
    "percussion": 6,
    "recorder": 5,
    "sitar": 3,
    "string_cello": 4,
    "string_double_bass": 4,
    "string_ensemble": 4,
    "string_viola": 4,
    "string_violin": 4,
    "synth_bass": 3,
    "synth_bass_808": 3,
    "synth_bass_wobble": 3,
    "synth_bell": 2,
    "synth_lead": 1,
    "synth_pad": 4,
    "synth_pluck": 7,
    "synth_voice": 7,
    "timpani": 6,
    "trombone": 5,
    "trumpet": 5,
    "tuba": 5,
    "ukulele": 3,
    "vibraphone": 2,
    "whistle": 7,
    "xylophone": 2,
    "zither": 3,
    "orgel": 2,
    "synth_brass": 5,
    "sax": 5,
    "bamboo_flute": 5,
    "yanggeum": 3,
    "vocal": 8,
}
GENRE_MAP = {
    "newage": 0,
    "cinematic": 1,
}
TRACK_ROLE_MAP = {
    "main_melody": 0,
    "sub_melody": 1,
    "accompaniment": 2,
    "bass": 3,
    "pad": 4,
    "riff": 5,
}
RHYTHM_MAP = {
    "standard": 0,
    "triplet": 1,
}


In [8]:
meta=meta.drop(meta.columns[[0]],axis=1)

In [9]:
scaler=MinMaxScaler()
meta[['num_measures','bpm','min_velocity','max_velocity']]=scaler.fit_transform(meta[['num_measures','bpm','min_velocity','max_velocity']])

In [10]:
def map_instruments(instrument_data):
    for key in INST_MAP:
        if key in instrument_data:
            return INST_MAP[key]  
meta['inst'] = meta['inst'].apply(map_instruments)

def map_time_sig(time_data):
    for key in TIME_SIG_MAP:
        if key in time_data:
            return TIME_SIG_MAP[key]
meta['time_signature'] = meta['time_signature'].apply(map_time_sig)

def map_track_role(track_role):
    for key in TRACK_ROLE_MAP:
        if key in track_role:
            return TRACK_ROLE_MAP[key]
meta['track_role'] = meta['track_role'].apply(map_track_role)

def map_genre(genre):
    for key in GENRE_MAP:
        if key in genre:
            return GENRE_MAP[key]
meta['genre'] = meta['genre'].apply(map_genre)

def map_pitch_range(pitch_range):
    for key in PITCH_RANGE_MAP:
        if key in pitch_range:
            return PITCH_RANGE_MAP[key]
meta['pitch_range'] = meta['pitch_range'].apply(map_pitch_range)

def map_rythm(rythm):
    for key in RHYTHM_MAP:
        if key in rythm:
            return RHYTHM_MAP[key]
meta['sample_rhythm'] = meta['sample_rhythm'].apply(map_rythm)

def map_audio_key(audio_key):
    for key in KEY_MAP:
        if key in audio_key:
            return KEY_MAP[key]
meta['audio_key'] = meta['audio_key'].apply(map_audio_key)

In [11]:
event_to_token = {'note_on': 1, 'note_off': 2, 'metadata_delimiter': 3} 

In [20]:
combined_data=[]
for idx, row in meta.iterrows():
    try:
        midi_path = f"commu_midi/commu_midi/train/raw/{row['id']}.mid"
        midi_data = pretty_midi.PrettyMIDI(midi_path)
    
        tokenized_midi = []
        for inst in midi_data.instruments:
            for note in inst.notes:
                tokenized_midi.append(event_to_token['note_on'])
                tokenized_midi.append(note.pitch)
                tokenized_midi.append(event_to_token['note_off'])
                tokenized_midi.append(note.pitch)
    
        combined_sequence = np.concatenate([row.values, [event_to_token['metadata_delimiter']], tokenized_midi])
        combined_data.append(combined_sequence)
    except:
        pass

In [21]:
input_data = []
target_data = []

sequence_length = 100

for seq in combined_data:
    for i in range(0, len(seq) - sequence_length):
        input_data.append(seq[i:i + sequence_length])
        target_data.append(seq[i + 1:i + sequence_length + 1])

input_data = np.array(input_data)
target_data = np.array(target_data)

In [22]:
input_data

array([[21,
        "[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D']]",
        3, ..., 2, 60, 1],
       ["[['Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Dm', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'Am', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D']]",
        3, 0.3076923076923077, ..., 60, 1, 72],
       [3, 0.3076923076923077, 0.7083333333333333, ..., 1, 72, 2],
       ...,
       [72, 2, 72, ..., 2, 72, 1],
       [2, 72, 1, ..., 72, 1, 76],
       [72, 1, 57, ..., 1, 76, 2]], dtype=object

In [28]:
import torch
import torch.nn as nn
from torch.nn import TransformerXL

class MusicTransformer(nn.Module):
    def __init__(self, n_token, d_model, n_head, d_head, d_inner, n_layer, dropout):
        super(MusicTransformer, self).__init__()
        
        self.transformer = TransformerXL(
            n_token, d_model, n_head, d_head, d_inner, n_layer, dropout)
        
        self.decoder = nn.Linear(d_model, n_token)
        
        self.init_weights()
        
    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input):
        output = self.transformer(input)
        output = self.decoder(output[-1])
        return output

ImportError: cannot import name 'TransformerXL' from 'torch.nn' (C:\Users\zayed\anaconda3\lib\site-packages\torch\nn\__init__.py)

In [27]:
dataset = MusicDataset(input_data, target_data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

NameError: name 'MusicDataset' is not defined