In [44]:
import mido
import os
import string
import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn, optim

from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix

In [122]:
class MidiTranscriber():
    def __init__(self):
        pass
    


    def Sequence(self, track):
        result = []
        notes = {}
        time = 0
        final_result = []
        data = [] 
        state = [0] * 128
        tick_counter = 0
        
        tick_interval = 25
        
        for i in range(16):
            data.append(state)
            
        for msg in track:
            time += msg.time
            tick_counter += msg.time
            
            if(tick_counter > tick_interval):
                final_result.append(data)
                tick_counter = 0
            
            if msg.type == 'note_on':
                if msg.velocity != 0:
                    past = notes.get(msg.note)
                    if(past == None):
                        past = []
                    notes[msg.note] = past + [time , 1] 
                    result.append([time, msg.note, msg.velocity,  msg.channel, msg.type])
                    data[msg.channel][msg.note] = msg.velocity
                    
                else:
                    pass
            if msg.type == 'note_off':
                past = notes.get(msg.note)
                if(past == None):
                    past = []
                notes[msg.note] = past + [time , 0]
                result.append([time ,msg.note, msg.velocity ,  msg.channel, msg.type])
                data[msg.channel][msg.note] = 0
                
        return result, data, final_result
        
        
    def BuildArray(self,midi):
        Lenghth = [len(tr) for tr in midi.tracks]
        track_length = max(Lenghth)
        min_track =  track_length/10 #Only accept tracks 1/10 of length.
        all_arys = []
        training = []
        for track in range(len(midi.tracks)):
            out = []
            valid_track = (len(midi.tracks[track]) > min_track)
            if(valid_track):
                track = midi.tracks[track]
                res, data, final_result = self.Sequence(track)
                
                #print(len(final_result))
                
                
                all_arys.append(res)

        return all_arys, data, final_result

        builder = []
        
        

In [123]:
scriber = MidiTranscriber()

In [124]:
def Parser(composer, path):
    midis = os.listdir(path)
    
    pair = []
    
    for file in midis:
        if(file == ".DS_Store"):
            continue 
        midis_path = path + "/" + file
        #print(midis_path)
        
        midi = mido.MidiFile(midis_path)
        #print(len(midi.tracks[0]) + len(midi.tracks[1]) + len(midi.tracks[2]))
        #print(len(midi.tracks[0]))
        arr, data, final_result = scriber.BuildArray(midi)
        
        
        sequence_size = 1000
        
        div = len(final_result) / sequence_size
        last = 0
        for i in range(int(div)):

            clip = final_result[last:sequence_size*int((i+1))]
            last = int(sequence_size)*(i+1)
            
            x = clip
            y= composer
 
            pair.append([x,y])
    
            #if(not len(x) == 100):
             #   print(len(x))
        
    return arr, data, final_result, pair
        


In [125]:
dataset = "Composer_Dataset/NN_midi_files_extended/train"
composers = ['bach', 'bartok', 'byrd', 'chopin', 'handel', 'hummel', 'mendelssohn', 'mozart', 'schumann']

pairs = []

for composer in composers:
    path = dataset +"/"+ composer
    print(composer)
    arr, data, final_result,pair = Parser(composer, path)
    #print(len(pair))
    pairs.append(pair)



bach
bartok
byrd
chopin
handel
hummel
mendelssohn
mozart
schumann


In [126]:
dataset_test = "Composer_Dataset/NN_midi_files_extended/test"

test = []

for composer in composers:
    path = dataset_test +"/"+ composer
    print(composer)
    arr, data, final_result,pair = Parser(composer, path)
    #print(len(pair))
    test.append(pair)

bach
bartok
byrd
chopin
handel
hummel
mendelssohn
mozart
schumann


In [127]:
data = np.array(data)
data.shape

(16, 128)

In [128]:
final_result = np.array(final_result)

In [129]:
final_result.shape

(2726, 16, 128)

In [130]:
pair = []
for i in pairs:
    for j in i:
        pair.append(j)

In [131]:
len(pair)

372

In [132]:
# testing
test_pair = []
for i in test:
    for j in i:
        test_pair.append(j)

In [148]:
class Dataset(Dataset):
     
    def enum(self,y):
        composers = ["bach", "bartok", "byrd", "chopin", "handel", "hummel", "mendelssohn","mozart", "schumann"]
        #print(y)
        for i in range(len(composers)):
            if composers[i] == y:
                slot = i
        out = [0] * 9
        out[slot] = 1
        
        return out
        
    def __init__(self, pair):
        self.pair = pair
    def __len__(self):
        return len(self.pair)
    
    def __getitem__(self,idx):
        
        x = torch.tensor(self.pair[idx][0])
        x = torch.swapaxes(x, 0, 1)
        y = torch.tensor(self.enum(self.pair[idx][1]))
        return x,y
        

In [149]:
train_data = Dataset(pair)

In [150]:
test_data = Dataset(test_pair)

In [151]:
len(train_data)

372

In [152]:
train_data[100][0].shape

torch.Size([16, 1000, 128])

In [153]:
len(test_data)

40

In [154]:
for i in range(100):
    train_data

In [155]:
train_dataloader = DataLoader(train_data,batch_size=3,shuffle=True)

In [156]:
test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)

In [157]:
len(train_dataloader)

124

### Model Creation
___

In [206]:
# Model
# class CNN(nn.Module):
#     # Constructor
#     def __init__(self):
#         super(CNN, self).__init__()
#         # Self initialize the Convolution, Pooling, and Fully Connected Layer
#         self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.fc1 = nn.Linear(16 * 128 , 128)
#         self.fc2 = nn.Linear(128, len(train_dataloader))

#     def forward(self, x):
#         # Normalize, Pool, Flatten to 1D and then create Fully Connected Layer
#         x = self.pool(nn.functional.relu(self.conv1(x)))
#         x = self.pool(nn.functional.relu(self.conv2(x)))
#         x = torch.flatten(x, 1)
#         x = nn.functional.relu(self.fc1(x))
#         x = self.fc2(x)

#         return nn.functional.log_softmax(x, dim=1)

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 1, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.LSTM= nn.LSTM(32, 10, 4, batch_first=True)
                           
        self.fc1 = nn.Linear(32000, 128) 
        self.fc3 = nn.Linear(128, 9)  

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        #print(x.shape)         

        
        print(x.shape)
        x =  x[:,0,:,:]
        print(x.shape)
        h0 = torch.zeros(1, x.size(0), 10)
        c0 = torch.zeros(1, x.size(0), 10)
        x, _ = self.LSTM(x, (h0, c0))               
                                       
        print(hidden_state, cell_state)
        
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc3(x)
        return nn.functional.sigmoid(x)


In [207]:
# Create model object
model = CNN() 

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(model.parameters(),lr=0.001)

In [208]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train() 
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        inputs = inputs.to(torch.float)
        labels = labels.to(torch.float)

        # Forward pass
        outputs = model(inputs)

        # Reshpae the label tensor to 1D
        #labels = labels.view(-1)
        #print("Outputs:",outputs.shape)
        #print("Labels:", labels.shape)
        
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for the epoch
    epoch_loss = running_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")

print("Training complete!")

torch.Size([3, 1, 250, 32])
torch.Size([3, 250, 32])


RuntimeError: Expected hidden[0] size (4, 3, 10), got [1, 3, 10]

In [202]:
# Evaluating the model
model.eval()

prediction_list = list()
labels_list = list()

real = []
pred = []

# Disable gradient computation to save memory
with torch.no_grad():
    for inputs, labels in test_dataloader:
        # Forward pass
        outputs = model(inputs.float())
        _, predicted = torch.max(outputs.data, 1)
        
        print(outputs)
        
        outputs = outputs.tolist()
        labels = labels.tolist()
        
        pred_labal = outputs[0].index(max(outputs[0]))
        
        
        print(pred_labal)
        
        print(labels)
        
        real_label = labels[0].index(max(labels[0]))
        
        real.append(real_label)
        pred.append(pred_labal)
        
        


torch.Size([1, 1, 250, 32])
torch.Size([1, 250, 32])


RuntimeError: Expected hidden[0] size (4, 1, 10), got [1, 1, 10]

In [167]:
real

[0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 7,
 8,
 8,
 8,
 8,
 8,
 8]

In [168]:
pred

[8,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 8,
 8,
 0,
 0,
 0,
 0,
 8,
 8]

In [171]:
print("The model had a accuracy_score socre of : ", round(accuracy_score(pred, real),4))


The model had a accuracy_score socre of :  0.05
