In [25]:
import mido
import os
import string
import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn, optim

from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix

In [2]:
class MidiTranscriber():
    def __init__(self):
        pass
    


    def Sequence(self, track):
        result = []
        notes = {}
        time = 0
        final_result = []
        data = [] 
        state = [0] * 128
        tick_counter = 0
        
        tick_interval = 20
        
        for i in range(16):
            data.append(state)
            
        for msg in track:
            time += msg.time
            tick_counter += msg.time
            
            if(tick_counter > tick_interval):
                final_result.append(data)
                tick_counter = 0
            
            if msg.type == 'note_on':
                if msg.velocity != 0:
                    past = notes.get(msg.note)
                    if(past == None):
                        past = []
                    notes[msg.note] = past + [time , 1] 
                    result.append([time, msg.note, msg.velocity,  msg.channel, msg.type])
                    data[msg.channel][msg.note] = msg.velocity
                    
                else:
                    pass
            if msg.type == 'note_off':
                past = notes.get(msg.note)
                if(past == None):
                    past = []
                notes[msg.note] = past + [time , 0]
                result.append([time ,msg.note, msg.velocity ,  msg.channel, msg.type])
                data[msg.channel][msg.note] = 0
                
        return result, data, final_result
        
        
    def BuildArray(self,midi):
        Lenghth = [len(tr) for tr in midi.tracks]
        track_length = max(Lenghth)
        min_track =  track_length/10 #Only accept tracks 1/10 of length.
        all_arys = []
        training = []
        for track in range(len(midi.tracks)):
            out = []
            valid_track = (len(midi.tracks[track]) > min_track)
            if(valid_track):
                track = midi.tracks[track]
                res, data, final_result = self.Sequence(track)
                
                #print(len(final_result))
                
                
                all_arys.append(res)
                training.append(data)
            
        #sums = all_arys.sum(axis=1)
        #ends = np.where(sums > 0)[0]
        return all_arys, data, final_result

        builder = []
        
        

In [3]:
scriber = MidiTranscriber()

In [4]:
def Parser(composer, path):
    midis = os.listdir(path)
    
    pair = []
    
    for file in midis:
        if(file == ".DS_Store"):
            continue 
        midis_path = path + "/" + file
        #print(midis_path)
        
        midi = mido.MidiFile(midis_path)
        #print(len(midi.tracks[0]) + len(midi.tracks[1]) + len(midi.tracks[2]))
        #print(len(midi.tracks[0]))
        arr, data, final_result = scriber.BuildArray(midi)
        
        
        sequence_size = 100
        
        div = len(final_result) / sequence_size
        last = 0
        for i in range(int(div)):

            clip = final_result[last:sequence_size*int((i+1))]
            last = int(sequence_size)*(i+1)
            
            x = clip
            y= composer
 
            pair.append([x,y])
    
            if(not len(x) == 100):
                print(len(x))
        
    return arr, data, final_result, pair
        


In [5]:
dataset = "Composer_Dataset/NN_midi_files_extended/train"
composers = ['bach', 'bartok', 'byrd', 'chopin', 'handel', 'hummel', 'mendelssohn', 'mozart', 'schumann']

pairs = []

for composer in composers:
    path = dataset +"/"+ composer
    print(composer)
    arr, data, final_result,pair = Parser(composer, path)
    #print(len(pair))
    pairs.append(pair)



bach
bartok
byrd
chopin
handel
hummel
mendelssohn
mozart
schumann


In [24]:
dataset_test = "Composer_Dataset/NN_midi_files_extended/test"

test = []

for composer in composers:
    path = dataset_test +"/"+ composer
    print(composer)
    arr, data, final_result,pair = Parser(composer, path)
    #print(len(pair))
    test.append(pair)

bach
bartok
byrd
chopin
handel
hummel
mendelssohn
mozart
schumann


In [6]:
data = np.array(data)
data.shape

(16, 128)

In [7]:
final_result = np.array(final_result)

In [8]:
final_result.shape

(1736, 16, 128)

In [9]:
pair = []
for i in pairs:
    for j in i:
        pair.append(j)

In [10]:
len(pair)

5598

In [26]:
# testing
test_pair = []
for i in test:
    for j in i:
        test_pair.append(j)

In [11]:
test = np.array(pair)

  test = np.array(pair)


In [31]:
class Dataset(Dataset):
     
    def enum(self,y):
        composers = ["bach", "bartok", "byrd", "chopin", "handel", "hummel", "mendelssohn","mozart", "schumann"]
        #print(y)
        for i in range(len(composers)):
            if composers[i] == y:
                slot = i
        out = [0] * 9
        out[slot] = 1
        
        return out
        
    def __init__(self, pair):
        self.pair = pair
    def __len__(self):
        return len(self.pair)
    
    def __getitem__(self,idx):
        
        x = torch.tensor(self.pair[idx][0])
        y = torch.tensor(self.enum(self.pair[idx][1]))
        return x,y
        

In [13]:
train_data = Dataset(pair)

In [32]:
test_data = Dataset(test)

In [14]:
train_data[1000][0].shape

torch.Size([100, 16, 128])

In [15]:
for i in range(100):
    train_data

In [16]:
train_dataloader = DataLoader(train_data,batch_size=3,shuffle=True)

In [33]:
test_dataloader = DataLoader(test_data, batch_size=3, shuffle=False)

In [17]:
len(train_dataloader)

1866

### Model Creation
___

In [18]:
# Model
# class CNN(nn.Module):
#     # Constructor
#     def __init__(self):
#         super(CNN, self).__init__()
#         # Self initialize the Convolution, Pooling, and Fully Connected Layer
#         self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.fc1 = nn.Linear(16 * 128 , 128)
#         self.fc2 = nn.Linear(128, len(train_dataloader))

#     def forward(self, x):
#         # Normalize, Pool, Flatten to 1D and then create Fully Connected Layer
#         x = self.pool(nn.functional.relu(self.conv1(x)))
#         x = self.pool(nn.functional.relu(self.conv2(x)))
#         x = torch.flatten(x, 1)
#         x = nn.functional.relu(self.fc1(x))
#         x = self.fc2(x)

#         return nn.functional.log_softmax(x, dim=1)

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(100, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 2 * 64, 128) 
        self.fc2 = nn.Linear(128, 9)  

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)

# Creating the Long Short Term Memory model class
class LSTM(nn.Module):
    # create the constructor of our RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM,self).__init__()
        # self initialize the hidden size LSTM, and fully connected layer
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self,x):
        # Initialize the hidden state and cell state
        hidden_state = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        cell_state = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)

        # grabbing the ouput of the hidden state
        output_state, _ = self.lstm(x, (hidden_state, cell_state))

        # pass output state into through the fully connected layer
        output_state = self.fc(output_state[:, -1, :])

        return output_state


In [19]:
# Create model object
model = CNN() 

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [23]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train() 
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        inputs = inputs.to(torch.float)
        labels = labels.to(torch.float)

        # Forward pass
        outputs = model(inputs)

        # Reshpae the label tensor to 1D
        #labels = labels.view(-1)
        #print("Outputs:",outputs.shape)
        #print("Labels:", labels.shape)
        
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for the epoch
    epoch_loss = running_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")

print("Training complete!")

Epoch [1/10] Loss: 5.4232
Epoch [2/10] Loss: 5.4219
Epoch [3/10] Loss: 5.4205
Epoch [4/10] Loss: 5.4197
Epoch [5/10] Loss: 5.4193
Epoch [6/10] Loss: 5.4190
Epoch [7/10] Loss: 5.4188
Epoch [8/10] Loss: 5.4187
Epoch [9/10] Loss: 5.4185
Epoch [10/10] Loss: 5.4184
Training complete!


In [34]:
# Evaluating the model
model.eval()

prediction_list = list()
labels_list = list()

# Disable gradient computation to save memory
with torch.no_grad():
    for inputs, labels in test_dataloader:
        # Forward pass
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)

        prediction_list.extend(predicted.numpy())
        labels_list.extend(labels.numpy())

# Finding accuracy, precision, f1 score and confusion matrix
accuracy = accuracy_score(labels_list, prediction_list)
precision = precision_score(labels_list, prediction_list, average='weighted')
f1_score = f1_score(labels_list, prediction_list, average='weighted')
confusion_mtrx = confusion_matrix(labels_list, prediction_list)

TypeError: new(): invalid data type 'str'