# Import packages

In [1]:
import numpy as np
import os, sys, time, datetime, pickle, copy, random
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from midi2seq import piano2seq, random_piano, process_midi_seq
from torch.utils.data import DataLoader, TensorDataset
from model_base import * # ComposerBase, CriticBase

# Task 1

In [2]:
"""
#   (''Class "Critic" should be a subclass of the class CriticBase. You must use the exact class name.) 
#   You should implement a multi-layer (2 or 3 layers) LSTM model in this class. 
#   The Model (the score function) takes a sequence of envents as input and outputs a score judging 
#   whether the piano music corresponding to the sequence is good music or bad music. 
#   A function to generate random music is provided in the "midi2seq.py". 
#   Use the function to create a collection of random piano plays as examples of bad music. 
#   Use the piano plays in the downloaded data as example of good music. 
#   (You don't need to use all the downloaded data. A sufficiently large subset will be enough.) 
#   Train the model in this class using both the good and the bad examples.
"""

'\n#   (\'\'Class "Critic" should be a subclass of the class CriticBase. You must use the exact class name.) \n#   You should implement a multi-layer (2 or 3 layers) LSTM model in this class. \n#   The Model (the score function) takes a sequence of envents as input and outputs a score judging \n#   whether the piano music corresponding to the sequence is good music or bad music. \n#   A function to generate random music is provided in the "midi2seq.py". \n#   Use the function to create a collection of random piano plays as examples of bad music. \n#   Use the piano plays in the downloaded data as example of good music. \n#   (You don\'t need to use all the downloaded data. A sufficiently large subset will be enough.) \n#   Train the model in this class using both the good and the bad examples.\n'

### Training Data

### 1. Good piano music

In [3]:
# Test midi to sequence
seq01 = piano2seq('./maestro-v1.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi')
seq01.shape

(40777,)

In [4]:
seq01

array([355, 265, 368, ..., 259, 256, 190], dtype=int32)

In [5]:
seq02 = piano2seq('./maestro-v1.0.0/2004/MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_04_Track04_wav.midi')
print(f"Shape: {seq02.shape}")
seq02

Shape: (32208,)


array([355, 258, 256, ..., 184, 256, 161], dtype=int32)

In [6]:
seq03 = piano2seq('./maestro-v1.0.0/2004/MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_02_Track02_wav.midi')
print(f"Shape: {seq03.shape}")
seq03

Shape: (17977,)


array([315, 256, 375, ..., 212, 188, 185], dtype=int32)

In [7]:
good_music = process_midi_seq(all_midis=None, datadir='.', n=10000, maxlen=100)
print(f"Shape: {good_music.shape}")

Shape: (10022, 101)


In [8]:
good_music

array([[257, 354, 256, ..., 256, 363,  56],
       [256, 368,  67, ..., 256, 197, 357],
       [160, 256, 196, ..., 257, 256, 162],
       ...,
       [371,  63, 370, ...,  82, 375,  75],
       [ 39, 372,  51, ...,  39, 374,  46],
       [ 75, 256, 373, ..., 256, 357,  90]], dtype=int32)

### 2. Bad music

In [9]:
random_piano

<function midi2seq.random_piano(n=100)>

In [10]:
import glob

all_midis = glob.glob('./maestro-v1.0.0/**/*.midi')
len(all_midis)

1184

In [11]:
# generate 100 random piano music
bad_music_midi = []
for i in range(len(all_midis)):
    bad_music_midi.append(random_piano(n = 100))

In [12]:
bad_music = process_midi_seq(all_midis=bad_music_midi, datadir='.', n=10000, maxlen=100)
print(f"Shape: {bad_music.shape}")

Shape: (10007, 101)


In [13]:
bad_music

array([[257, 305, 256, ..., 256, 160, 273],
       [229, 253, 267, ..., 256, 360,  92],
       [256, 360,  49, ..., 169, 258, 256],
       ...,
       [206, 297, 256, ..., 223, 264, 233],
       [ 10, 261, 256, ..., 268, 256, 368],
       [233, 279, 256, ..., 275, 256, 186]], dtype=int32)

### train model

In [14]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [15]:
# inputs
good_music = torch.tensor(good_music, dtype=torch.float32)
bad_music = torch.tensor(bad_music, dtype=torch.float32)
# labels, good music as 1 and bad music as 0
good_labels = torch.ones((len(good_music), 1))
bad_labels = torch.zeros((len(bad_music), 1))
# combine good and bad music
all_data = torch.cat([good_music, bad_music], dim=0)
all_labels = torch.cat([good_labels, bad_labels], dim=0)


In [16]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [17]:
features_train, features_test, label_train, label_test = train_test_split(all_data, all_labels, test_size=0.2, random_state=42, shuffle=True) # split data

scaler = MinMaxScaler() # normalize data
features_train = scaler.fit_transform(features_train)
features_test = scaler.fit_transform(features_test)

print(f"Train shape: {features_train.shape, label_train.shape}")
print(f"Test shape: {features_test.shape, label_test.shape}")

Train shape: ((16023, 101), torch.Size([16023, 1]))
Test shape: ((4006, 101), torch.Size([4006, 1]))


In [20]:
train_dataset = TensorDataset(torch.Tensor(features_train), torch.Tensor(label_train)) # convert to dataset
test_dataset = TensorDataset(torch.Tensor(features_test), torch.Tensor(label_test)) # convert to dataset


In [28]:
train_dataset[0]

(tensor([0.6755, 0.9868, 0.2243, 0.4855, 0.6772, 0.5608, 0.5172, 0.6852, 0.6772,
         0.5620, 0.6887, 0.6737, 0.9894, 0.2354, 0.6903, 0.9735, 0.1530, 0.9709,
         0.1852, 0.5092, 0.6772, 0.6772, 0.5224, 0.6799, 0.9921, 0.2480, 0.4908,
         0.6789, 0.6772, 0.5741, 0.6947, 0.9868, 0.2540, 0.6807, 0.6772, 0.5858,
         0.6984, 0.9578, 0.1296, 0.6755, 0.9841, 0.2566, 0.5910, 0.6825, 0.6755,
         0.4683, 0.6825, 0.6719, 0.5952, 0.6958, 0.9789, 0.2672, 0.6755, 0.6042,
         0.7222, 0.6755, 0.9657, 0.0952, 0.6799, 0.9604, 0.1266, 0.6868, 0.6772,
         0.4339, 0.6745, 0.4656, 0.7467, 0.6772, 0.9657, 0.1583, 0.6745, 0.9762,
         0.1715, 0.6772, 0.9815, 0.1900, 0.4960, 0.9684, 0.1504, 0.9709, 0.1263,
         0.5092, 0.6772, 0.4881, 0.4644, 0.6772, 0.5291, 0.7704, 0.6719, 0.9789,
         0.1693, 0.6825, 0.6755, 0.5079, 0.6984, 0.2037, 0.9761, 0.1706, 0.6825,
         0.6772, 0.5106]),
 tensor([1.]))

In [None]:
# convert to dataset
dataset = TensorDataset(all_data, all_labels)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

In [18]:
for batch_idx, (data, target) in enumerate(dataloader):
    print(f"Batch: {batch_idx}, Data: {data.shape}, Target: {target.shape}")

Batch: 0, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 1, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 2, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 3, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 4, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 5, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 6, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 7, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 8, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 9, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 10, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 11, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 12, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 13, Data: torch.Size([32, 101]), Target: torch.Size([32, 1])
Batch: 14, Data: torch.Size([32, 101]), Target: torch.Size

In [57]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch
from torch.autograd import Variable

from torch.utils.data import DataLoader, TensorDataset
from model_base import CriticBase

class Critic(nn.Module, CriticBase):
    def __init__(self, seq_length, num_classes=1, input_size=51, hidden_size=2, num_layers=1):
        super(Critic, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
        self.fc = nn.Linear(128, num_classes) #fully connected last layer

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        out = self.relu(hn)
        out = self.fc_1(out) #first Dense
        out = self.sigmoid(out) #relu
        out = self.fc(out) #Final Output
        return out


    def score(self,x):
        return(self.forward(x))

    def train_model(self, good_music_data, bad_music_data, epochs=10, lr=0.001):
        device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        self.to(device)

        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        # Label good music as 1 and bad music as 0
        good_labels = torch.ones((len(good_music_data), 1))
        bad_labels = torch.zeros((len(bad_music_data), 1))

        all_data = torch.cat([good_music_data, bad_music_data], dim=0)
        all_labels = torch.cat([good_labels, bad_labels], dim=0)

        dataset = TensorDataset(all_data, all_labels)
        dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

        print("Start training")
        running_loss = 0.0
        for epoch in range(epochs):
            for batch_idx, (data, target) in enumerate(dataloader):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                outputs = self.forward(data)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")
        print("Finished training")


In [58]:
model = Critic(seq_length=all_data.shape[1], hidden_size=128, num_layers=3, num_classes=1)

model.train_model(good_music_data=good_music, bad_music_data=bad_music, epochs=10, lr=0.001)

Start training


RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

In [47]:
import torch.nn.functional as F

class Critic(nn.Module, CriticBase):
    def __init__(self, seq_length, hidden_size, num_layers=3, n_classes=1):
        super(Critic, self).__init__()
        self.seq_length = seq_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = n_classes
        self.lstm = nn.LSTM(input_size = self.seq_length, hidden_size = self.hidden_size, num_layers = self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.hidden_size)
        #self.fc2 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        # Initial state with shape: (batch_size, num_hiddens)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()

        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()

        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        print(f"out: {out.shape}")
        
        out = self.fc(out[:, -1, :]) 

        return F.sigmoid(out)           # (batch_size, output_size)         
    
    def score(self,x):
        return(self.forward(x))

    def train_model(self, good_music_data, bad_music_data, epochs=10, lr=0.001):
        device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        self.to(device)

        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        # Label good music as 1 and bad music as 0
        good_labels = torch.ones((len(good_music_data), 1))
        bad_labels = torch.zeros((len(bad_music_data), 1))

        all_data = torch.cat([good_music_data, bad_music_data], dim=0)
        all_labels = torch.cat([good_labels, bad_labels], dim=0)

        dataset = TensorDataset(all_data, all_labels)
        dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

        print("Start training")
        running_loss = 0.0
        for epoch in range(epochs):
            for batch_idx, (data, target) in enumerate(dataloader):
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                outputs = self.forward(data)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")
        print("Finished training")
        
        torch.save(self.state_dict(), 'critic.pth')

In [54]:
model = Critic(seq_length=all_data.shape[1], hidden_size=128, num_layers=3, n_classes=1)

model.train_model(good_music_data=good_music, bad_music_data=bad_music, epochs=10, lr=0.001)

TypeError: __init__() got an unexpected keyword argument 'n_classes'

# Task 2

In [33]:
##*************************Task 2*********************#
#   (Class "Composer" should be a subclass of the class ComposerBase. You must use the exact class name.) 
#   You should implement a multi-layer (2 or 3 layers) LSTM model in this class. When the compose member 
#   function is called, it should return a sequence of events. Randomness is require in the implementation 
#   of the compose function such that each call to the function should generate a different sequence. 
#   The function "seq2piano" in "midi2seq.py" can be used to convert the sequence into a midi object, 
#   which can be written to a midi file and played on a computer. Train the model as a language model 
#   (autoregression) using the downloaded piano plays.

In [39]:
class Composer(nn.Module, ComposerBase):
    def __init__(self, seq_length, hidden_size, num_layers=3, n_classes=1):
        super(Composer, self).__init__()
        self.seq_length = seq_length # seq_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = n_classes

        self.lstm = nn.LSTM(input_size = self.seq_length, hidden_size = self.hidden_size, num_layers = self.num_layers, batch_first=True)
        self.fc1 = nn.Linear(self.hidden_size, self.hidden_size)
        self.fc2 = nn.Linear(self.hidden_size, self.n_classes)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = F.relu(out)
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
    
    def compose(self, seq_length=100):
        device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        self.to(device)
        self.eval()

        # Generate random input
        input = torch.randint(0, 128, (1, seq_length, 1)).to(device)

        # Generate output
        output = self.forward(input)

        # Convert output to piano sequence
        output = output.cpu().detach().numpy()
        output = np.argmax(output, axis=2)
        output = output.reshape(-1)
        output = output.tolist()

        return output
    
    def train_model(self, good_music_data, epochs=10, lr=0.001):
        device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        self.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        dataset = TensorDataset(good_music_data)
        dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

        print("Start training")
        running_loss = 0.0
        for epoch in range(epochs):
            for batch_idx, data in enumerate(dataloader):
                data = data.to(device)

                optimizer.zero_grad()
                outputs = self.forward(data)
                loss = criterion(outputs, data)
                loss.backward()
                optimizer.step()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")
        print("Finished training")

        torch.save(self.state_dict(), 'composer.pth')

In [40]:
composer = Composer(seq_length=all_data.shape[1], hidden_size=128, num_layers=3, n_classes=128)

AttributeError: 'Composer' object has no attribute 'n_classes'