<a href="https://colab.research.google.com/github/Rishikeshvishnu/project-SSY340/blob/xiaoying/Pure_Draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch tensorflow pretty_midi mido

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import torch
import numpy as np
import pretty_midi
import mido
import glob
from random import randint


In [7]:
# Preprocessing Code

def sample_data(seqs, lth, factor=6):
    """
    Randomly samples sequences of length ~lth from an input set of sequences.
    """
    data = []
    for seq in seqs:
        length = randint(lth - lth // factor, lth + lth // factor)
        idx = randint(0, max(0, len(seq) - length))
        data.append(seq[idx:idx+length])
    return data

def preprocess_midi_files(source_dir, length):
    """
    Preprocess MIDI files and convert them into sequences.
    """
    DATA = []
    for file in glob.iglob(source_dir + '**/*.mid*', recursive=True):
        try:
            idx_list, event_list, _ = midi_parser(file)
            DATA.append(idx_list)
        except Exception as e:
            print(f"Error processing file {file}: {e}")

    # Sample the data
    DATA = sample_data(DATA, length)
    return DATA


In [8]:
# Tokenizer Code
def midi_parser(fname=None):
    """
    Translates a single-track MIDI file into a tokenized event vocabulary.
    """
    if fname is not None:
        mid = mido.MidiFile(fname)

    index_list = []
    event_list = []
    delta_time = 0
    for track in mid.tracks:
        for msg in track:
            delta_time += msg.time
            if msg.type == "note_on":
                idx = msg.note + 1
                index_list.append(idx)
            elif msg.type == "note_off":
                idx = msg.note + 1 + 128
                index_list.append(idx)

    return torch.LongTensor(index_list), event_list, delta_time


In [9]:
import tensorflow as tf
from tensorflow.keras import layers

# LSTM Model definition
def create_lstm_model(input_shape, output_dim):
    model = tf.keras.Sequential([
        layers.LSTM(128, input_shape=input_shape, return_sequences=True),
        layers.LSTM(128),
        layers.Dense(128, activation='relu'),
        layers.Dense(output_dim, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Example input shape
input_shape = (25, 3)
output_dim = 128  # for pitch prediction
lstm_model = create_lstm_model(input_shape, output_dim)
lstm_model.summary()


  super().__init__(**kwargs)


In [10]:
import torch
from torch import nn

# Transformer Model definition
class MusicTransformer(nn.Module):
    def __init__(self, d_model=128, num_layers=3, num_heads=8, d_ff=512, vocab_size=416):
        super(MusicTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=d_ff),
            num_layers=num_layers
        )
        self.final_layer = nn.Linear(d_model, vocab_size)

    def forward(self, x, mask=None):
        x = self.embedding(x)
        x = self.decoder(x, x, tgt_mask=mask)
        return self.final_layer(x)

# Create transformer model
transformer_model = MusicTransformer()
print(transformer_model)


MusicTransformer(
  (embedding): Embedding(416, 128)
  (decoder): TransformerDecoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (multihead_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm3): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
        (dropout3): Dropout(p=0.1

In [20]:
source_dir = '/content/drive/MyDrive/maestro-v3.0.0'
seq_length = 25  # Define sequence length

# Use the preprocessing function to get the processed data
preprocessed_data = preprocess_midi_files(source_dir, seq_length)

# Create X_train and y_train
X_train = []
y_train = []

for sequence in preprocessed_data:
    for i in range(len(sequence) - seq_length):
        X_train.append(sequence[i:i+seq_length])
        y_train.append(sequence[i+seq_length])

# Convert to numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)


In [21]:
# Train the LSTM Model
# Parameters need to be modified
X_train = np.random.rand(1000, 25, 3)  # Dummy data
y_train = np.random.randint(0, 128, (1000, 128))

history = lstm_model.fit(X_train, y_train, epochs=20, batch_size=4)


Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.0166 - loss: 15628184.0000
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.0033 - loss: 15737880.0000
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.0078 - loss: 17325768.0000
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.0019 - loss: 19535542.0000
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.0033 - loss: 22073636.0000
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.0114 - loss: 24345094.0000
Epoch 7/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.0088 - loss: 25879864.0000
Epoch 8/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.0056 

In [23]:
import torch
from torch.utils.data import Dataset

class MidiDataset(Dataset):
    def __init__(self, sequences, seq_length):
        # Filter sequences that are shorter than the seq_length
        self.sequences = [seq for seq in sequences if len(seq) > seq_length]
        self.seq_length = seq_length

    def __len__(self):
        # Calculate the total number of valid slices (those with sufficient length)
        return sum(len(seq) - self.seq_length for seq in self.sequences)

    def __getitem__(self, idx):
        # Find which sequence the index corresponds to
        seq_idx, token_idx = self._find_sequence_and_token_idx(idx)

        # Get input and target sequences from that specific sequence
        input_seq = self.sequences[seq_idx][token_idx:token_idx+self.seq_length]
        target_seq = self.sequences[seq_idx][token_idx+1:token_idx+self.seq_length+1]

        # Convert to tensors
        return torch.LongTensor(input_seq), torch.LongTensor(target_seq)

    def _find_sequence_and_token_idx(self, idx):
        """
        Finds the sequence index and the starting token index based on the global index.
        """
        cumulative_length = 0
        for i, seq in enumerate(self.sequences):
            length = len(seq) - self.seq_length
            if idx < cumulative_length + length:
                return i, idx - cumulative_length
            cumulative_length += length
        raise IndexError(f"Index {idx} out of range")


# Example usage with preprocessed_data
seq_length = 25
dataset = MidiDataset(preprocessed_data, seq_length)
train_loader = DataLoader(dataset, batch_size=4, shuffle=True)


In [None]:
# Train the Transformer Model
# Change the values
optimizer = torch.optim.Adam(transformer_model.parameters(), lr=1e-3)

# Training loop for transformer
for epoch in range(20):
    transformer_model.train()
    for batch in train_loader:
        inputs, targets = batch
        optimizer.zero_grad()
        output = transformer_model(inputs)
        loss = nn.CrossEntropyLoss()(output.view(-1, output.size(-1)), targets.view(-1))
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch}: Loss = {loss.item()}")


Epoch 0: Loss = 3.5879385471343994
Epoch 1: Loss = 3.3744726181030273
Epoch 2: Loss = 3.294386386871338
Epoch 3: Loss = 3.37375545501709
Epoch 4: Loss = 3.7287027835845947
Epoch 5: Loss = 3.3724241256713867
Epoch 6: Loss = 3.7472317218780518
Epoch 7: Loss = 3.6279358863830566
Epoch 8: Loss = 3.727896213531494
Epoch 9: Loss = 3.570587158203125
Epoch 10: Loss = 3.4100406169891357
Epoch 11: Loss = 3.0404720306396484
Epoch 12: Loss = 3.6588287353515625
Epoch 13: Loss = 3.4368104934692383
Epoch 14: Loss = 3.530839204788208
Epoch 15: Loss = 3.506779193878174


In [None]:
def generate_music_transformer(model, start_sequence, num_steps=100):
    generated = start_sequence
    for _ in range(num_steps):
        output = model(generated)
        next_step = torch.argmax(output, dim=-1)[-1]  # Get the last prediction
        generated = torch.cat((generated, next_step.unsqueeze(0)), dim=0)
    return generated


In [None]:
def generate_music_lstm(model, start_sequence, num_steps=100):
    generated = start_sequence
    for _ in range(num_steps):
        next_step = model.predict(generated[-25:])  # Predict using last 25 steps
        generated = np.append(generated, next_step, axis=0)
    return generated


In [None]:
# Compare loss curves
import matplotlib.pyplot as plt


plt.plot(history.history['loss'], label='LSTM Loss')

plt.title('Model Loss Comparison')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# Compare generated music (just print some tokens or analyze)
lstm_output = generate_music_lstm(lstm_model, np.random.rand(1, 25, 3))
transformer_output = generate_music_transformer(transformer_model, torch.LongTensor([1]))

print(f"LSTM Output: {lstm_output[:10]}")
print(f"Transformer Output: {transformer_output[:10]}")
