In [1]:
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import torchvision.transforms as transforms
from torch import optim
import torch
from matplotlib import pyplot as plt

In [2]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [3]:
if torch.cuda.is_available():
    device=torch.device(type='cuda',index=0)
else:
    device=torch.device(type='cpu',index=0)

In [4]:
file_path = '/kaggle/input/piano-musics-abc-notation/piano-musics-abc-notation.txt'
with open(file_path, 'r') as file:
    abc_data = file.read()

In [5]:
tokens = sorted(set(abc_data))
num_tokens = len(tokens)
token_to_index = {token: i for i, token in enumerate(tokens)}
index_to_token = {i: token for token, i in token_to_index.items()}

In [6]:
# token_to_index

In [7]:
encoder = OneHotEncoder(categories=[range(num_tokens)], sparse_output=False)

In [8]:
# abc_data = abc_data[0:1000]

In [9]:
SEQ_LENGTH = 100
STEP = SEQ_LENGTH
sequences = []

for i in range(0, len(abc_data) - SEQ_LENGTH + 1, STEP):
    seq = [token_to_index[token] for token in abc_data[i:i + SEQ_LENGTH]]
    seq = encoder.fit_transform(np.array([seq]).reshape(-1, 1))
    seq.reshape(SEQ_LENGTH, -1)
    sequences.append(seq.tolist())

In [10]:
# sequences[0]

In [11]:
class MusicDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        # Convert sequence to tensor if needed
        sequence_tensor = torch.tensor(sequence, dtype=torch.float32)
        return sequence_tensor

In [12]:
# np.array(sequences).shape

In [13]:
train_data = MusicDataset(sequences)

In [14]:
batch_size=64

In [15]:
train_dataloader=DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

In [16]:
for i,(music) in enumerate(train_dataloader):
    print(i, music.shape) 
    
    if i > 5:
        break

0 torch.Size([64, 100, 86])
1 torch.Size([64, 100, 86])
2 torch.Size([64, 100, 86])
3 torch.Size([64, 100, 86])
4 torch.Size([64, 100, 86])
5 torch.Size([64, 100, 86])
6 torch.Size([64, 100, 86])


In [17]:
# noise_vectors=torch.randn((32,100)).to(device)
# noise_vectors.shape

In [18]:
# sample_size = 50

In [19]:
class Generator(nn.Module):
    def __init__(self, latent_dim, feature_size, hidden_size, num_layers):
        super().__init__()
        
        self.latent_dim = latent_dim
        self.feature_size = feature_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.leakyrelu = nn.LeakyReLU(negative_slope=0)
        self.bn1 = nn.BatchNorm1d(self.feature_size * 100 * 2)
        self.bn2 = nn.BatchNorm1d(self.feature_size * 100)
        self.dropout = nn.Dropout(p=0.1)
        
        self.lin1 = nn.Linear(in_features=self.latent_dim, out_features=self.feature_size * 100 * 2)
        self.lin2 = nn.Linear(in_features=self.feature_size * 100 * 2, out_features=self.feature_size * 100)
        
        # Define LSTM layer
        self.lstm = nn.LSTM(input_size=self.feature_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
        self.softmax = nn.Softmax(dim=-1)
        self.argmax = lambda x: torch.argmax(x, dim=-1)
        
    def forward(self, x):
        # Apply linear layer
        x = self.lin1(x)
        x = self.bn1(x)
        x = self.leakyrelu(x)
        x = self.dropout(x)
    
        x = self.lin2(x)
        x = self.bn2(x)
        x = self.leakyrelu(x)
        x = self.dropout(x)
        
        x = x.view(-1, 100, self.feature_size)  # Reshape to (batch_size, sequence_length, feature_size)
        # Pass through LSTM layer
        output, hidden = self.lstm(x)
        softmax_x = self.softmax(output)
        argmax_x = self.argmax(softmax_x)
        
        return softmax_x, argmax_x

# # Example usage
# latent_dim = 100  # Example latent dimension
# feature_size = 86  # Example feature size
# hidden_size = 86  # Set hidden size to match feature size
# num_layers = 2  # Example number of LSTM layers

# # Initialize Generator model
# generator = Generator(latent_dim, feature_size, hidden_size, num_layers)

# # Example input
# x = torch.randn(2, latent_dim)  # Example batch size of 32

# # Forward pass
# softmax_output, argmax_output = generator(x)
# print(argmax_output)# Output shape will be (32, 100, 86)

# print(softmax_output.shape)

In [20]:
class Discriminator(nn.Module):
    def __init__(self, feature_size, hidden_size, num_layers):
        super().__init__()
        
        self.feature_size = feature_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_size=self.feature_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
        self.leakyrelu = nn.LeakyReLU(negative_slope=0)
        self.bn1 = nn.BatchNorm1d(self.feature_size * 100 * 4)
        self.bn2 = nn.BatchNorm1d(64)
        self.bn3 = nn.BatchNorm1d(2)
        self.dropout = nn.Dropout(p=0.1)
        
        self.lin1 = nn.Linear(in_features=self.feature_size * 100, out_features=self.feature_size * 100 * 4)
        self.lin2 = nn.Linear(in_features=self.feature_size * 100 * 4, out_features=64)
        self.lin3 = nn.Linear(in_features=64, out_features=2)
        
        self.softmax = nn.Softmax(dim=-1)
        self.argmax = lambda x: torch.argmax(x, dim=-1)
        
    def forward(self,x):
        
        output, hidden = self.lstm(x)
        
        x = output.contiguous().view(-1, 100 * self.feature_size) 
        
        x = self.lin1(x)
        x = self.bn1(x)
        x = self.leakyrelu(x)
        x = self.dropout(x)

        
        x = self.lin2(x)
        x = self.bn2(x)
        x = self.leakyrelu(x)
        x = self.dropout(x)
        
        x = self.lin3(x)
        x = self.bn3(x)
        x = self.softmax(x)
#         x = self.argmax(x)
        
        return x

    
# # Example usage
# feature_size = 86  # Example feature size
# hidden_size = 86  # Set hidden size to match feature size
# num_layers = 2  # Example number of LSTM layers
    
# discriminator = Discriminator(feature_size, hidden_size, num_layers)


# # Forward pass
# cls_output = discriminator(softmax_output)
# print(cls_output)# Output shape will be (32, 100, 86)

In [21]:
# Example usage
latent_dim = 100  # Example latent dimension
feature_size = 86  # Example feature size
hidden_size = 86  # Set hidden size to match feature size
num_layers = 2  # Example number of LSTM layers


# Initialize Generator model
generator = Generator(latent_dim, feature_size, hidden_size, num_layers).to(device)
discriminator = Discriminator(feature_size, hidden_size, num_layers).to(device)

In [22]:
loss_fn=nn.CrossEntropyLoss().to(device)
n_epochs=1 #actually, you should run for more epochs, may be 200
lr=0.01

gopt=optim.Adam(params=generator.parameters(),lr=lr)
dopt=optim.Adam(params=discriminator.parameters(),lr=lr)

In [23]:
def train_one_epoch():
    
    for i,(music) in enumerate(train_dataloader):
    
        noise_vectors = torch.randn(music.shape[0], latent_dim).to(device)
        
        zero_labels=torch.zeros((music.shape[0])).to(device).type(torch.int64)
        one_labels=torch.ones((music.shape[0])).to(device).type(torch.int64)
                
        fake_music, argmax_output = generator(noise_vectors)
        
        pred=discriminator(fake_music)
        
        gloss=loss_fn(pred,one_labels)
        
        gopt.zero_grad()
        gloss.backward()
        gopt.step()
        
        
        
        real_music=music.to(device)
        
        
        pred_on_real=discriminator(real_music)
        dloss_on_real=loss_fn(pred_on_real,one_labels)
        
        pred_on_fake=discriminator(fake_music.detach())
        dloss_on_fake=loss_fn(pred_on_fake,zero_labels)
        
        dloss=(dloss_on_real+dloss_on_fake)/2
        
        dopt.zero_grad()
        dloss.backward()
        dopt.step()
        
        if i%10 == 0:
            
            print("Batch No.:", i+1,"/",len(train_dataloader),":", "GLoss=",round(gloss.item(),4), "DLoss=",round(dloss.item(),4))

In [24]:
for e in range(n_epochs):
    print("Epoch",e+1,"/",n_epochs,":")
    train_one_epoch()

Epoch 1 / 1 :
Batch No.: 1 / 180 : GLoss= 0.7191 DLoss= 0.7223
Batch No.: 11 / 180 : GLoss= 0.7144 DLoss= 0.6939
Batch No.: 21 / 180 : GLoss= 0.71 DLoss= 0.6938
Batch No.: 31 / 180 : GLoss= 0.6896 DLoss= 0.6933
Batch No.: 41 / 180 : GLoss= 0.6874 DLoss= 0.6933
Batch No.: 51 / 180 : GLoss= 0.6951 DLoss= 0.6932
Batch No.: 61 / 180 : GLoss= 0.6952 DLoss= 0.6932
Batch No.: 71 / 180 : GLoss= 0.6921 DLoss= 0.6932
Batch No.: 81 / 180 : GLoss= 0.6929 DLoss= 0.6932
Batch No.: 91 / 180 : GLoss= 0.6938 DLoss= 0.6932
Batch No.: 101 / 180 : GLoss= 0.6931 DLoss= 0.6932
Batch No.: 111 / 180 : GLoss= 0.693 DLoss= 0.6932
Batch No.: 121 / 180 : GLoss= 0.6934 DLoss= 0.6932
Batch No.: 131 / 180 : GLoss= 0.6932 DLoss= 0.6932
Batch No.: 141 / 180 : GLoss= 0.6931 DLoss= 0.6932
Batch No.: 151 / 180 : GLoss= 0.6932 DLoss= 0.6932
Batch No.: 161 / 180 : GLoss= 0.6932 DLoss= 0.6932
Batch No.: 171 / 180 : GLoss= 0.6931 DLoss= 0.6932


In [25]:
noise_vectors = torch.randn(2, latent_dim).to(device)
fake_music, argmax_output = generator(noise_vectors)

In [26]:
# Convert tensor to a list of lists of tokens
music = []

for row in argmax_output:
    row_music = [index_to_token[i.item()] for i in row]
    music.append(row_music)

# Print the resulting music
for row_music in music:
    print(''.join(row_music), '\n\n')

pBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 


pBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 


