In [3]:
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import torch.nn as nn
import torch.optim as optim

device = "cuda"

# Load the training data
with open('train.pkl', 'rb') as f:
    train_data = pickle.load(f)

# Load the test data
with open('test_no_target.pkl', 'rb') as f:
    test_data = pickle.load(f)

# MusicDataset to handle sequences and labels
class MusicDataset(Dataset):
    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.labels is not None:
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)
        return torch.tensor(self.data[idx], dtype=torch.float32)

# Function to pad sequences and return batch data
def collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = torch.tensor([len(seq) for seq in sequences])
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=-1)  # Padding value as -1
    return padded_sequences, torch.stack(labels), lengths

# Prepare the training and test datasets and loaders
train_sequences, train_labels = zip(*train_data)
train_dataset = MusicDataset(train_sequences, train_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

test_dataset = MusicDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=lambda x: pad_sequence(x, batch_first=True, padding_value=-1))

# Define the ComposerClassifier model
class ComposerClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super(ComposerClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lengths):
        packed_embedded = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
        hidden = self.dropout(hidden[-1, :, :])
        return self.fc(hidden)

# Hyperparameters
input_dim = 1  # Since we are directly using note indices
hidden_dim = 20
output_dim = 5  # Liczba kompozytor贸w
n_layers = 3
dropout = 0.3

model = ComposerClassifier(input_dim, hidden_dim, output_dim, n_layers, dropout)
model = model.to(device)
# Training setup
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
n_epochs = 10

print("xpp")
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for sequences, labels, lengths in train_loader:

        sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)

        optimizer.zero_grad()
        sequences = sequences.unsqueeze(-1)  # Add an extra dimension for input_dim
        predictions = model(sequences, lengths)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader):.4f}')

# Generating predictions
model.eval()
predictions = []

with torch.no_grad():
    for sequences in test_loader:
        lengths = torch.tensor([len(seq) for seq in sequences])
        sequences = sequences.unsqueeze(-1)  # Add an extra dimension for input_dim
        output = model(sequences, lengths)
        preds = output.argmax(dim=1)
        predictions.extend(preds.cpu().numpy())

# Save predictions to CSV
predictions_df = pd.DataFrame(predictions)
predictions_df.to_csv('/mnt/data/poniedzialek_nazwisko1_nazwisko2.csv', index=False, header=False)

# Archiving results
import zipfile

with zipfile.ZipFile('/mnt/data/poniedzialek_nazwisko1_nazwisko2.zip', 'w') as zipf:
    zipf.write('/mnt/data/poniedzialek_nazwisko1_nazwisko2.csv', arcname='poniedzialek_nazwisko1_nazwisko2.csv')
    zipf.write('script.py', arcname='script.py')


xpp
Epoch 1, Loss: 1.3757
Epoch 2, Loss: 1.2324
Epoch 3, Loss: 1.1868
Epoch 4, Loss: 1.1636


KeyboardInterrupt: 

In [1]:
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import torch.nn as nn
import torch.optim as optim
import zipfile

device = "cuda"

# Load the training data
with open('train.pkl', 'rb') as f:
    train_data = pickle.load(f)

# Load the test data
with open('test_no_target.pkl', 'rb') as f:
    test_data = pickle.load(f)

# MusicDataset to handle sequences and labels
class MusicDataset(Dataset):
    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.labels is not None:
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)
        return torch.tensor(self.data[idx], dtype=torch.float32)

# Function to pad sequences and return batch data
def collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = torch.tensor([len(seq) for seq in sequences])
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=-10)  # Padding value as -1
    return padded_sequences, torch.stack(labels), lengths

# Prepare the training and validation datasets
train_sequences, train_labels = zip(*train_data)
full_dataset = MusicDataset(train_sequences, train_labels)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Prepare the test dataset and loader
test_dataset = MusicDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=lambda x: pad_sequence(x, batch_first=True, padding_value=-10))

# Define the ComposerClassifier model
class ComposerClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super(ComposerClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lengths):
        packed_embedded = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
        hidden = self.dropout(hidden[-1, :, :])
        return self.fc(hidden)

# Hyperparameters
input_dim = 1  # Since we are directly using note indices
hidden_dim = 256
output_dim = 5  # Liczba kompozytor贸w
n_layers = 2
dropout = 0.3

model = ComposerClassifier(input_dim, hidden_dim, output_dim, n_layers, dropout).to(device)

# Training setup
optimizer = optim.Adam(model.parameters(), lr=1e-3)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.97)
criterion = nn.CrossEntropyLoss()

# Training loop
n_epochs = 10

def calculate_accuracy(predictions, labels):
    _, preds = torch.max(predictions, 1)
    correct = (preds == labels).float()
    acc = correct.sum() / len(correct)
    return acc

model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for sequences, labels, lengths in train_loader:

        sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)

        optimizer.zero_grad()
        sequences = sequences.unsqueeze(-1)  # Add an extra dimension for input_dim
        predictions = model(sequences, lengths)
        loss = criterion(predictions, labels)
        acc = calculate_accuracy(predictions, labels)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    epoch_loss /= len(train_loader)
    epoch_acc /= len(train_loader)

    val_loss = 0
    val_acc = 0
    model.eval()
    with torch.no_grad():
        for sequences, labels, lengths in val_loader:

            sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)

            sequences = sequences.unsqueeze(-1)
            predictions = model(sequences, lengths)
            loss = criterion(predictions, labels)
            acc = calculate_accuracy(predictions, labels)
            val_loss += loss.item()
            val_acc += acc.item()
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)

    print(f'Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

# Generating predictions
model.eval()



Epoch 1, Train Loss: 1.2488, Train Acc: 0.5525, Val Loss: 1.2775, Val Acc: 0.5181
Epoch 2, Train Loss: 1.1964, Train Acc: 0.5613, Val Loss: 1.2612, Val Acc: 0.5197
Epoch 3, Train Loss: 1.1830, Train Acc: 0.5619, Val Loss: 1.2591, Val Acc: 0.5230
Epoch 4, Train Loss: 1.1852, Train Acc: 0.5631, Val Loss: 1.2589, Val Acc: 0.5230


KeyboardInterrupt: 

In [3]:
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import torch.nn as nn
import torch.optim as optim
import zipfile

device="cuda"

# Load the training data
with open('train.pkl', 'rb') as f:
    train_data = pickle.load(f)

# Load the test data
with open('test_no_target.pkl', 'rb') as f:
    test_data = pickle.load(f)

# MusicDataset to handle sequences and labels
class MusicDataset(Dataset):
    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.labels is not None:
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)
        return torch.tensor(self.data[idx], dtype=torch.float32)

# Function to pad sequences and return batch data
def collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = torch.tensor([len(seq) for seq in sequences])
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=-1)  # Padding value as -1
    return padded_sequences, torch.stack(labels), lengths

# Prepare the training and validation datasets
train_sequences, train_labels = zip(*train_data)
full_dataset = MusicDataset(train_sequences, train_labels)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Prepare the test dataset and loader
test_dataset = MusicDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=lambda x: pad_sequence(x, batch_first=True, padding_value=-1))

# Define the ComposerClassifier model
class ComposerClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super(ComposerClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lengths):
        packed_embedded = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
        hidden = self.dropout(hidden[-1, :, :])
        return self.fc(hidden)

# Hyperparameters
input_dim = 1  # Since we are directly using note indices
hidden_dim = 256
output_dim = 5  # Liczba kompozytor贸w
n_layers = 2
dropout = 0.3

model = ComposerClassifier(input_dim, hidden_dim, output_dim, n_layers, dropout).to(device)

# Training setup
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
n_epochs = 10

def calculate_accuracy(predictions, labels):
    _, preds = torch.max(predictions, 1)
    correct = (preds == labels).float()
    acc = correct.sum() / len(correct)
    return acc

model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for sequences, labels, lengths in train_loader:
        sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)
        optimizer.zero_grad()
        sequences = sequences.unsqueeze(-1)  # Add an extra dimension for input_dim
        predictions = model(sequences, lengths)
        loss = criterion(predictions, labels)
        acc = calculate_accuracy(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    epoch_loss /= len(train_loader)
    epoch_acc /= len(train_loader)

    val_loss = 0
    val_acc = 0
    model.eval()
    with torch.no_grad():
        for sequences, labels, lengths in val_loader:
            sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)
            sequences = sequences.unsqueeze(-1)
            predictions = model(sequences, lengths)
            loss = criterion(predictions, labels)
            acc = calculate_accuracy(predictions, labels)
            val_loss += loss.item()
            val_acc += acc.item()
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)

    print(f'Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

# Generating predictions
model.eval()
predictions = []




Epoch 1, Train Loss: 1.2642, Train Acc: 0.5432, Val Loss: 1.2366, Val Acc: 0.5669
Epoch 2, Train Loss: 1.1277, Train Acc: 0.5749, Val Loss: 1.1025, Val Acc: 0.5855
Epoch 3, Train Loss: 1.0142, Train Acc: 0.6273, Val Loss: 1.0922, Val Acc: 0.6321
Epoch 4, Train Loss: 1.0208, Train Acc: 0.6063, Val Loss: 1.0235, Val Acc: 0.6310
Epoch 5, Train Loss: 0.9580, Train Acc: 0.6254, Val Loss: 0.9752, Val Acc: 0.6316
Epoch 6, Train Loss: 0.9411, Train Acc: 0.6423, Val Loss: 1.2870, Val Acc: 0.4052
Epoch 7, Train Loss: 0.9004, Train Acc: 0.6550, Val Loss: 0.8947, Val Acc: 0.6738
Epoch 8, Train Loss: 0.8870, Train Acc: 0.6675, Val Loss: 0.9094, Val Acc: 0.6859
Epoch 9, Train Loss: 0.8557, Train Acc: 0.6820, Val Loss: 0.8388, Val Acc: 0.7083
Epoch 10, Train Loss: 0.8595, Train Acc: 0.6826, Val Loss: 0.8989, Val Acc: 0.6968


In [4]:
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import torch.nn as nn
import torch.optim as optim
import zipfile

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the training data
with open('train.pkl', 'rb') as f:
    train_data = pickle.load(f)

# Load the test data
with open('test_no_target.pkl', 'rb') as f:
    test_data = pickle.load(f)

# MusicDataset to handle sequences and labels
class MusicDataset(Dataset):
    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.labels is not None:
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)
        return torch.tensor(self.data[idx], dtype=torch.float32)

# Function to pad sequences and return batch data
def collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = torch.tensor([len(seq) for seq in sequences])
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=-2)  # Padding value as -1
    return padded_sequences, torch.stack(labels), lengths

# Prepare the training and validation datasets
train_sequences, train_labels = zip(*train_data)
full_dataset = MusicDataset(train_sequences, train_labels)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Prepare the test dataset and loader
test_dataset = MusicDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=lambda x: pad_sequence(x, batch_first=True, padding_value=-2))

# Define the ComposerClassifier model
class ComposerClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super(ComposerClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # * 2 because bidirectional
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lengths):
        packed_embedded = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))  # Concatenate the hidden states from both directions
        return self.fc(hidden)

# Hyperparameters
input_dim = 1  # Tylko nuty
hidden_dim = 256
output_dim = 5  # Liczba kompozytor贸w
n_layers = 2
dropout = 0.3

model = ComposerClassifier(input_dim, hidden_dim, output_dim, n_layers, dropout).to(device)

# Training setup
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
n_epochs = 10

def calculate_accuracy(predictions, labels):
    _, preds = torch.max(predictions, 1)
    correct = (preds == labels).float()
    acc = correct.sum() / len(correct)
    return acc

model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for sequences, labels, lengths in train_loader:
        sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)
        optimizer.zero_grad()
        sequences = sequences.unsqueeze(-1)  # Add an extra dimension for input_dim
        predictions = model(sequences, lengths)
        loss = criterion(predictions, labels)
        acc = calculate_accuracy(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    epoch_loss /= len(train_loader)
    epoch_acc /= len(train_loader)

    val_loss = 0
    val_acc = 0
    model.eval()
    with torch.no_grad():
        for sequences, labels, lengths in val_loader:
            sequences, labels, lengths = sequences.to(device), labels.to(device), lengths.to(device)
            sequences = sequences.unsqueeze(-1)
            predictions = model(sequences, lengths)
            loss = criterion(predictions, labels)
            acc = calculate_accuracy(predictions, labels)
            val_loss += loss.item()
            val_acc += acc.item()
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)

    print(f'Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

# Generating predictions
model.eval()
predictions = []


Epoch 1, Train Loss: 1.1984, Train Acc: 0.5588, Val Loss: 1.0571, Val Acc: 0.5811
Epoch 2, Train Loss: 1.0328, Train Acc: 0.6180, Val Loss: 0.9942, Val Acc: 0.6020
Epoch 3, Train Loss: 0.9562, Train Acc: 0.6376, Val Loss: 0.9265, Val Acc: 0.6376
Epoch 4, Train Loss: 0.9119, Train Acc: 0.6736, Val Loss: 0.9128, Val Acc: 0.6562
Epoch 5, Train Loss: 0.8716, Train Acc: 0.6724, Val Loss: 0.9839, Val Acc: 0.5877
Epoch 6, Train Loss: 0.8195, Train Acc: 0.6907, Val Loss: 0.8467, Val Acc: 0.6732
Epoch 7, Train Loss: 0.7551, Train Acc: 0.7214, Val Loss: 0.8661, Val Acc: 0.6705
Epoch 8, Train Loss: 0.7364, Train Acc: 0.7229, Val Loss: 0.7723, Val Acc: 0.6941
Epoch 9, Train Loss: 0.6747, Train Acc: 0.7561, Val Loss: 0.7553, Val Acc: 0.7094
Epoch 10, Train Loss: 0.6650, Train Acc: 0.7545, Val Loss: 0.7001, Val Acc: 0.7390
