In [1]:
import numpy as np
import librosa
import os

#Скачивание музыки из файлов из папки с данными и создание по ним датасетов
def get_datasets(data_folder, sample_size, train_size):
    train_end = train_size * len(os.listdir(data_folder))
    train_dataset = []
    test_dataset = []
    for j, file in enumerate(os.listdir(data_folder)):
        if not file.endswith(".mp3"):
            continue
        filepath = os.path.join(data_folder, file)
        print(filepath)
        y, _ = librosa.load(filepath)
        for i in range(len(y)):
            if y[i] > 1:
                y[i] = 1.0
            elif y[i] < -1:
                y[i] = 0.0
            else:
                y[i] = (y[i] + 1) / 2
        for i in range(int(len(y) / sample_size) - 1):
            start = i * sample_size
            end = (i + 1) * sample_size
            if j < train_end:
                train_dataset.append(y[start:end])
            else:
                test_dataset.append(y[start:end])
    return train_dataset, test_dataset


In [2]:
data_folder = os.path.join(os.getcwd(),"data")
sample_size = 441 # размер одного кусочка, секунда - 22050
train_dataset, test_dataset = get_datasets(data_folder, sample_size, 0.8)

C:\work\courses\BelHard_DS2\hw2\data\David Garrett - As It Was (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Beauty And The Beast.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Bella Ciao.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Bitter Sweet Symphony.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Blinding Lights (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Caprice No.24.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Circle Of Life.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Come Together.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Confutatis.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Despacito (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Enter Sandman.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Game Rhapsody.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Happy.mp3
C:\work\c

In [3]:
print(len(train_dataset))
print(len(test_dataset))

288773
60294


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
import numpy as np

# Определение устройства (GPU если доступно, иначе CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Используется устройство: {device}")

# Параметры модели
batch_size = 64
epochs = 10
learning_rate = 1e-4

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class AE(torch.nn.Module):
    def __init__(self):
        super().__init__()
         
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(sample_size, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
        )
         
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, sample_size),
            torch.nn.Sigmoid()
        )
 
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

loss_function = torch.nn.MSELoss()

# Инициализация модели и оптимизатора
model = AE().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Функция обучения
def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        recon_batch = model(data)
        loss = loss_function(recon_batch, data)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

        if batch_idx % 100 == 0:
            print(f"Эпоха {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
                  f"({100. * batch_idx / len(train_loader):.0f}%)]\tПотеря: {loss.item() / len(data):.10f}")

    print(f"====> Эпоха {epoch} Средняя потеря: {train_loss / len(train_loader.dataset):.4f}")

# Функция тестирования
def test(epoch):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            recon = model(data)
            test_loss += loss_function(recon, data).item()

    test_loss /= len(test_loader.dataset)
    print(f"====> Тестовая потеря: {test_loss:.4f}")

# Обучение модели
for epoch in range(1, epochs + 1):
    train(epoch)
    test(epoch)


Используется устройство: cpu
====> Эпоха 1 Средняя потеря: 0.0001
====> Тестовая потеря: 0.0001
====> Эпоха 2 Средняя потеря: 0.0001
====> Тестовая потеря: 0.0001
====> Эпоха 3 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0001
====> Эпоха 4 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0001
====> Эпоха 5 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000
====> Эпоха 6 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000
====> Эпоха 7 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000
====> Эпоха 8 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000
====> Эпоха 9 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000
====> Эпоха 10 Средняя потеря: 0.0000
====> Тестовая потеря: 0.0000


In [15]:
with torch.no_grad():
    data_folder = os.path.join(os.getcwd(),"data")
    file = os.listdir(data_folder)
    number = 6
    for f in os.listdir(data_folder):
        if f.endswith(".mp3"):
            number -= 1
            if number == 0:
                file = f
    print(file)
    filepath = os.path.join(data_folder, file)
    y, _ = librosa.load(filepath)
    for i in range(len(y)):
        if y[i] > 1:
            y[i] = 1.0
        elif y[i] < -1:
            y[i] = 0.0
        else:
            y[i] = (y[i] + 1) / 2
    from_sound = []
    for i in range(500):
        start = i * sample_size
        end = (i + 1) * sample_size
        from_sound.append(y[start:end])
    loader  = DataLoader(from_sound, batch_size=batch_size, shuffle=False)

    sound = []
    for data in loader:
        data = data.to(device)
        recon = model(data)
        for rec in recon:
            sound += rec.tolist()

David Garrett - Caprice No.24.mp3


In [16]:
import soundfile as sf
output_path = os.path.join(os.getcwd(), "out.wav")
sf.write(output_path, sound, 22050, 'PCM_24')