In [1]:
import argparse
import os
import numpy as np
import math
import sys

import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.autograd import Variable

import torch.nn as nn
import torch.nn.functional as F
import torch

n_epochs=200
batch_size=64
lr=0.00005
n_cpu=8
latent_dim=100
sample_size=882
n_critic=5
clip_value=0.01
sample_interval=400
data_folder, _ = os.path.split(os.getcwd())
data_folder = os.path.join(data_folder, "hw2")
data_folder = os.path.join(data_folder, "data")

cuda = True if torch.cuda.is_available() else False

print(data_folder)

C:\work\courses\BelHard_DS2\hw2\data


In [2]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(sample_size, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
        )

    def forward(self, img):
        validity = self.model(img)
        return validity

In [3]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(latent_dim, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, sample_size),
            nn.Tanh()
        )

    def forward(self, z):
        img = self.model(z)
        return img

In [4]:
# Configure data loader
import numpy as np
import librosa
import os

#Скачивание музыки из файлов из папки с данными и создание по ним датасета
def get_datasets(data_folder, sample_size):
    dataset = []
    for j, file in enumerate(os.listdir(data_folder)):
        if not file.endswith(".mp3"):
            continue
        filepath = os.path.join(data_folder, file)
        print(filepath)
        y, _ = librosa.load(filepath)
        for i in range(len(y)):
            if y[i] > 1:
                y[i] = 1.0
            elif y[i] < -1:
                y[i] = 0.0
            else:
                y[i] = (y[i] + 1) / 2
        for i in range(int(len(y) / sample_size) - 1):
            start = i * sample_size
            end = (i + 1) * sample_size
            dataset.append(y[start:end])
    return dataset

In [5]:
import soundfile as sf
def save_sound(batches , filename):
    sound = []
    for batch in batches:
        sound += batch.tolist()
    output_path = os.path.join(os.getcwd(), filename)
    sf.write(output_path, sound, 22050, 'PCM_24')

In [6]:
dataloader = DataLoader(get_datasets(data_folder, sample_size), batch_size=batch_size, shuffle=True)

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

if cuda:
    generator.cuda()
    discriminator.cuda()


C:\work\courses\BelHard_DS2\hw2\data\David Garrett - As It Was (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Beauty And The Beast.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Bella Ciao.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Bitter Sweet Symphony.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Blinding Lights (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Caprice No.24.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Circle Of Life.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Come Together.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Confutatis.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Despacito (David Garrett Edition).mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Enter Sandman.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Game Rhapsody.mp3
C:\work\courses\BelHard_DS2\hw2\data\David Garrett - Happy.mp3
C:\work\c

In [None]:
# Optimizers
optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=lr)
optimizer_D = torch.optim.RMSprop(discriminator.parameters(), lr=lr)

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

for epoch in range(n_epochs):
    batches_done = 0
    for i, data in enumerate(dataloader):
        real_data = Variable(data.type(Tensor))

        optimizer_D.zero_grad()

        # Sample noise as generator input
        z = Variable(Tensor(np.random.normal(0, 1, (data.shape[0], latent_dim))))

        # Generate a batch of images
        fake_data = generator(z).detach()
        # Adversarial loss
        loss_D = -torch.mean(discriminator(real_data)) + torch.mean(discriminator(fake_data))

        loss_D.backward()
        optimizer_D.step()

        # Clip weights of discriminator
        for p in discriminator.parameters():
            p.data.clamp_(-clip_value, clip_value)

        # Train the generator every n_critic iterations
        if i % n_critic == 0:
            optimizer_G.zero_grad()

            # Generate a batch of images
            gen_data = generator(z)
            # Adversarial loss
            loss_G = -torch.mean(discriminator(gen_data))

            loss_G.backward()
            optimizer_G.step()

            print(
                "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
                % (epoch, n_epochs, batches_done % len(dataloader), len(dataloader), loss_D.item(), loss_G.item())
            )
            if batches_done % sample_interval == 0:
                save_sound(gen_data.data, f"sounds-{epoch}-{batches_done}.wav")
            batches_done += 1

[Epoch 0/200] [Batch 0/2727] [D loss: -0.007638] [G loss: -3.561971]
[Epoch 0/200] [Batch 1/2727] [D loss: -0.508096] [G loss: 6.127840]
[Epoch 0/200] [Batch 2/2727] [D loss: 0.217928] [G loss: 5.871147]
[Epoch 0/200] [Batch 3/2727] [D loss: -0.019874] [G loss: 4.461695]
[Epoch 0/200] [Batch 4/2727] [D loss: 0.076234] [G loss: 0.076054]
[Epoch 0/200] [Batch 5/2727] [D loss: -0.160286] [G loss: -3.073197]
[Epoch 0/200] [Batch 6/2727] [D loss: -0.283778] [G loss: -4.111230]
[Epoch 0/200] [Batch 7/2727] [D loss: 0.048432] [G loss: -4.673753]
[Epoch 0/200] [Batch 8/2727] [D loss: 0.059649] [G loss: -3.926721]
[Epoch 0/200] [Batch 9/2727] [D loss: 0.173024] [G loss: -2.221670]
[Epoch 0/200] [Batch 10/2727] [D loss: -0.032034] [G loss: 0.944559]
[Epoch 0/200] [Batch 11/2727] [D loss: -0.166278] [G loss: 3.194025]
[Epoch 0/200] [Batch 12/2727] [D loss: -0.272995] [G loss: 4.869434]
[Epoch 0/200] [Batch 13/2727] [D loss: 0.115438] [G loss: 4.425972]
[Epoch 0/200] [Batch 14/2727] [D loss: 0.185