In [1]:
using WAV, Knet

In [2]:
music_sample_size = 479232
sample_frequency = 16000
batch_size = 12

function load_musician_dataset()
    # Some constants we need to define now
    music_sample_size = 479232 #How long our "songs" are

    # Directories holding each musician's subdirectory of samples
    use_musicians = ["satriani"]
    musician_dirs = filter(x -> x in use_musicians, readdir("/mnt/data/other/music_samples/processed/"))

    # Arrays which will hold the data we read from the wav files
    X_train = Array{Array{Float32,1},1}()
    Y_train = Array{UInt16,1}()
    X_test = Array{Array{Float32,1},1}()
    Y_test = Array{UInt16,1}()

    # Mapping for the musician labels
    musician_mapping = Dict{String,UInt16}()
    musician_label = 0

    for musician in musician_dirs

        # Add musician to the label mapping, with an incrementing index
        musician_label += 1
        musician_mapping[musician] = musician_label
       
        # Read the samples for each musician into X and associate the apropriate label into Y
        index = 0
        for sample in readdir("/mnt/data/other/music_samples/processed/$(musician)")
            values_raw, fq = wavread("/mnt/data/other/music_samples/processed/$(musician)/$(sample)")
            values_mon = values_raw[:,1]
            if  round(rand()*7 + 1) == 3
                if length(values_mon) >= music_sample_size
                    push!(X_test, values_mon[1:music_sample_size])
                    push!(Y_test, musician_label)
                end
            else
                if length(values_mon) >= music_sample_size
                    push!(X_train, values_mon[1:music_sample_size])
                    push!(Y_train, musician_label)
                end
            end
        end
    end
    
    X_test_matrix = Array{Float32,2}(hcat(X_test...))
    X_train_matrix = Array{Float32,2}(hcat(X_train...))
    
    return minibatch(X_train_matrix, Y_train, batch_size, shuffle=true), minibatch(X_test_matrix, Y_test, batch_size, shuffle=true)
end

load_musician_dataset (generic function with 1 method)

In [3]:
Dtrain, Dtest = load_musician_dataset()

(Knet.MB(Float32[0.0 0.1377 … -0.0993988 -0.104984; 0.0 0.104648 … -0.0798669 -0.115879; … ; 0.196844 -0.119938 … -0.169195 0.000183111; 0.121464 -0.120945 … -0.170507 -0.000122074], UInt16[0x0001 0x0001 … 0x0001 0x0001], 12, 83, false, [60, 82, 57, 29, 75, 34, 6, 41, 18, 47  …  26, 7, 64, 67, 20, 73, 36, 50, 31, 38], [479232, 83], [83], Array{Float32,2}, Array{UInt16,1}), Knet.MB(Float32[0.106998 -0.098178 … -0.100131 -0.0415967; 0.255898 -0.0116581 … -0.0743431 -0.131138; … ; -0.355144 -0.0723899 … -0.437269 -0.148717; -0.471267 0.0218818 … -0.102206 -0.149327], UInt16[0x0001 0x0001 … 0x0001 0x0001], 12, 12, false, [6, 5, 4, 9, 2, 7, 3, 8, 12, 10, 1, 11], [479232, 12], [12], Array{Float32,2}, Array{UInt16,1}))

In [4]:
function create_discriminator()
    samples = music_sample_size
    Weights = [
        xavier(Float32,Int64(samples*2/(3*512)),samples),
        xavier(Float32,Int64(samples/1024),Int64(samples*2/(3*512))), 
        xavier(Float32,Int64(samples/2048),Int64(samples/1024)), 
        xavier(Float32,2,Int64(samples/2048))
    ]
    Biases = [
        zeros(Float32,samples*2/(3*512),1),
        zeros(Float32,samples/1024,1),
        zeros(Float32,samples/2048,1),
        zeros(Float32,2,1)
    ]
    Model = Any[]
    for i = 1:length(Weights)
        push!(Model, Weights[i])
        push!(Model, Biases[i])
    end
    Model
end

function create_generator()
    granularity = 64
    oscilators = 80
    samples = music_sample_size
    Weights = [
        xavier(Float32,Int64(oscilators * granularity), Int64(oscilators * granularity)),
        xavier(Float32,Int64(oscilators * granularity / 2), Int64(oscilators * granularity)), 
        xavier(Float32,Int64(oscilators * granularity / 4), Int64(oscilators * granularity / 2)), 
        xavier(Float32,Int64(oscilators * granularity), Int64(oscilators * granularity / 4))
    ]
    Biases = [
        zeros(Float32,Int64(oscilators * granularity), 1),
        zeros(Float32,Int64(oscilators * granularity / 2), 1),
        zeros(Float32,Int64(oscilators * granularity / 4), 1),
        zeros(Float32,Int64(oscilators * granularity), 1)
    ]
    Model = Any[]
    for i = 1:length(Weights)
        push!(Model, Weights[i])
        push!(Model, Biases[i])
    end
    Model
end

create_generator (generic function with 1 method)

In [40]:
function discriminate(Model,X)
    for i=1:2:length(Model) - 2
        X = tanh.(Model[i]*X .+ Model[i+1])
    end
    labels = tanh.(Model[end - 1]*X .+ Model[end])
    return labels
end
#=
function generate(Model, batch_size)
    Z = Array(randn(Float32, music_sample_size, batch_size))
    for i=1:2:length(Model) - 2
        Z = tanh.(Model[i]*Z .+ Model[i+1])
    end
    labels = tanh.(Model[end - 1]*Z .+ Model[end])
    return labels
end
=#
granularity = 64
oscilators = 80
function generate(Model, bs)
    Z = Array(zeros(Float32, granularity*oscilators, 1)) + 0.1
    
    for i=1:2:length(Model) - 2
        Z = tanh.(Model[i]*Z .+ Model[i+1])
    end
    Z = tanh.(Model[end - 1]*Z .+ Model[end])
    
    samples = Array(zeros(Float32, music_sample_size, 1))
    
    vals = [] 
    for sample_nr=0:oscilators:oscilators*(granularity - 1)
        push!(vals, Z[(sample_nr + 1):(sample_nr + oscilators)])        
    end
    
    for si=1:music_sample_size
        for v in vals[Int64(round(1 + si/music_sample_size * (granularity -1) ))]
            samples[si] += sin(si*v/sample_frequency)
        end
        samples[si] = samples[si]/oscilators
    end
    return hcat(samples,samples,samples,samples)
end


generate (generic function with 2 methods)

In [47]:
D = create_discriminator()
G = create_generator()

function loss_d(D,X,Y_real) 
    return nll(discriminate(D,X), Y_real)
end 

lossgradients_d = grad(loss_d)

function loss_g(D,FS,TY)
    return nll(discriminate(D,FS), TY)
end

lossgradients_g = grad(loss_g)

optimizers_d = optimizers(D, Nesterov)
optimizers_g = optimizers(G, Nesterov)


function save_samples(epoch)
    samples = generate(G, 5)
    for i=1:length(samples[1,:])
        wavwrite(samples[:,i], 16000, "/mnt/data/other/music_samples/generated/epoch-$(epoch)-nr-$(i).wav")
        println("Generated a sample !")
    end
end

for epoch=1:20
    println("Started training for epoch: $(epoch)")
    
    for (X,Y) in Dtrain
        
        fake_samples = generate(G, batch_size)
        fake_Y = zeros(UInt16, length(fake_samples[1,:]))
        
        X = hcat(X, fake_samples)
        Y = vcat(Y, fake_Y)
        
        GD = lossgradients_d(D,X,Y)
        
        fake_samples = generate(G, 4)
        wannabe_Y = [UInt16(1),UInt16(1),UInt16(1),UInt16(1)]
        GG = lossgradients_g(D,fake_samples, wannabe_Y)
        
        update!(D, GD , optimizers_d)
        update!(G, GG , optimizers_g)
        save_samples(epoch)
    end
    
    println("Finished training epoch: $(epoch)")
    println("Got accuracy: $(accuracy(D, Dtrain, discriminate)) , on the training set")
    save_samples(epoch)
    flush(STDOUT)
end

Started training for epoch: 1


LoadError: [91mDimensionMismatch("x has length 299040768, but y has length 26214400")[39m