## Word2Vec

In [45]:
using Pkg

In [66]:
using LinearAlgebra
using StatsBase
using ProgressMeter
using Distributed

In [67]:
function text_preprocessing(text::Vector{String})::Vector{Vector{SubString{String}}}
    text_vec = Array{Vector{SubString{String}}}(undef, size(text)[1])
    for i in 1:size(text)[1]
        text_vec[i] = split(text[i], " ")
        text_vec[i][1] = "<eos>"
        text_vec[i][end] = "<eos>"
    end
    return text_vec
end

function word_char(text::Vector{SubString{String}})::Vector{SubString{String}}
    return unique(text)
end

function word_to_id(text::Vector{SubString{String}})::Dict{SubString{String}, Int64}
    text_num = 1:size(text)[1]
    word_to_id = Dict([(text[t], text_num[t]) for t in 1:size(text)[1]])
    return word_to_id
end

function id_to_word(text::Vector{SubString{String}})::Dict{Int64, SubString{String}}
    text_num = 1:size(text)[1]
    id_to_word = Dict([(text_num[t], text[t]) for t in 1:size(text)[1]])
    return id_to_word
end

function corpus(text::Vector{SubString{String}}, word_id::Dict{SubString{String}, Int64})::Vector{Int64}
    corpus_vec = [word_id[w] for w in text]
    return corpus_vec
end

function one_hot_corpus(corpus_vec::Vector{Int64})::Matrix{Int8}
    vec_var = unique(corpus_vec)
    vec = zeros(Int8, size(corpus_vec)[1], size(vec_var)[1])
    for i in 1:size(corpus_vec)[1]
        vec[i, corpus_vec[i]] = 1
    end
    return vec
end

n_batch = 30
time_length = 30
weight_length = 100
vec_length = 100

function corpus_batch(corpus::Vector{Int64}, n_batch::Int64, time_length::Int64)
    corpus_size = size(corpus)[1]
    corpus_temp = []
    corpus_batch = []
    
    for j in 1:corpus_size÷time_length
        push!(corpus_temp, corpus[time_length*(j-1)+1:time_length*j])
    end
    for i in 1:n_batch
        push!(corpus_batch, corpus_temp[corpus_size÷n_batch÷time_length*(i-1)+1:corpus_size÷n_batch÷time_length*i])
    end
    return corpus_batch
end

function corpus_mat(corpus::Vector{Int64}, n_batch::Int64, time_length::Int64)::Matrix{Int64}
    corpus_size = size(corpus)[1]
    corpus_batch = zeros(Int64, corpus_size÷n_batch, n_batch)
    
    for i in 1:corpus_size÷n_batch
        for j in 1:n_batch
        corpus_batch[i, j] = corpus[corpus_size÷n_batch*(j-1)+1+(i-1)]
        end
    end
    return corpus_batch
end

function one_hot_corpus_batch(corpus_mat::Matrix{Int64}; vec_var=10000)::Array{Int8, 3}
    oh_corpus_batch = zeros(Int8, size(corpus_mat)[1], size(corpus_mat)[2], vec_var)
    for i in 1:size(corpus_mat)[1]
        for j in 1:size(corpus_mat)[2]
            oh_corpus_batch[i, j, corpus_mat[i, j]] = 1
        end
    end
    return oh_corpus_batch
end
    

one_hot_corpus_batch (generic function with 1 method)

In [48]:
text = "I am who I am. I will do what I want to do."

"I am who I am. I will do what I want to do."

In [49]:
pwd()
cd("/Users/ishidatetsurou/")
path = pwd()

"/Users/ishidatetsurou"

In [50]:
ptb_path = path*"/Downloads/ptbdataset/ptb.train.txt"
a = open(ptb_path)
ptb_train_txt = readlines(a)

42068-element Vector{String}:
 " aer banknote berlitz calloway " ⋯ 115 bytes ⋯ "-food ssangyong swapo wachter "
 " pierre <unk> N years old will " ⋯ 18 bytes ⋯ "a nonexecutive director nov. N "
 " mr. <unk> is chairman of <unk> n.v. the dutch publishing group "
 " rudolph <unk> N years old and " ⋯ 91 bytes ⋯ "ritish industrial conglomerate "
 " a form of asbestos once used t" ⋯ 128 bytes ⋯ "ears ago researchers reported "
 " the asbestos fiber <unk> is un" ⋯ 102 bytes ⋯ "ecades later researchers said "
 " <unk> inc. the unit of new yor" ⋯ 72 bytes ⋯ "s <unk> cigarette filters in N "
 " although preliminary findings " ⋯ 128 bytes ⋯ " new attention to the problem "
 " a <unk> <unk> said this is an old story "
 " we 're talking about years ago" ⋯ 37 bytes ⋯ "ng any questionable properties "
 " there is no asbestos in our products now "
 " neither <unk> nor the research" ⋯ 58 bytes ⋯ "smokers of the kent cigarettes "
 " we have no useful information " ⋯ 54 bytes ⋯ "ston 's <unk> cancer ins

In [51]:
a = split(ptb_train_txt[1], " ")

26-element Vector{SubString{String}}:
 ""
 "aer"
 "banknote"
 "berlitz"
 "calloway"
 "centrust"
 "cluett"
 "fromstein"
 "gitano"
 "guterman"
 "hydro-quebec"
 "ipo"
 "kia"
 "memotec"
 "mlx"
 "nahb"
 "punts"
 "rake"
 "regatta"
 "rubens"
 "sim"
 "snack-food"
 "ssangyong"
 "swapo"
 "wachter"
 ""

In [52]:
input_data = text_preprocessing(ptb_train_txt)

42068-element Vector{Vector{SubString{String}}}:
 ["<eos>", "aer", "banknote", "berlitz", "calloway", "centrust", "cluett", "fromstein", "gitano", "guterman"  …  "punts", "rake", "regatta", "rubens", "sim", "snack-food", "ssangyong", "swapo", "wachter", "<eos>"]
 ["<eos>", "pierre", "<unk>", "N", "years", "old", "will", "join", "the", "board", "as", "a", "nonexecutive", "director", "nov.", "N", "<eos>"]
 ["<eos>", "mr.", "<unk>", "is", "chairman", "of", "<unk>", "n.v.", "the", "dutch", "publishing", "group", "<eos>"]
 ["<eos>", "rudolph", "<unk>", "N", "years", "old", "and", "former", "chairman", "of"  …  "named", "a", "nonexecutive", "director", "of", "this", "british", "industrial", "conglomerate", "<eos>"]
 ["<eos>", "a", "form", "of", "asbestos", "once", "used", "to", "make", "kent"  …  "to", "it", "more", "than", "N", "years", "ago", "researchers", "reported", "<eos>"]
 ["<eos>", "the", "asbestos", "fiber", "<unk>", "is", "unusually", "<unk>", "once", "it"  …  "causing", "symptoms

In [53]:
input_data_whole = vcat(input_data...)

971657-element Vector{SubString{String}}:
 "<eos>"
 "aer"
 "banknote"
 "berlitz"
 "calloway"
 "centrust"
 "cluett"
 "fromstein"
 "gitano"
 "guterman"
 "hydro-quebec"
 "ipo"
 "kia"
 ⋮
 "unilab"
 "'s"
 "presence"
 "has"
 "been"
 "less"
 "prominent"
 "according"
 "to"
 "mr."
 "<unk>"
 "<eos>"

In [54]:
input_data_unique = word_char(input_data_whole)

10000-element Vector{SubString{String}}:
 "<eos>"
 "aer"
 "banknote"
 "berlitz"
 "calloway"
 "centrust"
 "cluett"
 "fromstein"
 "gitano"
 "guterman"
 "hydro-quebec"
 "ipo"
 "kia"
 ⋮
 "shah"
 "torrijos"
 "lung-cancer"
 "bikers"
 "bofors"
 "parsow"
 "caci"
 "isi"
 "chestman"
 "tci"
 "trecker"
 "unilab"

In [55]:
word_id = word_to_id(input_data_unique)

Dict{SubString{String}, Int64} with 10000 entries:
  "adviser"        => 903
  "enjoy"          => 5596
  "advertisements" => 2605
  "fight"          => 520
  "nicholas"       => 4062
  "everywhere"     => 4533
  "surveyed"       => 2220
  "helping"        => 3011
  "whose"          => 1007
  "manufacture"    => 1072
  "tribune"        => 5197
  "redevelopment"  => 5800
  "favor"          => 3095
  "poised"         => 8385
  "henry"          => 4724
  "eddie"          => 6626
  "borders"        => 4667
  "star"           => 2068
  "bidder"         => 6436
  "ventures"       => 4230
  "reform"         => 2847
  "plan"           => 739
  "rises"          => 401
  "hampshire"      => 825
  "those"          => 214
  ⋮                => ⋮

In [56]:
text_vec = corpus(input_data_whole, word_id)

971657-element Vector{Int64}:
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
     ⋮
 10000
   120
  1144
    70
   553
   918
  3197
   259
    65
    40
    27
     1

In [57]:
cp_batch = corpus_batch(text_vec[1:971100], n_batch, time_length)

30-element Vector{Any}:
 Any[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  21, 22, 23, 24, 25, 1, 1, 26, 27, 28], [29, 30, 31, 32, 33, 34, 35, 36, 37, 38  …  44, 33, 45, 46, 47, 1, 1, 48, 27, 28], [29, 30, 49, 50, 42, 43, 51, 52, 53, 54  …  1, 1, 36, 61, 43, 62, 63, 64, 65, 66], [67, 68, 69, 70, 71, 36, 72, 73, 43, 74  …  81, 28, 29, 82, 83, 84, 1, 1, 33, 62], [85, 27, 41, 86, 27, 63, 79, 87, 33, 27  …  96, 97, 98, 83, 99, 1, 1, 27, 100, 33], [101, 43, 102, 103, 27, 104, 94, 105, 67, 106  …  1, 1, 111, 112, 113, 114, 84, 80, 81, 36], [115, 82, 33, 116, 117, 118, 109, 119, 120, 102  …  127, 65, 33, 128, 1, 1, 36, 27, 27, 99], [57, 41, 129, 30, 130, 1, 1, 131, 132, 133  …  140, 141, 1, 1, 142, 41, 143, 62, 109, 144], [145, 146, 1, 1, 147, 27, 148, 33, 83, 149  …  43, 33, 67, 106, 1, 1, 131, 155, 143, 156], [157, 153, 158, 159, 160, 161, 162, 99, 163, 164  …  27, 168, 36, 169, 43, 83, 170, 33, 171, 74]  …  [576, 796, 557, 529, 832, 719, 221, 316, 226, 875  …  161, 4134, 3719, 4101, 120, 34, 1, 1, 10

In [58]:
cp_batch_matrix = [[vcat(cp_batch[i][j]...) for i in 1:size(cp_batch)[1]] for j in 1:size(cp_batch[1][1])[1]]

30-element Vector{Vector{Vector{Int64}}}:
 [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  21, 22, 23, 24, 25, 1, 1, 26, 27, 28], [400, 541, 4151, 41, 4018, 971, 2466, 109, 33, 653  …  33, 238, 824, 120, 27, 134, 541, 1624, 624, 4152], [5792, 5793, 230, 802, 5794, 120, 411, 549, 100, 49  …  991, 2314, 168, 230, 4793, 5587, 3503, 1, 1, 33], [1168, 27, 88, 6784, 49, 2266, 27, 1, 1, 2595  …  3709, 459, 735, 109, 982, 70, 553, 480, 109, 36], [65, 27, 214, 1253, 142, 41, 143, 7482, 182, 4765  …  1, 1, 142, 397, 221, 36, 249, 1345, 285, 33], [28, 1, 1, 586, 2501, 1, 1, 653, 5729, 2304  …  1630, 1, 1, 36, 1005, 182, 2314, 4793, 5587, 100], [936, 937, 27, 65, 388, 28, 469, 931, 933, 35  …  1523, 939, 1, 1, 5509, 5510, 1453, 33, 245, 400], [94, 33, 4845, 27, 110, 1191, 3688, 325, 33, 4980  …  70, 110, 1191, 3688, 325, 1, 1, 43, 584, 319], [1, 1, 8917, 70, 27, 28, 3881, 109, 57, 1403  …  36, 8655, 5211, 88, 653, 973, 2156, 109, 33, 1847], [4282, 41, 283, 27, 76, 496, 149, 4869, 49, 2975  …  29, 27, 27, 35, 

In [59]:
corpus_mt_bat = corpus_mat(text_vec[1:971100], n_batch, time_length)

32370×30 Matrix{Int64}:
    1   400  5792  1168    65    28  …   979  6567    33   333    99    94
    2   541  5793    27    27     1      774  4572  4125   114   375   226
    3  4151   230    88   214     1        1  1513  3948   691   719  1033
    4    41   802  6784  1253   586        1     1    94     1   567    31
    5  4018  5794    49   142  2501       33     1   719     1  7717   765
    6   971   120  2266    41     1  …  1407  9409  4965   410    88  3372
    7  2466   411    27   143     1     1769  7317   110   436    27    35
    8   109   549     1  7482   653      276   100    27  7853   847  1124
    9    33   100     1   182  5729       33   746     1   946   707    35
   10   653    49  2595  4765  2304     1638    65     1  2471   142   417
   11   225   253   171    33  2501  …  2356  1140    33  2739    55    28
   12   361   231  3272  1266   114      375   110   524  2425  1396   188
   13  2059  1039    99   374  1185      384  7935  2101   155  3404     1
 

In [None]:
oh_corpus_mt_bat = one_hot_corpus_batch(corpus_mt_bat)

In [None]:
text_vec_oh = one_hot_corpus(text_vec)

In [60]:
id_word = id_to_word(input_data_unique)

Dict{Int64, SubString{String}} with 10000 entries:
  4986 => "hang"
  7329 => "sociologist"
  4700 => "perceptions"
  4576 => "sharp"
  7144 => "fiercely"
  6073 => "innovative"
  2288 => "damage"
  1703 => "sweeping"
  1956 => "unfilled"
  8437 => "ge"
  2350 => "prepared"
  7685 => "repeal"
  8690 => "mandated"
  5975 => "intraday"
  9773 => "ashland"
  3406 => "high-priced"
  2841 => "treated"
  2876 => "surfaced"
  687  => "deficit"
  7353 => "border"
  185  => "replaced"
  1090 => "depend"
  2015 => "thing"
  7272 => "risky"
  8544 => "peladeau"
  ⋮    => ⋮

## Layer Implimentation

In [246]:
function MatMul_forward(h::Matrix{Float64}, w::Matrix{Float64}, b::Matrix{Float64})::Matrix{Float64}
    return h * w .+ b
end

function softmax_with_entropy(input::Matrix{Float64}, input_label::Array{Int64}, n_batch::Int, word_var::Int)::Tuple{Float64, Matrix{Float64}}
    temp = zeros(n_batch, word_var)
    temp_max = [maximum(input[i, :]) for i in 1:n_batch]
    temp += exp.(input .- temp_max)./sum(exp.(input .- temp_max), dims=2)
    loss = 0.0
    for j in enumerate(input_label)
        loss += -log(temp[j[1], j[2]])/n_batch
    end
    return (loss, temp)
end

function sigm(x::Array)::Array
    return 1.0 ./(1.0 .+ exp.(-x))
end

function RNN_forward(input::Matrix{Float64}, h_in::Matrix{Float64}, w_hidden::Matrix{Float64}, w_x::Matrix{Float64}, bias::Matrix{Float64}, c_in::Matrix{Float64})::Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}
    sizew = size(w_hidden)[2]÷4
    h = input*w_x + h_in*w_hidden .+ bias
    f = sigm(h[:, 1:sizew])
    i = sigm(h[:, sizew+1:sizew*2])
    o = sigm(h[:, sizew*2+1:sizew*3])
    g = tanh.(h[:, sizew*3+1:sizew*4])
    c_out = c_in .* f + i .* g
    h_out = o .* tanh.(c_out)
    return h_out, c_out, h
end

function MatMul_backward(s::Matrix{Float64}, w::Matrix{Float64}, h::Matrix{Float64}, n_batch::Int64)::Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}
    dx = s * w'
    dw = 1.0/n_batch .* (h' * s)
    db = 1.0/n_batch .* sum(s, dims=1)
    return (dx, dw, db)
end

function swe_backward(output::Matrix{Float64}, prob::Matrix{Float64}, input_label::Vector{Int64}, n_batch::Int64)::Matrix{Float64}
    output = prob
    for j in 1:n_batch
        output[j, input_label[j]] -= 1.0
    end
    return output
end

function clipping!(input::Matrix{Float64}; max_norm = 5.0)::Matrix{Float64}
    norm_ = norm(input)
    rate = max_norm / norm_
    if rate < 1.0
        input *= rate
    end
end

function Embed_backward(x::Matrix{Float64}, input_label::Vector{Int64}, in_weight::Matrix{Float64})::Matrix{Float64}
    out = zeros(Float32, size(in_weight)[1], size(in_weight)[2])
    for i in enumerate(input_label)
        out[i[2], :] = x[i[1], :]
    end
    return out
end

function RNN_backward(dout_LSTM::Array{Float64, 3}, z::Array{Float64, 3}, c::Array{Float64, 3}, dx::Array{Float64, 3}, dx_a::Array{Float64, 3}, dh::Array{Float64, 3}, dc::Array{Float64, 3}, h_w::Array{Float64, 2}, x_w::Array{Float64, 2}, wl::Int64, t::Int64)::Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}
    dout_LSTM[:, 1:wl, t] = sigm(z[:, 1:wl, t]).*(1.0 .- sigm(z[:, 1:wl, t])) .* c[:, :, t-1].*(dc[:, :, t] .+ ((1.0 .- (tanh.(c[:, :, t])).^2.0) .* (sigm(z[:, 2*wl+1:3*wl, t]) .* (dx_a[:, :, t] .+ dh[:, :, t]))))
    dout_LSTM[:, 1*wl+1:2*wl, t] = sigm(z[:, 1*wl+1:2*wl, t]).*(1.0 .- sigm(z[:, 1*wl+1:2*wl, t])).* tanh.(z[:, 3*wl+1:4*wl, t]) .* (dc[:, :, t] .+ ((1.0 .- (tanh.(c[:, :, t])).^2.0) .* (sigm(z[:, 2*wl+1:3*wl, t]) .* (dx_a[:, :, t] .+ dh[:, :, t]))))
    dout_LSTM[:, 2*wl+1:3*wl, t] = sigm(z[:, 2*wl+1:3*wl, t]).*(1.0 .- sigm(z[:, 2*wl+1:3*wl, t])).* tanh.(c[:, :, t]) .* (dx_a[:, :, t] .+ dh[:, :, t])
    dout_LSTM[:, 3*wl+1:4*wl, t] = (1.0 .- (tanh.(z[:, 3*wl+1:4*wl, t])).^2.0).* sigm(z[:, 1*wl+1:2*wl, t]).* (dc[:, :, t] .+ ((1.0 .- (tanh.(c[:, :, t])).^2) .* (sigm(z[:, 2*wl+1:3*wl, t]) .* (dx_a[:, :, t] .+ dh[:, :, t]))))
    
    dh[:, :, t-1] = dout_LSTM[:, :, t] * h_w'
    dx[:, :, t] = dout_LSTM[:, :, t] * x_w'
    dc[:, :, t-1] = sigm(z[:, 1:wl, t]) .* (dc[:, :, t] .+ ((1.0 .- (tanh.(c[:, :, t])).^2.0) .* (sigm(z[:, 2*wl+1:3*wl, t]) .* (dx_a[:, :, t] .+ dh[:, :, t]))))
    
    return dh[:, :, t-1], dx[:, :, t], dc[:, :, t-1], dout_LSTM[:, :, t]
end

RNN_backward (generic function with 3 methods)

In [247]:
n_batch = 30
time_length = 30
weight_length = 100
vec_length = 100

function LSTM(corpus_mt_bat::Matrix{Int64}, vec_length::Int64, weight_length::Int64, time_length::Int64, n_batch::Int64; word_var = 10000, eta = 20.0)::Tuple{Matrix{Int64}, Float64}
    input_size = size(corpus_mt_bat)[1]
    in_weight = rand(word_var, vec_length)
    
    x_weight = rand(vec_length, 4*weight_length)
    h_weight = rand(weight_length, 4*weight_length)
    x_batch = zeros(n_batch, vec_length)
    h_batch = zeros(n_batch, weight_length)
    bias_in = rand(1, 4*weight_length)
    t_batch = circshift(corpus_mt_bat, (1, 0))
    c = zeros(n_batch, weight_length)
    out_weight = rand(weight_length, word_var)
    out_bias = rand(1, word_var)
    h_out = zeros(Int64, input_size, n_batch)
    trans_corpus = corpus_mt_bat'
    h_temp = zeros(n_batch, weight_length, time_length)
    c_temp = zeros(n_batch, weight_length, time_length)
    z_temp = zeros(n_batch, 4*weight_length, time_length)
    x_temp = zeros(n_batch, vec_length, time_length)
    
    grads_swe = zeros(time_length, n_batch, word_var)
    dx_affine = zeros(n_batch, weight_length, time_length)
    dw_h = zeros(n_batch, weight_length, 4*weight_length)
    dw_x = zeros(n_batch, vec_length, 4*weight_length)
    dh_prev = zeros(n_batch, weight_length, time_length)
    dc_prev = zeros(n_batch, weight_length, time_length)
    dx_prev = zeros(n_batch, vec_length, time_length)
    grads_LSTM = zeros(n_batch, 4*weight_length, time_length)
    
    total_loss = 0.0
    loss_count = 0.0
    count = 0
    
    @showprogress 1 "Computing..." for i in 1:input_size
        for j in 1:n_batch
            x_batch[j, :] += in_weight[Int(corpus_mt_bat[i, j]), :]
        end
        h_batch, c, z = RNN_forward(x_batch, h_batch, h_weight, x_weight, bias_in, c)
        h_temp[:, :, i - count*time_length] = h_batch
        c_temp[:, :, i - count*time_length] = c
        z_temp[:, :, i - count*time_length] = z
        x_temp[:, :, i - count*time_length] = x_batch
        
        z_out = MatMul_forward(h_batch, out_weight, out_bias)
        entropy, prob = softmax_with_entropy(z_out, trans_corpus[:, i], n_batch, word_var)
        for j in 1:n_batch
            h_out[i, j] = argmax(prob[j, :])
        end
        
        grads_swe[i - count*time_length, :, :] = swe_backward(grads_swe[i - count*time_length, :, :], prob, corpus_mt_bat[i, :], n_batch)
        dx_affine[:, :, i - count*time_length], dw_affine, db_affine = MatMul_backward(grads_swe[i - count*time_length, :, :], out_weight, h_batch, n_batch)

        """update"""
        out_weight -= eta.*dw_affine
        out_bias -= eta.*db_affine
        clipping!(out_weight)
        clipping!(out_bias)
        
        if i%time_length == 0
            for t in time_length:-1:2
                # f(1), i(2), o(3), g(4) の順番
                dh_prev[:, :, t-1], dx_prev[:, :, t], dc_prev[:, :, t-1], grads_LSTM[:, :, t] =　
                RNN_backward(grads_LSTM, z_temp, c_temp, dx_prev, dx_affine, dh_prev, dc_prev, h_weight, x_weight, weight_length, t)
                
                """update"""
                h_weight -= eta.*((1.0/n_batch).* (h_temp[:, :, t-1]' * grads_LSTM[:, :, t]))
                x_weight -= eta.*((1.0/n_batch).* (x_temp[:, :, t]' * grads_LSTM[:, :, t]))
                bias_in -= eta.*((1.0/n_batch)*sum(grads_LSTM[:, :, t], dims=1))
                clipping!(h_weight)
                clipping!(x_weight)
                clipping!(bias_in)
                
                in_dw = Embed_backward(dx_prev[:, :, t], corpus_mt_bat[(i-time_length)+t, :], in_weight)
                in_weight -= eta.*in_dw
                clipping!(in_weight)
            
            end
            grads_LSTM = zeros(n_batch, 4*weight_length, time_length)
            grads_swe = zeros(time_length, n_batch, word_var)
            dx_affine = zeros(n_batch, weight_length, time_length)
            dh_prev = zeros(n_batch, weight_length, time_length)
            dx_prev = zeros(n_batch, vec_length, time_length)
            dc_prev = zeros(n_batch, weight_length, time_length)
            h_temp = zeros(n_batch, weight_length, time_length)
            c_temp = zeros(n_batch, weight_length, time_length)
            z_temp = zeros(n_batch, 4*weight_length, time_length)
            x_temp = zeros(n_batch, vec_length, time_length)
            count += 1
        end
        total_loss += entropy
        loss_count += 1
    end
    perplexity = exp(total_loss/loss_count)
    return h_out, perplexity
end

LSTM (generic function with 2 methods)

In [251]:
corpus_mt_bat[:, 1]

32370-element Vector{Int64}:
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
    ⋮
  231
 1869
 1005
 1960
 1219
    1
    1
 4150
  109
   33
  238
  302

In [254]:
h_out, perplexity = LSTM(corpus_mt_bat[1:1500, :], vec_length, weight_length, time_length, n_batch)

[32mComputing... 100%|███████████████████████████████████████| Time: 0:02:46[39m


([53 53 … 53 53; 1 1 … 1 1; … ; 1326 1326 … 1326 1326; 172 172 … 172 172], Inf)

In [None]:
h_out[:, 1]

In [256]:
h_out[:, 1]

1500-element Vector{Int64}:
   53
    1
   27
   28
   65
   33
   94
   43
  719
   49
   35
   36
    1
    ⋮
   27
  110
   99
  134
 3072
 3045
  931
  228
  109
   33
 1326
  172

In [73]:
grads_LSTM[:, 1:weight_length, t] = sigm.(z_temp[:, 1:weight_length, t]).*(1.0 .- sigm.(z_temp[:, 1:weight_length, t])) .* c_temp[:, :, t-1].*(dc_prev[:, :, t] .+ ((1.0 .- (tanh.(c_temp[:, :, t])).^2.0) .* (sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t]) .* (dx_affine[:, :, t] .+ dh_prev[:, :, t]))))
grads_LSTM[:, 1*weight_length+1:2*weight_length, t] = sigm.(z_temp[:, 1*weight_length+1:2*weight_length, t]).*(1.0 .- sigm.(z_temp[:, 1*weight_length+1:2*weight_length, t])).* tanh.(z_temp[:, 3*weight_length+1:4*weight_length, t]) .* (dc_prev[:, :, t] .+ ((1.0 .- (tanh.(c_temp[:, :, t])).^2.0) .* (sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t]) .* (dx_affine[:, :, t] .+ dh_prev[:, :, t]))))
grads_LSTM[:, 2*weight_length+1:3*weight_length, t] = sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t]).*(1.0 .- sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t])).* tanh.(c_temp[:, :, t]) .* (dx_affine[:, :, t] .+ dh_prev[:, :, t])
grads_LSTM[:, 3*weight_length+1:4*weight_length, t] = (1.0 .- (tanh.(z_temp[:, 3*weight_length+1:4*weight_length, t])).^2.0).* sigm.(z_temp[:, 1*weight_length+1:2*weight_length, t]).* (dc_prev[:, :, t] .+ ((1.0 .- (tanh.(c_temp[:, :, t])).^2) .* (sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t]) .* (dx_affine[:, :, t] .+ dh_prev[:, :, t]))))

dh_prev[:, :, t-1] = grads_LSTM[:, :, t] * h_weight'
dx_prev[:, :, t] = grads_LSTM[:, :, t] * x_weight'
dc_prev[:, :, t-1] = sigm.(z_temp[:, 1:weight_length, t]) .* (dc_prev[:, :, t] .+ ((1.0 .- (tanh.(c_temp[:, :, t])).^2.0) .* (sigm.(z_temp[:, 2*weight_length+1:3*weight_length, t]) .* (dx_affine[:, :, t] .+ dh_prev[:, :, t]))))

LoadError: UndefVarError: z_temp not defined

In [None]:
id_word[3476]

In [None]:
oh_array_shape = reshape(text_vec_oh[1:971100, :], time_length, size(text_vec)[1]÷n_batch÷time_length, n_batch, size(text_vec_oh)[2])

In [None]:
dic = Dict([("A", 1), ("B", 2)])
dic["A"]

In [None]:
sample(1:200, Weights(rand(200)), 1, replace=false)

In [None]:
circshift(a, (0, 0, 1))

In [None]:
function sigm(x::Array)
    return 1.0 ./(1.0 .+ exp.(-x))
end

In [None]:
sigm([1, 2, 3])

In [None]:
tanh.([1, 2, 3])

In [None]:
[1, 2, 3] .* [1, 2, 3]

In [5]:
[[1, 2, 3]*[1, 2, 3]' for i in 1:10]

10-element Vector{Matrix{Int64}}:
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]
 [1 2 3; 2 4 6; 3 6 9]

In [18]:
for i in 10:-1:1
    println(i)
end

10
9
8
7
6
5
4
3
2
1


In [20]:
[2.0, 1.0].^2

2-element Vector{Float64}:
 4.0
 1.0

In [21]:
sum([1.0, 1.0], dims=1)

1-element Vector{Float64}:
 2.0

In [34]:
for i in enumerate(1:2:10)
    println(i[1])
end

1
2
3
4
5


In [42]:
zeros(Float32, 2, 2)::Array{Float32, 2}

2×2 Matrix{Float32}:
 0.0  0.0
 0.0  0.0

In [None]:
31÷30

1

In [124]:
@show out_bias

out_bias = Float32[0.9185047, 0.232791, 0.70738757, 0.50849026, 0.9280791, 0.62523586, 0.29086965, 0.1561268, 0.40935546, 0.5948539, 0.094766855, 0.10050881, 0.43456233, 0.7057095, 0.6514245, 0.5030774, 0.44270068, 0.5330198, 0.36145985, 0.69919103, 0.5726094, 0.14253038, 0.98216105, 0.39656168, 0.18606937, 0.3865732, 0.64962, 0.7574294, 0.21158439, 0.31651598, 0.38761473, 0.6113986, 0.7119241, 0.18068337, 0.7011936, 0.40855545, 0.98843175, 0.31857598, 0.63226175, 0.01545167, 0.6588761, 0.004155934, 0.11622596, 0.630855, 0.21809334, 0.49355453, 0.8043935, 0.40079546, 0.9827549, 0.82442474, 0.61673456, 0.06722283, 0.5835128, 0.9216018, 0.18957895, 0.96368086, 0.56505287, 0.2992317, 0.19795752, 0.35401785, 0.63669133, 0.38530117, 0.55588746, 0.43733668, 0.40046227, 0.96537465, 0.43723327, 0.98091525, 0.8523379, 0.35070157, 0.8596077, 0.82986605, 0.6700985, 0.1963278, 0.77539724, 0.55488825, 0.009744167, 0.113906205, 0.50345325, 0.81980425, 0.41916215, 0.33930653, 0.32413244, 0.81729674, 

100-element Vector{Float32}:
 0.9185047
 0.232791
 0.70738757
 0.50849026
 0.9280791
 0.62523586
 0.29086965
 0.1561268
 0.40935546
 0.5948539
 0.094766855
 0.10050881
 0.43456233
 ⋮
 0.69546854
 0.8625941
 0.93253016
 0.05729586
 0.49819732
 0.6827586
 0.10831785
 0.2602014
 0.6543097
 0.7046952
 0.7931672
 0.86041254

In [79]:
a = zeros(Float32, 2, 2)
b = zeros(Float32, 2)
a.+b

2×2 Matrix{Float32}:
 0.0  0.0
 0.0  0.0

In [161]:
b = rand(Float32, (1, 100))

1×100 Matrix{Float32}:
 0.944586  0.440856  0.00311387  0.992156  …  0.238158  0.45541  0.760603

In [160]:
out_bias = b .+ a

3×100 Matrix{Float32}:
 1.35174  1.07004  2.46246  1.28356  …  1.09322  1.47056  1.87084  0.507212
 1.62199  1.91622  2.03224  1.01192     1.75414  1.17371  1.4359   0.528912
 1.98419  1.91303  2.29391  1.90885     1.48505  1.29218  1.85185  0.969487

In [159]:
a = sum(rand(Float32, (2, 100)), dims=1)

1×100 Matrix{Float32}:
 1.14471  0.935413  1.48511  0.95273  …  1.06152  1.05896  1.31567  0.498294

In [133]:
sum(rand(Float32, (2, 100)), dims=1)

100×1 adjoint(::Matrix{Float32}) with eltype Float32:
 1.1479161
 1.2023498
 1.269912
 1.5605032
 1.2410071
 0.7793526
 1.2186346
 1.1828353
 0.78433996
 0.9206997
 1.6393926
 0.72612035
 1.5171044
 ⋮
 0.62643653
 0.5260909
 1.412022
 1.5392983
 0.72361815
 1.060127
 0.08641821
 1.6778607
 0.48160255
 1.1986036
 0.5561585
 1.315058

In [148]:
vcat(sum(rand(Float32, (2, 100)), dims=1)...)

100-element Vector{Float32}:
 0.58804065
 1.2670829
 0.43006432
 0.6585703
 1.1020083
 0.47671008
 0.44501275
 1.2043326
 0.98503786
 0.93488026
 0.9940717
 1.394166
 1.6774338
 ⋮
 1.470303
 0.9146804
 1.1431062
 1.3187218
 1.5447755
 0.8829644
 1.0609995
 1.0425112
 1.753279
 0.33197618
 0.6597744
 1.0383241

In [176]:
rand(1, 2, 3)

1×2×3 Array{Float64, 3}:
[:, :, 1] =
 0.181811  0.0129015

[:, :, 2] =
 0.210595  0.625855

[:, :, 3] =
 0.649082  0.260742

In [211]:
norm(rand(2, 3))

1.6416771355902053

In [226]:
rand(2) + sum(rand(3, 2), dims=1)

LoadError: DimensionMismatch: dimensions must match: a has dims (Base.OneTo(1), Base.OneTo(2)), b has dims (Base.OneTo(2),), mismatch at 1

In [227]:
sum(rand(3, 2), dims=1)

1×2 Matrix{Float64}:
 1.74099  1.10871

In [241]:
1 + 1.0

2.0

In [242]:
1.0 + 1.0

2.0