In [1]:
#=
# This is an implementation of
# Glove: Global vectors for word representation
# J Pennington, R Socher, C Manning
# Proceedings of the 2014 conference on empirical methods in natural language
# https://nlp.stanford.edu/pubs/glove.pdf
# (Made from the paper without reference to the source code)
#
# Pennington et. al's implementation is ~1K lines of  C
#=#


LoadError: [91msyntax: incomplete: unterminated multi-line comment #= ... =#[39m

In [22]:
LinAlg.dot(x::Flux.TrackedArray, y::Flux.TrackedArray) = sum(x.*y)

In [1]:
using CorpusLoaders
using MLDataUtils
using StringInterning
using DataStructures
using Flux

In [64]:
wikidata = collect(Iterators.take(CorpusLoaders.load_wikicorpus(), 10_000_000))

10000000-element Array{StringInterning.InternedString,1}:
 "henry"       
 "hallam"      
 "july"        
 "9"           
 "1777"        
 "january"     
 "21"          
 "1859"        
 "was"         
 "an"          
 "english"     
 "historian"   
 "the"         
 ⋮             
 "concerns"    
 "a"           
 "midwife"     
 "watts"       
 "in"          
 "london"      
 "who"         
 "investigates"
 "the"         
 "identity"    
 "of"          
 "a"           

In [65]:
function coocurs(data, hw=5)
    coocurs = DefaultDict{Tuple{InternedString,InternedString}, Float32}(0f0)
    distance_weights = [1f0/abs(d-hw) for d in 0:2hw  if d!=hw]
    for (word_, window) in slidingwindow(i->[i-hw:i-1; i+1:i+hw], data, 1, stride=1)
        word = first(word_)
        for (weight, coword) in zip(distance_weights, window)
            coocurs[(word,coword)]+=weight
        end
    end
    coocurs
end

coocurs (generic function with 2 methods)

In [66]:
f(x, xmax=100f0, α=3/4)::Float32 = x>xmax ? 1f0 : (x/xmax)^α


f (generic function with 3 methods)

In [67]:

function glove(data, ndim=300, halfwindow=5)
    xco  = coocurs(data, halfwindow)
    # sum f.(xco)
    words = unique(last.(collect(keys(xco))))
    ws = Dict(w=>param(randn(Float32,ndim)) for w in words) 
    vs = Dict(w=>param(randn(Float32,ndim)) for w in words)
    bs = Dict(w=>param(randn(Float32)) for w in words)
    cs = Dict(w=>param(randn(Float32)) for w in words)
                    all_params = vcat(collect.(values.((ws,vs,bs,cs)))...)
                    function loss(ij,x)
                        i,j = ij
                        @inbounds res = f(x)*(ws[i]⋅vs[j] + bs[i] + cs[j] - log(x)).^2
                        res
                    end
                    
                    loss(co) = mapreduce(ijx -> loss(ijx...), sum, co)
    
                    Flux.train!(loss, collect(xco), ADAM(all_params, 0.01))
                    Dict(w=>v.data for (w,v) in ws)
end

glove (generic function with 3 methods)

In [68]:
@code_warntype glove(wikidata, 30,5)

Variables:
  #self# <optimized out>
  data::Array{StringInterning.InternedString,1}
  ndim::Int64
  halfwindow::Int64
  #207::##207#215{Int64}
  #208::##208#216{Int64}
  #209 <optimized out>
  #210 <optimized out>
  #211::##211#219
  #214 <optimized out>
  xco::DataStructures.DefaultDict{Tuple{StringInterning.InternedString,StringInterning.InternedString},Float32,Float32}
  words::Array{StringInterning.InternedString,1}
  ws::Dict{StringInterning.InternedString,TrackedArray{…,Array{Float32,1}}}
  vs::Dict{StringInterning.InternedString,TrackedArray{…,Array{Float32,1}}}
  bs::Dict{StringInterning.InternedString,TrackedArray{…,Array{Float32,0}}}
  cs::Dict{StringInterning.InternedString,TrackedArray{…,Array{Float32,0}}}
  all_params::Array{Flux.Tracker.TrackedArray{Float32,N,A} where A where N,1}
  loss[1m[91m::Core.Box[39m[22m
  T <optimized out>
  shape <optimized out>
  iter <optimized out>
  C::Array{StringInterning.InternedString,1}
  keeps@_23::Tuple{Tuple{Bool}}
  Idefaults@_2

In [None]:
wes, enc = glove(wikidata, 30)