Skip to content

Commit

Permalink
Revert "quieten tests"
Browse files Browse the repository at this point in the history
  • Loading branch information
oxinabox committed Sep 18, 2018
1 parent 2aa4eee commit a66c0a9
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/Embeddings.jl
@@ -1,6 +1,6 @@
module Embeddings

using LinearAlgebra: norm
using Statistics: norm
using DataDeps
using AutoHashEquals

Expand Down
10 changes: 5 additions & 5 deletions src/word2vec.jl
Expand Up @@ -34,18 +34,18 @@ function _load_embeddings(::Type{<:Word2Vec}, embedding_file, max_vocab_size, ke

index = 1
@inbounds for _ in 1:vocab_size
word = readuntil(fh, ' '; keep=false)
word = readuntil(fh, ' ', keep=false)
vector = Vector{Float32}(undef, vector_size)
@inbounds for i = 1:vector_size
vector[i] = read(fh, Float32)
end

if isempty(keep_words) || word ∈ keep_words
LL[:, index] = vector ./ norm(vector)
if !occursin("_", word) && (length(keep_words)==0 || word in keep_words ) #If it isn't a phrase
LL[:,index]=vector./norm(vector)
indexed_words[index] = word

index += 1
if index > max_stored_vocab_size
index+=1
if index>max_stored_vocab_size
break
end
end
Expand Down
1 change: 0 additions & 1 deletion test/REQUIRE

This file was deleted.

38 changes: 14 additions & 24 deletions test/runtests.jl
@@ -1,8 +1,7 @@
using Embeddings
using Test
using Suppressor
using DataDeps

using DataDeps

"""
tempdatadeps(fun)
Expand Down Expand Up @@ -36,7 +35,7 @@ end
@testset_nokeep_data
Use just like @testset,
but know that it deletes any downquiet_loaded data dependencies when it is done.
but know that it deletes any downloaded data dependencies when it is done.
"""
macro testset_nokeep_data(name, expr)
quote
Expand All @@ -47,29 +46,20 @@ macro testset_nokeep_data(name, expr)
end


"""
Loads embeddings with suppressed output.
For purposes of not flooding the logs.
During downloads.
"""
function quiet_load_embeddings(args...; kwargs...)
    # Forward every positional and keyword argument unchanged; `@suppress` wraps the
    # call so that its output stays out of the logs.
    result = @suppress load_embeddings(args...; kwargs...)
    return result
end

@testset_nokeep_data "Word2Vec" begin
embs_full = quiet_load_embeddings(Word2Vec)
embs_full = load_embeddings(Word2Vec)

@test size(embs_full.embeddings) == (300, length(embs_full.vocab))

embs_mini = quiet_load_embeddings(Word2Vec; max_vocab_size=100)
embs_mini = load_embeddings(Word2Vec; max_vocab_size=100)
@test length(embs_mini.vocab)==100

@test embs_mini.embeddings == embs_full.embeddings[:, 1:100]
@test embs_mini.vocab == embs_full.vocab[1:100]

@test "for" ∈ embs_mini.vocab

embs_specific = quiet_load_embeddings(Word2Vec; keep_words=Set(["red", "green", "blue"]))
embs_specific = load_embeddings(Word2Vec; keep_words=Set(["red", "green", "blue"]))

@test size(embs_specific.embeddings) == (300, 3)
@test Set(embs_specific.vocab) == Set(["red", "green", "blue"])
Expand All @@ -78,7 +68,7 @@ end
@testset "GloVe" begin
# just test one file from each of provided sets
tests = ["glove.6B/glove.6B.50d.txt",
#"glove.42B.300d/glove.42B.300d.txt", # These files are too slow to downquiet_load
#"glove.42B.300d/glove.42B.300d.txt", # These files are too slow to download
#"glove.840B.300d/glove.840B.300d.txt", # They are not that big but are on a slow server
"glove.twitter.27B/glove.twitter.27B.25d.txt"]

Expand All @@ -90,15 +80,15 @@ end

@testset_nokeep_data "$filename" begin
@testset "Basic" begin
glove = quiet_load_embeddings(GloVe{:en}, @datadep_str(file), max_vocab_size=1000)
glove = load_embeddings(GloVe{:en}, @datadep_str(file), max_vocab_size=1000)
@test length(glove.vocab) == 1000
@test size(glove.embeddings) == (dim(file), 1000)
@test "for" ∈ glove.vocab
end

@testset "Specific" begin
colors = ["red", "green", "blue"]
glove_colors = quiet_load_embeddings(GloVe, @datadep_str(file), keep_words=colors)
glove_colors = load_embeddings(GloVe, @datadep_str(file), keep_words=colors)
@test length(glove_colors.vocab) == 3
@test size(glove_colors.embeddings) == (dim(file), 3)
@test Set(glove_colors.vocab) == Set(colors)
Expand All @@ -110,14 +100,14 @@ end
# first 100 lines of official glove.6B.50d.txt
custom_glove_file = joinpath(@__DIR__, "data", "custom.glove.txt")
@testset "Basic" begin
glove = quiet_load_embeddings(GloVe, custom_glove_file)
glove = load_embeddings(GloVe, custom_glove_file)
@test length(glove.vocab) == 100
@test size(glove.embeddings) == (50, 100)
@test "the" ∈ glove.vocab
end
@testset "Specific" begin
punct = [".", ","]
glove_punct = quiet_load_embeddings(GloVe, custom_glove_file, keep_words=punct)
glove_punct = load_embeddings(GloVe, custom_glove_file, keep_words=punct)
@test length(glove_punct.vocab) == 2
@test size(glove_punct.embeddings) == (50, 2)
@test Set(glove_punct.vocab) == Set(punct)
Expand All @@ -129,29 +119,29 @@ end
@testset "FastText" begin
@testset_nokeep_data "English 1" begin
@testset "Basic" begin
embs1 = quiet_load_embeddings(FastText_Text; max_vocab_size=100)
embs1 = load_embeddings(FastText_Text; max_vocab_size=100)
@test length(embs1.vocab)==100
@test size(embs1.embeddings) == (300, 100)
end

@testset "Specific" begin
embs_specific = quiet_load_embeddings(FastText_Text; keep_words=Set(["red", "green", "blue"]))
embs_specific = load_embeddings(FastText_Text; keep_words=Set(["red", "green", "blue"]))
@test size(embs_specific.embeddings) == (300, 3)
@test Set(embs_specific.vocab) == Set(["red", "green", "blue"])
end
end


@testset_nokeep_data "French" begin
embs_fr = quiet_load_embeddings(FastText_Text{:fr}; max_vocab_size=100)
embs_fr = load_embeddings(FastText_Text{:fr}; max_vocab_size=100)
@test length(embs_fr.vocab)==100
@test size(embs_fr.embeddings) == (300, 100)
end



@testset_nokeep_data "English file number 2" begin
embs_specific = quiet_load_embeddings(FastText_Text, 2; keep_words=Set(["red", "green", "blue"]))
embs_specific = load_embeddings(FastText_Text, 2; keep_words=Set(["red", "green", "blue"]))
@test size(embs_specific.embeddings) == (300, 3)
@test Set(embs_specific.vocab) == Set(["red", "green", "blue"])
end
Expand Down

0 comments on commit a66c0a9

Please sign in to comment.