From 888a0696d85766d748b334a389bf49425bdcafc1 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sat, 27 Aug 2016 02:12:44 +1000 Subject: [PATCH 1/5] =Added Sensekeys --- src/WordNet.jl | 3 ++- src/db.jl | 25 +++++++++++++++++++++++-- test/mock_db/dict/index.noun | 1 + test/mock_db/dict/index.sense | 1 + test/runtests.jl | 1 + test/test_db.jl | 3 ++- test/test_sensekeys.jl | 10 ++++++++++ 7 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 test/test_sensekeys.jl diff --git a/src/WordNet.jl b/src/WordNet.jl index 73c9c18..7bb0723 100644 --- a/src/WordNet.jl +++ b/src/WordNet.jl @@ -8,5 +8,6 @@ include("constants.jl") include("synset.jl") include("db.jl") include("operations.jl") +include("sensekeys.jl") -end \ No newline at end of file +end diff --git a/src/db.jl b/src/db.jl index ffa5833..36f313d 100644 --- a/src/db.jl +++ b/src/db.jl @@ -3,15 +3,18 @@ export DB immutable DB lemmas::Dict{Char, Dict{AbstractString, Lemma}} synsets::Dict{Char, Dict{Int, Synset}} + sensekeys::Dict{Tuple{Int,AbstractString}, AbstractString} end function DB(base_dir::AbstractString) - DB(load_lemmas(base_dir), load_synsets(base_dir)) + DB(load_lemmas(base_dir), + load_synsets(base_dir), + load_sensekeys(base_dir)) end Base.show(io::IO, db::DB) = print(io, "WordNet.DB") -function Base.getindex(db::DB, pos::Char, word::AbstractString) +function Base.getindex(db::DB, pos::Char, word::AbstractString) db.lemmas[pos][lowercase(word)] end @@ -57,6 +60,24 @@ function load_synsets(base_dir) synsets end + +function load_sensekeys(basedir) + path=joinpath(basedir, "dict","index.sense") + sensekeys = Dict{Tuple{Int64,AbstractString},AbstractString}() + + for line in eachline(path) + full_key, offset_str, sense_num_str, tagcount_str = split(line) + lemma_name = first(split(full_key,'%')) + sense_offset = parse(Int64, offset_str) + index = (sense_offset,lemma_name) + @assert(!haskey(sensekeys, index)) + sensekeys[index] = full_key + end + + sensekeys +end + + function 
path_to_data_file(base_dir, pos) joinpath(base_dir, "dict", "data.$(SYNSET_TYPES[pos])") end diff --git a/test/mock_db/dict/index.noun b/test/mock_db/dict/index.noun index 88b1706..56c32a4 100644 --- a/test/mock_db/dict/index.noun +++ b/test/mock_db/dict/index.noun @@ -28,3 +28,4 @@ 28 any associated documentation shall at all times remain with 29 Princeton University and LICENSEE agrees to preserve same. 'hood n 1 2 @ ; 1 0 08641944 +section n 1 2 @ ; 1 0 08648322 diff --git a/test/mock_db/dict/index.sense b/test/mock_db/dict/index.sense index e69de29..acb97b5 100644 --- a/test/mock_db/dict/index.sense +++ b/test/mock_db/dict/index.sense @@ -0,0 +1 @@ +section%1:15:01:: 08648322 3 11 diff --git a/test/runtests.jl b/test/runtests.jl index 46acc4e..58dd56f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,3 +13,4 @@ include("test_db.jl") include("test_lemma.jl") include("test_synset.jl") include("test_operations.jl") +include("test_sensekeys.jl") diff --git a/test/test_db.jl b/test/test_db.jl index 8d468f1..d03d8f9 100644 --- a/test/test_db.jl +++ b/test/test_db.jl @@ -1,7 +1,8 @@ facts("DB") do const mock_db = DB( Dict{Char, Dict{AbstractString, Lemma}}(), - Dict{Char, Dict{Int, Synset}}() + Dict{Char, Dict{Int, Synset}}(), + Dict{Tuple{Int,AbstractString}, AbstractString}() ) context("path_to_data_file") do diff --git a/test/test_sensekeys.jl b/test/test_sensekeys.jl new file mode 100644 index 0000000..623c75b --- /dev/null +++ b/test/test_sensekeys.jl @@ -0,0 +1,10 @@ +facts("sensekeys") do + const mock_db = DB(joinpath(dirname(@__FILE__), "mock_db")) + + + lem = mock_db["section",'n'] + ss = synsets(mock_db, lem) + @fact sensekeys(mock_db, lem) --> ["section%1:15:01::"] + @fact sensekey(mock_db, ss[1], lem) --> "section%1:15:01::" + +end From 53db8b5a06f1354ad907cdfdce19fc9b5e41f7c3 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Sat, 27 Aug 2016 02:52:06 +1000 Subject: [PATCH 2/5] =update readme --- README.md | 18 ++++++++++++++++++ 1 file changed, 
18 insertions(+) diff --git a/README.md b/README.md index db61801..80d18d1 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,24 @@ expanded_hypernyms(db, synsets(db, db['n', "DOG"])[1]) > (n) physical entity (an entity that has physical existence) > (n) entity (that which is perceived or known or inferred to have its own distinct existence (living or nonliving)) +```julia +sensekey(db,synsets(db,db['n',"cat"])[1], db['n',"cat"]) +``` +> "cat%1:05:00::" + +```julia +sensekeys(db,db['n',"cat"]) +``` +>8-element Array{SubString{String},1}: +>"cat%1:05:00::" +>"cat%1:18:01::" +>"cat%1:18:00::" +>"cat%1:06:02::" +>"cat%1:06:00::" +>"cat%1:06:01::" +>"cat%1:05:02::" +>"cat%1:04:00::" + ## Design consideration From 565c2d41126ceaaf2b9d2c46e19e59a1236adafe Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 29 Aug 2016 16:22:26 +1000 Subject: [PATCH 3/5] = Add missing file --- src/sensekeys.jl | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/sensekeys.jl diff --git a/src/sensekeys.jl b/src/sensekeys.jl new file mode 100644 index 0000000..ab981bb --- /dev/null +++ b/src/sensekeys.jl @@ -0,0 +1,9 @@ +export sensekey, sensekeys + +function sensekey(db::DB, ss::Synset, lem::Lemma) + db.sensekeys[(ss.offset,lem.word)] +end + +function sensekeys(db::DB, lem::Lemma) + [db.sensekeys[(ss_offset,lem.word)] for ss_offset in lem.synset_offsets] +end From 0fc1be9f73d1ab10b5e29e08c023272a89421588 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 29 Aug 2016 17:05:34 +1000 Subject: [PATCH 4/5] =Update to require 0.4 --- .travis.yml | 4 ++-- REQUIRE | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c28508e..338a8ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,8 @@ os: - linux - osx julia: - - 0.3 - 0.4 + - release - nightly notifications: email: false @@ -16,4 +16,4 @@ script: - julia -e 'Pkg.test("WordNet"; coverage=true)' after_success: - julia -e 'cd(Pkg.dir("WordNet")); Pkg.add("Coverage"); using 
Coverage; Coveralls.submit(Coveralls.process_folder())' - \ No newline at end of file + diff --git a/REQUIRE b/REQUIRE index 17c43b5..2e28dad 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,3 @@ -julia 0.3 +julia 0.4 Compat FactCheck From f1ead462c9cf6f4a0abce9a174d8a20bc216e6df Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 30 Aug 2016 13:01:21 +1000 Subject: [PATCH 5/5] =Also update appveyor to do 0.4, 0.5, and nightlies (0.6) --- appveyor.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index d6989d3..ab9eded 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,9 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.3/julia-0.3-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.3/julia-0.3-latest-win64.exe" + - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" + - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" + - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" + - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"