In [1]:
using NamedColors
using Colors
using AutoHashEquals
using AbstractTrees

In [21]:
using DataStructures

In [55]:
# Every color name in the XKCD color table, gathered into an array.
colour_names = collect(keys(load_xkcd()))


In [98]:
"""
    find_poc_(raw_color_names, nounish_cutoff = 2)

Digest a collection of raw color-name strings and sort the words they contain into
"Parts Of Colors" (POC), by analogy with parts of speech (POS).

Every name is split on spaces, hyphens and slashes. For each word that starts or ends
a name, the number of names it starts is compared with the number of names it ends:
a word that ends names at least as often as it starts them is treated as a color,
otherwise as a modifier. The larger of the two counts is then compared with
`nounish_cutoff`; words at or below the cutoff go into the "nounish" lists.

# Arguments
- `raw_color_names`: collection of color-name strings, eg `["dark red", "grass green"]`.
- `nounish_cutoff`: largest count at which a word is still considered "nounish".

# Returns
The tuple
`(base_colors, generic_modifiers, nounish_colors, nounish_modifiers, conjunctives)`:

- `base_colors`: Normal colors people use all the time, eg `red`, `green`, `tan`, `purple`.
- `generic_modifiers`: Adjectives that people apply to colors, eg `dark`, `pale`.
  Also includes (for POC tagging purposes) things like `greenish` and `bluey`,
  though they should be parsed differently in the next step.
- `nounish_colors`: Things that are not normally considered colors but are used like
  them anyway (and thus don't normally occur with generic modifiers),
  eg `blood`, `puke`, `booger`.
- `nounish_modifiers`: Nouns that are attached to specific colors and indicate a
  specific type of that color, eg "grass" as in "grass green", "ghost" as in "ghost white".
- `conjunctives`: Words that only ever occur between two other words,
  joining them, eg "with".

In general, be confident in the identification of all `base_colors` and
`generic_modifiers` when capturing from a large number of `raw_color_names`.
Be less confident in the identification of `nounish_colors` and `nounish_modifiers`.
Have no confidence in the identification of `conjunctives`; they are for debugging
purposes.
"""
function find_poc_(raw_color_names, nounish_cutoff = 2)
    split_on = [' ', '-', '/']

    # Split every name on the whole delimiter set. A comprehension is used because
    # `split.(names, split_on)` would broadcast the delimiters element-wise against
    # the names instead of passing them to each `split` call as one set.
    tokenised = [split(name, split_on) for name in raw_color_names]
    first_color_words = [first(words) for words in tokenised]
    last_color_words = [last(words) for words in tokenised]

    first_color_counts = counter(first_color_words)
    last_color_counts = counter(last_color_words)

    base_colors = String[] # Generic colors
    nounish_colors = String[] # Words used as colors but probably not normally recognised as such

    modifiers = String[] # Adjectives that modify a color
    nounish_modifiers = String[] # Nouns that describe a particular color, eg lawn as in lawn green

    # Iterate over the distinct words themselves (not over array indices), in order
    # of first appearance. A word missing from a counter has a count of zero.
    for word in unique(vcat(first_color_words, last_color_words))
        fcount = first_color_counts[word]
        lcount = last_color_counts[word]
        if fcount <= lcount
            # Ends names at least as often as it starts them (ties included): a color.
            # Its dominant count, `lcount`, decides whether it is common enough to be
            # a base color.
            if lcount <= nounish_cutoff
                push!(nounish_colors, word)
            else
                push!(base_colors, word)
            end
        else
            # Starts names more often than it ends them: a modifier.
            if fcount <= nounish_cutoff
                push!(nounish_modifiers, word)
            else
                push!(modifiers, word)
            end
        end
    end

    # Words strictly between the first and last word of names with three or more words.
    middle_words = similar(first_color_words, 0)
    for words in tokenised
        if length(words) > 2
            append!(middle_words, words[2:end-1])
        end
    end

    # Conjunctives are the middle words that never start or end any name.
    conjunctives = setdiff(setdiff(middle_words, first_color_words), last_color_words)

    return base_colors, modifiers, nounish_colors, nounish_modifiers, conjunctives
end



find_poc_

In [95]:
# Keep only the color names that contain the substring "engine".
filter(colour_names) do colour_name
    contains(colour_name, "engine")
end

1-element Array{SubString{String},1}:
 "fire engine red"

In [4]:
# ==(n1::Node, n2::Node) = n1.val == n2.val

# Declare a mutable type called `name`, parametric on a string type `S`, that
# subtypes `Node` and has the single field `val::S`. The name is escaped so that the
# type is declared under exactly the identifier the caller passed in.
# NOTE(review): `Node` is not defined in the visible cells — confirm it exists before
# this macro is used.
macro decsubnode(name)
    :(type $(esc(name)){S<:AbstractString}<:Node val::S end)
end
#@decsubnode BaseColorName
#@decsubnode Modifier

@decsubnode (macro with 1 method)

In [5]:
using ParserCombinator

# Common supertype of every node type that makes up a parsed color name.
abstract AbstractColorName

# Leaf node wrapping a single color word, eg "red".
# Hashing and equality are generated from the field by `@auto_hash_equals`.
@auto_hash_equals immutable BaseColorName{S<:AbstractString} <: AbstractColorName
    val::S
end
# Node wrapping a single modifier word that is applied to another color node.
# Hashing and equality are generated from the field by `@auto_hash_equals`.
@auto_hash_equals immutable Modifier{S<:AbstractString} <: AbstractColorName
    val::S
end
# Two color nodes joined by an operator string (eg "ish", "-" or " ").
@auto_hash_equals immutable BinaryOp{S<:AbstractString,
                                     V1<:AbstractColorName,
                                     V2<:AbstractColorName} <: AbstractColorName
    val1::V1 # left-hand color
    op::S    # text of the joining operator
    val2::V2 # right-hand color
end

# A color node preceded by a modifier.
@auto_hash_equals immutable ModifiedColor{S<:AbstractString,
                                          V<:AbstractColorName} <: AbstractColorName
    mod::Modifier{S} # the modifier applied to `val`
    val::V           # the color being modified
end

In [6]:
# Children of a color node are the values of all its fields, in declaration order.
function AbstractTrees.children{T<:AbstractColorName}(v::T)
    return [getfield(v, fieldname) for fieldname in fieldnames(T)]
end

# Print a color node as just the name of its type, without type parameters or fields.
AbstractTrees.printnode{T<:AbstractColorName}(io::IO, ::T) = showcompact(io, T.name)

In [73]:
# A word is a non-greedy run of single word characters joined back into one string.
# The regex `p"\w+"` can't be used because it has no backtracking.
word = Repeat(p"\w"; greedy=false) |> join

# NOTE(review): `mod` is also the name of a function in Base — confirm the shadowing
# is intended.
mod = word  > Modifier
#name = Alt(Equal.(["red", "green", "blue"]) ... ) >  BaseColorName
# NOTE(review): `base_colour_names` is not defined in the visible cells — confirm
# where it comes from.
name = Alt(Equal.( base_colour_names) ... ) >  BaseColorName


# `color` is recursive, so it is declared first and given its matcher further down.
color = Delayed()

modded = mod +  P"\s+" + color > ModifiedColor

# Every alternative must belong to the one `binop` expression: each line ends with
# `|` so that the expression continues onto the next line. A line that *starts*
# with `|` is not a continuation of the previous statement.
binop = (e" with a hint of ") | (e"ish ") |
        (e"y ") | e"/" | e" " | e"-" | e""

binaryoped = color + binop + color > BinaryOp

color.matcher = name | binaryoped | modded

# A complete color name is one `color` followed by end of input.
colorgrammar = color +Eos()

#parse_one("red blue", colordec)   #Hangs forever

ParserCombinator.Seq(:Seq,ParserCombinator.Matcher[ParserCombinator.Delayed(:Delayed,ParserCombinator.Alt(:Alt,ParserCombinator.Matcher[ParserCombinator.Transform(:Transform,ParserCombinator.Alt(:Alt,ParserCombinator.Matcher[ParserCombinator.Equal(:Equal,"red"),ParserCombinator.Equal(:Equal,"ice"),ParserCombinator.Equal(:Equal,"sea"),ParserCombinator.Equal(:Equal,"egg"),ParserCombinator.Equal(:Equal,"rod"),ParserCombinator.Equal(:Equal,"poo"),ParserCombinator.Equal(:Equal,"pea"),ParserCombinator.Equal(:Equal,"tan"),ParserCombinator.Equal(:Equal,"sky"),ParserCombinator.Equal(:Equal,"mud")  …  ParserCombinator.Equal(:Equal,"chartreuse"),ParserCombinator.Equal(:Equal,"watermelon"),ParserCombinator.Equal(:Equal,"periwinkle"),ParserCombinator.Equal(:Equal,"aquamarine"),ParserCombinator.Equal(:Equal,"grapefruit"),ParserCombinator.Equal(:Equal,"ultramarine"),ParserCombinator.Equal(:Equal,"wintergreen"),ParserCombinator.Equal(:Equal,"yellowgreen"),ParserCombinator.Equal(:Equal,"perrywinkle"),P

In [8]:
using Base.Test
# A single base color parses to one leaf node.
@test first(parse_one("blue", colorgrammar)) == BaseColorName{String}("blue")

# Two base colors joined by "ish" parse to one binary node.
# NOTE(review): the grammar cell spells this operator `e"ish "` (with a trailing
# space) while the expected `op` here is "ish" — confirm the two still agree.
@test first(parse_one("redish blue", colorgrammar)) ==
      BinaryOp(BaseColorName("red"), "ish", BaseColorName("blue"))

[1m[32mTest Passed
[0m  Expression: parse_one("redish blue",colorgrammar) |> first == BinaryOp(BaseColorName("red"),"ish",BaseColorName("blue"))
   Evaluated: BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}(BaseColorName{SubString{String}}("red"),"ish",BaseColorName{SubString{String}}("blue")) == BinaryOp{String,BaseColorName{String},BaseColorName{String}}(BaseColorName{String}("red"),"ish",BaseColorName{String}("blue"))

In [15]:
# Parse a longer, nested color description and keep the result for inspection.
t = parse_one("redish blueish dark green-red", colorgrammar)

1-element Array{Any,1}:
 BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}}}(BaseColorName{SubString{String}}("red"),"ish",BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}}(BaseColorName{SubString{String}}("blue"),"ish",BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}(BaseColorName{SubString{String}}("dark")," ",BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}(BaseColorName{SubString{String}}("green"),"-",BaseColorName{SubString{String}}("red")))))

In [16]:
# Print the parse result as a tree on standard output. `t` is the array returned by
# `parse_one`, so the array itself is printed as the root.
print_tree(STDOUT, t)

Any[BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}}}(BaseColorName{SubString{String}}("red"),"ish",BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}}(BaseColorName{SubString{String}}("blue"),"ish",BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}(BaseColorName{SubString{String}}("dark")," ",BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}(BaseColorName{SubString{String}}("green"),"-",BaseColorName{SubString{String}}("red")))))]
└─ BinaryOp
   ├─ BaseCol

In [17]:
?print_tree

search: [1mp[22m[1mr[22m[1mi[22m[1mn[22m[1mt[22m[1m_[22m[1mt[22m[1mr[22m[1me[22m[1me[22m [1mp[22m[1mr[22m[1mi[22m[1mn[22m[1mt[22m[1m_[22mwi[1mt[22mh_colo[1mr[22m [1mp[22m[1mr[22m[1mi[22m[1mn[22m[1mt[22m[1m_[22mshor[1mt[22mest



No documentation found.

`AbstractTrees.print_tree` is a `Function`.

```
# 2 methods for generic function "print_tree":
print_tree(f::Function, io::IO, tree, args...; kwargs...) at /home/ubuntu/.julia/v0.5/AbstractTrees/src/AbstractTrees.jl:150
print_tree(io::IO, tree, args...; kwargs...) at /home/ubuntu/.julia/v0.5/AbstractTrees/src/AbstractTrees.jl:151
```


In [12]:
# Inspect what `children` yields for a leaf node.
children(BaseColorName("red"))

1-element Array{String,1}:
 "red"

In [13]:
# Concrete type of the first (root) element of the parse result.
T = typeof(t[1])

BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BinaryOp{SubString{String},BaseColorName{SubString{String}},BaseColorName{SubString{String}}}}}

In [14]:
# The type's name without its parameters; this is what `printnode` prints.
T.name

BinaryOp