diff --git a/source/webster/apidata.go b/source/webster/apidata.go
new file mode 100644
index 0000000..21489cf
--- /dev/null
+++ b/source/webster/apidata.go
@@ -0,0 +1,382 @@
+package webster
+
+import (
+ "encoding/json"
+ "fmt"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "github.com/Rican7/define/source"
+)
+
+const (
+ // Tags found as the first element of prefixed data arrays. See https://www.dictionaryapi.com/products/json#sec-2
+ arrayDataTagText = "text"
+ arrayDataTagSense = "sense"
+ arrayDataTagBindingSubstitute = "bs"
+ arrayDataTagVerbalIllustrations = "vis"
+
+ // Keys used to look up values in data objects. See https://www.dictionaryapi.com/products/json#sec-2
+ objectDataTagText = "t"
+ objectDataTagSense = "sense"
+ objectDataTagSenseNumber = "sn"
+ objectDataTagDefiningText = "dt"
+ objectDataTagAttributionOfQuote = "aq"
+ objectDataTagAuthor = "auth"
+)
+
+var (
+ // regexpWebsterTokens is a regular expression for matching Webster API
+ // text tokens, capturing the first field after the token name (if any).
+ //
+ // Fields are matched with negated character classes so that any further
+ // fields (e.g. "{d_link|text|target}") are dropped along with the token.
+ // See https://www.dictionaryapi.com/products/json#sec-2.tokens
+ regexpWebsterTokens = regexp.MustCompile(`\{[^|}]*(?:\|([^|}]*)[^}]*)?\}`)
+
+ // regexpWebsterSenseNumber is a regular expression for matching Webster API
+ // sense numbers.
+ //
+ // Webster API sense numbers may be numerals, lower-case letters, or
+ // parenthesized numerals, as documented:
+ //
+ // See https://www.dictionaryapi.com/products/json#sec-2.sn
+ regexpWebsterSenseNumber = regexp.MustCompile(`(\d+)? ?(\w+)? ?(\(\d+\))?`)
+)
+
+// apiRawResponse defines a raw Webster API response: a list whose items are held as any until converted to a typed apiResponse
+type apiRawResponse []any
+
+// apiResponse defines a Webster API response as a list of items that all share the type T
+type apiResponse[T apiResponseItem] []T
+
+// apiResponseItem defines a type constraint for Webster API response items: either a search result or a definition result
+type apiResponseItem interface {
+ apiSearchResult | apiDefinitionResult
+}
+
+// apiSearchResults defines a list of Webster API search results
+type apiSearchResults []apiSearchResult
+
+// apiDefinitionResults defines a list of Webster API definition results; see toResults for its conversion
+type apiDefinitionResults []apiDefinitionResult
+
+// apiSearchResult defines the data for a Webster API search result, i.e. a response item that is a JSON string
+type apiSearchResult string
+
+// apiDefinitionResult defines the data structure for a Webster API definition
+// result, i.e. a response item that is a JSON object
+type apiDefinitionResult struct {
+ Meta apiDefinitionMeta `json:"meta"`
+ Hom int `json:"hom"`
+ Hwi apiDefinitionHeadwordInfo `json:"hwi"` // headword and pronunciations
+ Fl string `json:"fl"` // copied to the entry's LexicalCategory
+ Ins []struct {
+ If string `json:"if"`
+ } `json:"ins"`
+ Def []apiDefinitionSectionEntry `json:"def"` // each section's Sseq is converted to senses
+ Uros []struct {
+ Ure string `json:"ure"`
+ Fl string `json:"fl"`
+ } `json:"uros"`
+ Syns []struct {
+ Pl string `json:"pl"`
+ Pt [][]any `json:"pt"`
+ } `json:"syns"`
+ Et [][]string `json:"et"` // prefixed arrays; only "text" items become etymologies
+ Date string `json:"date"`
+ LdLink struct {
+ LinkHw string `json:"link_hw"`
+ LinkFl string `json:"link_fl"`
+ } `json:"ld_link"`
+ Shortdef []string `json:"shortdef"`
+}
+
+// apiDefinitionMeta defines the data structure for the "meta" object of a Webster API definition result
+type apiDefinitionMeta struct {
+ ID string `json:"id"`
+ UUID string `json:"uuid"`
+ Sort string `json:"sort"`
+ Src string `json:"src"`
+ Section string `json:"section"`
+ Stems []string `json:"stems"`
+ Offensive bool `json:"offensive"`
+}
+
+// apiDefinitionHeadwordInfo defines the data structure for Webster API
+// definition headword information (the "hwi" object of a definition result)
+type apiDefinitionHeadwordInfo struct {
+ Hw string `json:"hw"` // headword; "*" characters are stripped by cleanHeadword
+ Prs []struct {
+ Mw string `json:"mw"` // copied to the entry's Pronunciations
+ Sound struct {
+ Audio string `json:"audio"`
+ Ref string `json:"ref"`
+ Stat string `json:"stat"`
+ } `json:"sound"`
+ } `json:"prs"`
+}
+
+// apiDefinitionSectionEntry defines the data structure for Webster API
+// definition section entries (the items of a definition result's "def" list)
+type apiDefinitionSectionEntry struct {
+ Vd string `json:"vd"`
+ Sseq apiSenseSequence `json:"sseq"` // converted to source senses by toSenses
+}
+
+// apiSenseSequence defines the data structure for a Webster API sense sequence: a list of sense groups
+type apiSenseSequence []apiSense
+
+// apiSense defines the data structure for a Webster API sense group: a list of prefixed arrays read as [tag, data]
+type apiSense [][]any
+
+// apiSenseData defines the object data of a single Webster API sense, looked up by the objectDataTag keys
+type apiSenseData map[string]any
+
+// apiSenseNumber defines the data structure for a parsed Webster API sense number (see parseSenseNumber)
+type apiSenseNumber struct {
+ number int // leading numeral; 0 when the sense number has none
+ letter string // second match group of regexpWebsterSenseNumber; may be empty
+ sub string // parenthesized numeral including its parentheses; may be empty
+}
+
+// UnmarshalJSON satisfies the encoding/json.Unmarshaler interface, picking the item type from the first item of the list
+func (r *apiRawResponse) UnmarshalJSON(data []byte) error {
+ var rawSlice []json.RawMessage
+
+ if err := json.Unmarshal(data, &rawSlice); err != nil {
+ return err
+ }
+
+ if len(rawSlice) < 1 || len(rawSlice[0]) < 1 { // nothing to inspect; r is left unchanged
+ return nil
+ }
+
+ var newResponse apiRawResponse
+ var err error
+
+ // Inspect the first byte of the first item to choose the type used for every item
+ switch rawSlice[0][0] {
+ case '"':
+ var response apiResponse[apiSearchResult]
+ newResponse, err = replaceData(data, response)
+ case '{':
+ var response apiResponse[apiDefinitionResult]
+ newResponse, err = replaceData(data, response)
+ } // any other leading byte leaves newResponse nil
+
+ if err != nil {
+ return err
+ }
+ *r = newResponse
+
+ return nil
+}
+
+// replaceData takes raw JSON bytes and a new response type and returns an
+// apiRawResponse with the new response data after unmarshalling.
+func replaceData[T apiResponseItem](data []byte, response apiResponse[T]) (apiRawResponse, error) {
+ if err := json.Unmarshal(data, &response); err != nil { // response is the typed decode target
+ return nil, err
+ }
+
+ newResponse := make(apiRawResponse, len(response))
+ for i, item := range response {
+ newResponse[i] = item // each typed item is stored as any
+ }
+
+ return newResponse, nil
+}
+
+func apiResponseFromRaw[T apiResponseItem](raw apiRawResponse) apiResponse[T] { // converts raw items back to the typed form
+ response := make(apiResponse[T], len(raw))
+
+ if len(raw) < 1 {
+ return response
+ }
+
+ for i, item := range raw {
+ response[i] = item.(T) // panics if an item's dynamic type is not T
+ }
+
+ return response
+}
+
+// toResults converts the API response to the results that a source expects to
+// return. Only entries whose headword matches the first entry's are kept.
+func (r apiDefinitionResults) toResults() []source.DictionaryResult {
+ mainEntry := r[0] // panics on an empty r; callers are expected to check the length first
+ mainWord := cleanHeadword(mainEntry.Hwi.Hw)
+
+ sourceEntries := make([]source.DictionaryEntry, 0, len(r))
+
+ for _, apiEntry := range r {
+ headword := cleanHeadword(apiEntry.Hwi.Hw)
+
+ if !strings.EqualFold(headword, mainWord) { // skip entries for a different word (case-insensitive)
+ continue
+ }
+
+ sourceEntry := source.DictionaryEntry{}
+
+ sourceEntry.Word = headword
+ sourceEntry.LexicalCategory = apiEntry.Fl
+
+ sourceEntry.Pronunciations = make([]string, 0, len(apiEntry.Hwi.Prs))
+ for _, pronunciation := range apiEntry.Hwi.Prs {
+ sourceEntry.Pronunciations = append(sourceEntry.Pronunciations, pronunciation.Mw)
+ }
+
+ // We filter etymologies and potentially add them later so the capacity
+ // can't be reasonably known here.
+ sourceEntry.Etymologies = make([]string, 0)
+ for _, etymology := range apiEntry.Et {
+ // Webster API etymologies are returned in prefixed arrays.
+ // See https://www.dictionaryapi.com/products/json#sec-2.et
+ if len(etymology) < 2 || etymology[0] != arrayDataTagText {
+ continue
+ }
+
+ etymologyText := cleanTextOfTokens(etymology[1])
+
+ sourceEntry.Etymologies = append(sourceEntry.Etymologies, etymologyText)
+ }
+
+ for _, def := range apiEntry.Def {
+ sourceEntry.Senses = append(sourceEntry.Senses, def.Sseq.toSenses()...)
+ }
+
+ sourceEntries = append(sourceEntries, sourceEntry)
+ }
+
+ return []source.DictionaryResult{
+ {
+ Language: "en", // TODO
+ Entries: sourceEntries,
+ },
+ }
+}
+
+// toSenses converts the API sense sequence to a list of source.Sense, nesting sub-senses one level deep
+func (s apiSenseSequence) toSenses() []source.Sense {
+ senses := make([]source.Sense, 0)
+
+ for _, apiSense := range s {
+ var lastSenseNumber *apiSenseNumber // reset for every sense group
+
+ for _, apiSenseContainer := range apiSense {
+ // Webster API senses are returned in prefixed arrays.
+ // See https://www.dictionaryapi.com/products/json#sec-2.sense
+ if len(apiSenseContainer) < 2 {
+ continue
+ }
+
+ var senseData apiSenseData
+
+ switch apiSenseContainer[0] {
+ case arrayDataTagSense:
+ senseData = apiSenseData(apiSenseContainer[1].(map[string]any)) // panics if the data is not a JSON object
+ case arrayDataTagBindingSubstitute:
+ // See https://www.dictionaryapi.com/products/json#sec-2.bs
+ bindingSubstitute := apiSenseContainer[1].(map[string]any)
+ senseData = apiSenseData(bindingSubstitute[objectDataTagSense].(map[string]any)) // panics if "sense" is missing
+ default:
+ continue
+ }
+
+ senseNumber := parseSenseNumber(senseData[objectDataTagSenseNumber])
+
+ sourceSense := senseData.toSense()
+
+ if lastSenseNumber == nil || (senseNumber != nil && lastSenseNumber.number < senseNumber.number) {
+ // The sense is a new sense: no previous number in this group, or a higher main number
+ senses = append(senses, sourceSense)
+ } else {
+ // The sense is a sub-sense of the most recently appended sense
+ lastSense := &(senses[len(senses)-1])
+ lastSense.SubSenses = append(lastSense.SubSenses, sourceSense)
+ }
+
+ lastSenseNumber = senseNumber
+ }
+ }
+
+ return senses
+}
+
+// toSense converts the API sense data to a source.Sense, collecting definitions and examples from its defining text
+func (d apiSenseData) toSense() source.Sense {
+ definitions := make([]string, 0)
+ examples := make([]string, 0)
+
+ senseDefinitions := d[objectDataTagDefiningText].([]any) // panics if "dt" is missing or not an array
+
+ for _, defParts := range senseDefinitions {
+ definition := defParts.([]any)
+
+ // Webster API definition parts are returned in prefixed arrays.
+ // See https://www.dictionaryapi.com/products/json#sec-2.dt
+ if len(definition) < 2 {
+ continue
+ }
+
+ switch definition[0] {
+ case arrayDataTagText:
+ definitionText := cleanTextOfTokens(definition[1].(string))
+
+ definitions = append(definitions, definitionText)
+ case arrayDataTagVerbalIllustrations:
+ exampleTextObjects := definition[1].([]any)
+
+ for _, exampleTextObject := range exampleTextObjects {
+ exampleTextMap := exampleTextObject.(map[string]any)
+ exampleText := cleanTextOfTokens(exampleTextMap[objectDataTagText].(string)) // panics if "t" is missing
+
+ if exampleTextMap[objectDataTagAttributionOfQuote] != nil {
+ exampleAttribution := exampleTextMap[objectDataTagAttributionOfQuote].(map[string]any)
+
+ // TODO: Handle attributions more generally... especially
+ // presentation wise...
+ if exampleAttribution[objectDataTagAuthor] != nil {
+ exampleText = fmt.Sprintf("'%s' - %s", exampleText, exampleAttribution[objectDataTagAuthor])
+ }
+ }
+
+ examples = append(examples, exampleText)
+ }
+ } // parts with any other tag are ignored
+ }
+
+ return source.Sense{
+ Definitions: definitions,
+ Examples: examples,
+ }
+}
+
+func cleanHeadword(headword string) string {
+ return strings.ReplaceAll(headword, "*", "") // drop every "*" from the headword
+}
+
+func cleanTextOfTokens(text string) string {
+ return regexpWebsterTokens.ReplaceAllString(text, "$1") // each token becomes its first capture group (empty if unset)
+}
+
+func parseSenseNumber(rawSenseNumber any) *apiSenseNumber {
+ if rawSenseNumber == nil { // no sense number present
+ return nil
+ }
+
+ parsed := regexpWebsterSenseNumber.FindStringSubmatch(rawSenseNumber.(string)) // panics if the value is not a string
+
+ var main int
+ if parsedMain, err := strconv.Atoi(parsed[1]); err == nil { // an empty first group leaves main at 0
+ main = parsedMain
+ }
+
+ return &apiSenseNumber{
+ number: main,
+ letter: parsed[2],
+ sub: parsed[3],
+ }
+}
diff --git a/source/webster/webster.go b/source/webster/webster.go
index c1527ff..58ceebf 100644
--- a/source/webster/webster.go
+++ b/source/webster/webster.go
@@ -4,17 +4,12 @@
package webster
import (
- "encoding/xml"
- "html"
+ "encoding/json"
"io"
"net/http"
"net/url"
- "regexp"
- "strconv"
- "strings"
"github.com/Rican7/define/source"
- "github.com/microcosm-cc/bluemonday"
)
// Name defines the name of the source
@@ -22,41 +17,21 @@ const Name = "Merriam-Webster's Dictionary API"
const (
// baseURLString is the base URL for all Webster API interactions
- baseURLString = "http://www.dictionaryapi.com/api/v1/"
+ baseURLString = "https://www.dictionaryapi.com/api/v3/"
- entriesURLString = baseURLString + "references/collegiate/xml/"
+ entriesURLString = baseURLString + "references/collegiate/json/"
- httpRequestAcceptHeaderName = "Accept"
- httpRequestAppKeyQueryParamName = "key"
+ httpRequestAcceptHeaderName = "Accept"
+ httpRequestKeyQueryParamName = "key"
- xmlMIMEType = "application/xml"
- xmlTextMIMEType = "text/xml"
- xmlBaseMIMEType = "xml"
-
- senseTagName = "sn"
- senseDividerTagName = "sd"
- definingTextTagName = "dt"
- calledAlsoTagName = "ca"
-
- senseDividerPrefix = "; "
- definingTextPrefix = ":"
- exampleWrapCharacter = "'"
- authorPrefix = "- "
- crossReferenceJoinString = ", "
+ jsonMIMEType = "application/json"
)
// apiURL is the URL instance used for Webster API calls
var apiURL *url.URL
// validMIMETypes is the list of valid response MIME types
-var validMIMETypes = []string{xmlMIMEType, xmlTextMIMEType, xmlBaseMIMEType}
-
-// htmlCleaner is used to clean the strings returned from the API
-var htmlCleaner = bluemonday.StrictPolicy()
-
-// etymologyMetaStripperRegex is a regular expression for stripping meta from
-// etymology entries
-var etymologyMetaStripperRegex = regexp.MustCompile(".*?")
+var validMIMETypes = []string{jsonMIMEType}
// api contains a configured HTTP client for Webster API operations
type api struct {
@@ -64,75 +39,6 @@ type api struct {
appKey string
}
-// apiResult defines the data structure for Webster API results
-type apiResult struct {
- Entries []struct {
- ID string `xml:"id,attr"`
- Word string `xml:"ew"`
- Pronunciation string `xml:"pr"`
- LexicalCategory string `xml:"fl"`
- Etymologies []cleanableString `xml:"et"`
- DefinitionContainers []apiDefinitionContainer `xml:"def"`
- } `xml:"entry"`
-}
-
-// apiDefinitionContainer defines the data structure for Oxford API definitions
-type apiDefinitionContainer struct {
- Raw string `xml:",innerxml"`
- Date string `xml:"date"`
- DefiningTexts []cleanableString `xml:"dt"`
-
- senses []apiSense
-}
-
-// apiDefiningText defines the data structure for defining texts
-type apiDefiningText struct {
- Raw string `xml:",innerxml"`
- Stripped string `xml:",chardata"`
- CrossReferences []string `xml:"sx"`
- Examples []apiExample `xml:"vi"`
- UsageNotes []struct {
- Note string `xml:",chardata"`
- Examples []apiExample `xml:"vi"`
- } `xml:"un"`
-
- cleaned string
- formatted string
-}
-
-// apiExample defines the data structure for examples
-type apiExample struct {
- Raw string `xml:",innerxml"`
- Stripped string `xml:",chardata"`
- Author string `xml:"aq"`
-
- cleaned string
- formatted string
-}
-
-// apiSense defines the data structure for Oxford API senses
-type apiSense struct {
- Definitions []string
- Examples []string
- Notes []string
-
- Subsenses []apiSense
-}
-
-// sensePosition defines the data structure for sense positions
-type sensePosition struct {
- Position string `xml:",chardata"`
- SubPosition string `xml:"snp"`
-}
-
-// cleanableString defines the data structure for cleanable XML strings
-type cleanableString struct {
- Raw string `xml:",innerxml"`
- Stripped string `xml:",chardata"`
-
- cleaned string
-}
-
// Initialize the package
func init() {
var err error
@@ -160,7 +66,7 @@ func (g *api) Define(word string) ([]source.DictionaryResult, error) {
// Prepare our URL
requestURL, err := url.Parse(entriesURLString + word)
queryParams := apiURL.Query()
- queryParams.Set(httpRequestAppKeyQueryParamName, g.appKey)
+ queryParams.Set(httpRequestKeyQueryParamName, g.appKey)
requestURL.RawQuery = queryParams.Encode()
if err != nil {
@@ -173,9 +79,7 @@ func (g *api) Define(word string) ([]source.DictionaryResult, error) {
return nil, err
}
- httpRequest.Header.Set(httpRequestAcceptHeaderName, xmlMIMEType)
- httpRequest.Header.Add(httpRequestAcceptHeaderName, xmlTextMIMEType)
- httpRequest.Header.Add(httpRequestAcceptHeaderName, xmlBaseMIMEType)
+ httpRequest.Header.Set(httpRequestAcceptHeaderName, jsonMIMEType)
httpResponse, err := g.httpClient.Do(httpRequest)
@@ -195,326 +99,29 @@ func (g *api) Define(word string) ([]source.DictionaryResult, error) {
return nil, err
}
- var result apiResult
+ var rawResponse apiRawResponse
- if err = xml.Unmarshal(body, &result); err != nil {
+ if err = json.Unmarshal(body, &rawResponse); err != nil {
return nil, err
}
- if len(result.Entries) < 1 {
+ if len(rawResponse) < 1 {
return nil, &source.EmptyResultError{Word: word}
}
- return source.ValidateAndReturnDictionaryResults(word, result.toResults())
-}
-
-// UnmarshalXML customizes the way we can unmarshal our API definitions value
-func (s *apiDefinitionContainer) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- var err error
-
- // Alias our type so that we can unmarshal as usual
- type sense apiDefinitionContainer
-
- err = d.DecodeElement((*sense)(s), &start)
-
- // Create a new decoder for our RAW parts
- subDecoder := xml.NewDecoder(strings.NewReader(s.Raw))
-
- senses := []*apiSense{}
- subsenses := []*apiSense{}
- senseIndex := 0
- var currentSense *apiSense
- isDefinitionContinuation := false
-
- for token, err := subDecoder.Token(); token != nil || err == nil; token, err = subDecoder.Token() {
- switch t := token.(type) {
- case xml.StartElement:
- switch t.Name.Local {
- case senseTagName:
- sp := &sensePosition{}
- err = subDecoder.DecodeElement(&sp, &t)
-
- currentSense = &apiSense{}
-
- // If the position is a number, then its a top-level sense
- if _, err := strconv.Atoi(sp.Position); err == nil {
- if len(subsenses) > 0 {
- senses[senseIndex].Subsenses = make([]apiSense, len(subsenses))
- for i, subsense := range subsenses {
- senses[senseIndex].Subsenses[i] = *subsense
- }
-
- // Reset our subsenses
- subsenses = make([]*apiSense, 0)
- }
-
- if len(senses) > 0 {
- senseIndex++
- }
- senses = append(senses, currentSense)
- } else {
- subsenses = append(subsenses, currentSense)
- }
- case senseDividerTagName:
- lastDefinitionIndex := len(currentSense.Definitions) - 1
-
- str := &cleanableString{}
- err = subDecoder.DecodeElement(&str, &t)
-
- currentSense.Definitions[lastDefinitionIndex] =
- currentSense.Definitions[lastDefinitionIndex] + senseDividerPrefix + str.cleaned
-
- isDefinitionContinuation = true
- case definingTextTagName:
- if len(senses) == 0 || currentSense == nil {
- currentSense = &apiSense{}
- senses = append(senses, currentSense)
- }
-
- dt := &apiDefiningText{}
- err = subDecoder.DecodeElement(&dt, &t)
-
- if !isDefinitionContinuation {
- currentSense.Definitions = append(currentSense.Definitions, dt.formatted)
-
- currentSense.Examples = make([]string, len(dt.Examples))
- for i, example := range dt.Examples {
- currentSense.Examples[i] = example.formatted
- }
-
- currentSense.Notes = make([]string, len(dt.UsageNotes))
- for i, note := range dt.UsageNotes {
- currentSense.Notes[i] = note.Note
- }
- } else {
- lastDefinitionIndex := len(currentSense.Definitions) - 1
-
- currentSense.Definitions[lastDefinitionIndex] =
- currentSense.Definitions[lastDefinitionIndex] + " " + dt.formatted
-
- isDefinitionContinuation = false
- }
- }
-
- if err != nil {
- return err
- }
- }
- }
-
- s.senses = make([]apiSense, len(senses))
- for i, sense := range senses {
- s.senses[i] = *sense
- }
-
- return err
-}
-
-// UnmarshalXML customizes the way we can unmarshal our API defining texts value
-func (dt *apiDefiningText) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- // Alias our type so that we can unmarshal as usual
- type defText apiDefiningText
-
- // Unmarshal our wrapped value before cleaning
- err := d.DecodeElement((*defText)(dt), &start)
-
- // Initialize our cleaned string
- cleanStr := &cleanableString{}
- xml.Unmarshal(wrapRawXML(dt.Raw), cleanStr)
- dt.cleaned = cleanStr.cleaned
-
- dt.cleaned = strings.TrimLeft(dt.cleaned, definingTextPrefix)
-
- // Our formatted version will start as just the cleaned version
- dt.formatted = dt.cleaned
-
- // Clean our cross references
- for i, crossReference := range dt.CrossReferences {
- crossReference = htmlCleaner.Sanitize(crossReference)
- crossReference = strings.TrimSpace(crossReference)
- crossReference = strings.TrimLeft(crossReference, definingTextPrefix)
-
- dt.CrossReferences[i] = crossReference
- }
-
- for i, usageNote := range dt.UsageNotes {
- // Grab our examples from our usage notes
- dt.Examples = append(dt.Examples, usageNote.Examples...)
+ switch rawResponse[0].(type) {
+ case apiDefinitionResult:
+ response := apiResponseFromRaw[apiDefinitionResult](rawResponse)
+ results := apiDefinitionResults(response)
- // Clean our note
- dt.UsageNotes[i].Note = strings.TrimSpace(usageNote.Note)
- }
-
- // If we only have a single usage note, and the defining text starts with it
- if len(dt.UsageNotes) == 1 && strings.HasPrefix(dt.cleaned, dt.UsageNotes[0].Note) {
- // Functionally replace the defining text with the note
- dt.formatted = dt.UsageNotes[0].Note
-
- // Remove the note, since it would then be redundant
- dt.UsageNotes = dt.UsageNotes[:0]
- } else {
- for _, usageNote := range dt.UsageNotes {
- if strings.Contains(dt.formatted, usageNote.Note) {
- parts := strings.SplitN(dt.formatted, usageNote.Note, 2)
-
- // Get our start and end pieces
- strStart := strings.TrimSpace(parts[0])
- strEnd := strings.TrimSpace(parts[1])
-
- dt.formatted = strStart + strEnd
- }
- }
- }
-
- for _, example := range dt.Examples {
- if strings.Contains(dt.formatted, example.cleaned) {
- parts := strings.SplitN(dt.formatted, example.cleaned, 2)
-
- // Get our start and end pieces
- strStart := strings.TrimSpace(parts[0])
- strEnd := strings.TrimSpace(parts[1])
-
- dt.formatted = strStart + strEnd
- }
- }
-
- // If our cleaned string only contains our cross references
- if len(dt.CrossReferences) > 1 && strings.EqualFold(dt.formatted, strings.Join(dt.CrossReferences, " ")) {
- // Add commas, for readability
- dt.formatted = strings.Join(dt.CrossReferences, crossReferenceJoinString)
- }
-
- return err
-}
-
-// UnmarshalXML customizes the way we can unmarshal our API example value
-func (e *apiExample) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- // Alias our type so that we can unmarshal as usual
- type example apiExample
-
- // Unmarshal our wrapped value before cleaning
- err := d.DecodeElement((*example)(e), &start)
-
- // Initialize our cleaned string
- cleanStr := &cleanableString{}
- xml.Unmarshal(wrapRawXML(e.Raw), cleanStr)
- e.cleaned = cleanStr.cleaned
-
- // Our formatted version will start as just the cleaned version
- e.formatted = e.cleaned
-
- // Clean our author string
- e.Author = strings.TrimSpace(e.Author)
-
- // If we have an author
- if e.Author != "" {
- // If the author is in the string, strip it from the original string,
- // so that we can properly append it
- if strings.Contains(e.cleaned, e.Author) {
- parts := strings.SplitN(e.cleaned, e.Author, 2)
-
- // Get our start and end pieces
- strStart := strings.TrimSpace(parts[0])
- strEnd := strings.TrimSpace(parts[1])
-
- // If we have an ending string, pad it
- if 0 < len(strEnd) {
- strEnd = " " + strEnd
- }
-
- e.formatted = exampleWrapCharacter + strStart + exampleWrapCharacter + " " + authorPrefix + e.Author + strEnd
- } else {
- e.formatted = exampleWrapCharacter + e.cleaned + exampleWrapCharacter + " " + authorPrefix + e.Author
- }
- }
-
- return err
-}
-
-// UnmarshalXML customizes the way we can unmarshal cleanable strings
-func (s *cleanableString) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- // Alias our type so that we can unmarshal as usual
- type str cleanableString
-
- // Unmarshal our wrapped value before cleaning
- err := d.DecodeElement((*str)(s), &start)
-
- // Initialize our clean string
- s.cleaned = s.Raw
-
- // Clean our raw string
- s.cleaned = htmlCleaner.Sanitize(s.cleaned)
- s.cleaned = html.UnescapeString(s.cleaned)
- s.cleaned = strings.TrimSpace(s.cleaned)
-
- return err
-}
-
-// toResult converts the API response to the results that a source expects to
-// return.
-func (r *apiResult) toResults() []source.DictionaryResult {
- mainEntry := r.Entries[0]
- mainWord := mainEntry.Word
-
- sourceEntries := make([]source.DictionaryEntry, 0, len(r.Entries))
-
- for _, apiEntry := range r.Entries {
- if !strings.EqualFold(apiEntry.Word, mainWord) {
- continue
- }
-
- entry := source.DictionaryEntry{}
-
- entry.Word = apiEntry.Word
- entry.LexicalCategory = apiEntry.LexicalCategory
-
- if apiEntry.Pronunciation != "" {
- entry.Pronunciations = []string{apiEntry.Pronunciation}
- }
-
- entry.Etymologies = make([]string, 0, len(apiEntry.Etymologies))
- for _, etymology := range apiEntry.Etymologies {
- etymology.cleaned = etymologyMetaStripperRegex.ReplaceAllString(etymology.cleaned, "")
-
- entry.Etymologies = append(entry.Etymologies, strings.TrimSpace(etymology.cleaned))
- }
-
- if len(apiEntry.DefinitionContainers) > 0 {
- def := apiEntry.DefinitionContainers[0]
+ return source.ValidateAndReturnDictionaryResults(word, results.toResults())
+ case apiSearchResult:
+ response := apiResponseFromRaw[apiSearchResult](rawResponse)
+ _ = apiSearchResults(response)
- for _, sense := range def.senses {
- sourceSense := sense.toSense()
-
- // Only go one level deep of sub-senses
- for _, subSense := range sense.Subsenses {
- sourceSense.SubSenses = append(sourceSense.SubSenses, subSense.toSense())
- }
-
- entry.Senses = append(entry.Senses, sourceSense)
- }
- }
-
- sourceEntries = append(sourceEntries, entry)
- }
-
- return []source.DictionaryResult{
- {
- Language: "en", // TODO
- Entries: sourceEntries,
- },
- }
-}
-
-// toSense converts the API sense to a source.Sense
-func (s *apiSense) toSense() source.Sense {
- return source.Sense{
- Definitions: s.Definitions,
- Examples: s.Examples,
- Notes: s.Notes,
+ // TODO: Handle a fallback search with a "did you mean?" error of sorts
+ return nil, &source.EmptyResultError{Word: word}
}
-}
-// wrapRawXML wraps a raw XML string in arbitrary container elements
-func wrapRawXML(raw string) []byte {
- return []byte("" + raw + "")
+ return nil, &source.EmptyResultError{Word: word}
}