-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updating the Webster source to support the V3 API
- Loading branch information
Showing
2 changed files
with
387 additions
and
406 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,366 @@ | ||
package webster | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/Rican7/define/source" | ||
) | ||
|
||
var ( | ||
// regexpWebsterTokens is a regular exprssion for matching Webster API | ||
// text tokens. | ||
// | ||
// Webster API text may contain "tokens", as documented: | ||
// | ||
// See https://www.dictionaryapi.com/products/json#sec-2.tokens | ||
regexpWebsterTokens = regexp.MustCompile(`{.*?(?:\|(.*?)(?:\|\|?)?)?}`) | ||
|
||
// regexpWebsterSenseNumber is a regular expression for matching Webster API | ||
// sense numbers. | ||
// | ||
// Webster API sense numbers may be numerals, lower-case letters, or | ||
// parenthesized numerals, as documented: | ||
// | ||
// See https://www.dictionaryapi.com/products/json#sec-2.sn | ||
regexpWebsterSenseNumber = regexp.MustCompile(`(\d+)? ?(\w+)? ?(\(\d+\))?`) | ||
) | ||
|
||
// apiRawResponse defines the data structure for a raw Webster API response | ||
type apiRawResponse []any | ||
|
||
// apiResponse defines the data structure for a Webster API response | ||
type apiResponse[T apiResponseItem] []T | ||
|
||
// apiResponseItem defines a type constraint for Webster API response items | ||
type apiResponseItem interface { | ||
apiSearchResult | apiDefinitionResult | ||
} | ||
|
||
// apiSearchResults defines the data for Webster API search results | ||
type apiSearchResults []apiSearchResult | ||
|
||
// apiDefinitionResults defines the data for Webster API definition results | ||
type apiDefinitionResults []apiDefinitionResult | ||
|
||
// apiSearchResult defines the data for a Webster API search result | ||
type apiSearchResult string | ||
|
||
// apiDefinitionResult defines the data structure for a Webster API definition | ||
// result | ||
type apiDefinitionResult struct { | ||
Meta apiDefinitionMeta `json:"meta"` | ||
Hom int `json:"hom"` | ||
Hwi apiDefinitionHeadwordInfo `json:"hwi"` | ||
Fl string `json:"fl"` | ||
Ins []struct { | ||
If string `json:"if"` | ||
} `json:"ins"` | ||
Def []apiDefinitionSectionEntry `json:"def"` | ||
Uros []struct { | ||
Ure string `json:"ure"` | ||
Fl string `json:"fl"` | ||
} `json:"uros"` | ||
Syns []struct { | ||
Pl string `json:"pl"` | ||
Pt [][]any `json:"pt"` | ||
} `json:"syns"` | ||
Et [][]string `json:"et"` | ||
Date string `json:"date"` | ||
LdLink struct { | ||
LinkHw string `json:"link_hw"` | ||
LinkFl string `json:"link_fl"` | ||
} `json:"ld_link"` | ||
Shortdef []string `json:"shortdef"` | ||
} | ||
|
||
// apiDefinitionMeta defines the data structure for Webster API definition meta | ||
type apiDefinitionMeta struct { | ||
ID string `json:"id"` | ||
UUID string `json:"uuid"` | ||
Sort string `json:"sort"` | ||
Src string `json:"src"` | ||
Section string `json:"section"` | ||
Stems []string `json:"stems"` | ||
Offensive bool `json:"offensive"` | ||
} | ||
|
||
// apiDefinitionMeta defines the data structure for Webster API definition | ||
// headword information | ||
type apiDefinitionHeadwordInfo struct { | ||
Hw string `json:"hw"` | ||
Prs []struct { | ||
Mw string `json:"mw"` | ||
Sound struct { | ||
Audio string `json:"audio"` | ||
Ref string `json:"ref"` | ||
Stat string `json:"stat"` | ||
} `json:"sound"` | ||
} `json:"prs"` | ||
} | ||
|
||
// apiDefinitionSectionEntry defines the data structure for Webster API | ||
// definition section entries | ||
type apiDefinitionSectionEntry struct { | ||
Vd string `json:"vd"` | ||
Sseq apiSenseSequence `json:"sseq"` | ||
} | ||
|
||
// apiSenseSequence defines the data structure for a Webster API sense sequence | ||
type apiSenseSequence []apiSense | ||
|
||
// apiSense defines the data structure for a Webster API sense | ||
type apiSense [][]any | ||
|
||
// apiSenseData defines the data structure for a Webster API sense data | ||
type apiSenseData map[string]any | ||
|
||
// apiSenseNumber defines the data structure for a Webster API sense number | ||
type apiSenseNumber struct { | ||
number int | ||
letter string | ||
sub string | ||
} | ||
|
||
// UnmarshalJSON satisfies the encoding/json.Unmarshaler interface | ||
func (r *apiRawResponse) UnmarshalJSON(data []byte) error { | ||
var rawSlice []json.RawMessage | ||
|
||
if err := json.Unmarshal(data, &rawSlice); err != nil { | ||
return err | ||
} | ||
|
||
if len(rawSlice) < 1 || len(rawSlice[0]) < 1 { | ||
return nil | ||
} | ||
|
||
var newResponse apiRawResponse | ||
var err error | ||
|
||
// Inspect the first byte of the first item | ||
switch rawSlice[0][0] { | ||
case '"': | ||
var response apiResponse[apiSearchResult] | ||
newResponse, err = replaceData(data, response) | ||
case '{': | ||
var response apiResponse[apiDefinitionResult] | ||
newResponse, err = replaceData(data, response) | ||
} | ||
|
||
if err != nil { | ||
return err | ||
} | ||
*r = newResponse | ||
|
||
return nil | ||
} | ||
|
||
// replaceData takes raw JSON bytes and a new response type and returns a | ||
// apiRawResponse with the new response data after unmarshalling. | ||
func replaceData[T apiResponseItem](data []byte, response apiResponse[T]) (apiRawResponse, error) { | ||
if err := json.Unmarshal(data, &response); err != nil { | ||
return nil, err | ||
} | ||
|
||
newResponse := make(apiRawResponse, len(response)) | ||
for i, item := range response { | ||
newResponse[i] = item | ||
} | ||
|
||
return newResponse, nil | ||
} | ||
|
||
func apiResponseFromRaw[T apiResponseItem](raw apiRawResponse) apiResponse[T] { | ||
response := make(apiResponse[T], len(raw)) | ||
|
||
if len(raw) < 1 { | ||
return response | ||
} | ||
|
||
for i, item := range raw { | ||
response[i] = item.(T) | ||
} | ||
|
||
return response | ||
} | ||
|
||
// toResult converts the API response to the results that a source expects to | ||
// return. | ||
func (r apiDefinitionResults) toResults() []source.DictionaryResult { | ||
mainEntry := r[0] | ||
mainWord := cleanHeadword(mainEntry.Hwi.Hw) | ||
|
||
sourceEntries := make([]source.DictionaryEntry, 0, len(r)) | ||
|
||
for _, apiEntry := range r { | ||
headword := cleanHeadword(apiEntry.Hwi.Hw) | ||
|
||
if !strings.EqualFold(headword, mainWord) { | ||
continue | ||
} | ||
|
||
sourceEntry := source.DictionaryEntry{} | ||
|
||
sourceEntry.Word = headword | ||
sourceEntry.LexicalCategory = apiEntry.Fl | ||
|
||
sourceEntry.Pronunciations = make([]string, 0, len(apiEntry.Hwi.Prs)) | ||
for _, pronunciation := range apiEntry.Hwi.Prs { | ||
sourceEntry.Pronunciations = append(sourceEntry.Pronunciations, pronunciation.Mw) | ||
} | ||
|
||
// We filter etymologies and potentially add them later so the capacity | ||
// can't be reasonably known here. | ||
sourceEntry.Etymologies = make([]string, 0) | ||
for _, etymology := range apiEntry.Et { | ||
// Webster API etymologies are returned in prefixed arrays. | ||
// See https://www.dictionaryapi.com/products/json#sec-2.et | ||
if len(etymology) < 2 || etymology[0] != "text" { | ||
continue | ||
} | ||
|
||
etymologyText := cleanTextOfTokens(etymology[1]) | ||
|
||
sourceEntry.Etymologies = append(sourceEntry.Etymologies, etymologyText) | ||
} | ||
|
||
for _, def := range apiEntry.Def { | ||
sourceEntry.Senses = append(sourceEntry.Senses, def.Sseq.toSenses()...) | ||
} | ||
|
||
sourceEntries = append(sourceEntries, sourceEntry) | ||
} | ||
|
||
return []source.DictionaryResult{ | ||
{ | ||
Language: "en", // TODO | ||
Entries: sourceEntries, | ||
}, | ||
} | ||
} | ||
|
||
// toSenses converts the API sense sequence to a list of source.Sense | ||
func (s apiSenseSequence) toSenses() []source.Sense { | ||
senses := make([]source.Sense, 0) | ||
|
||
for _, apiSense := range s { | ||
var lastSenseNumber *apiSenseNumber | ||
|
||
for _, apiSenseContainer := range apiSense { | ||
// Webster API senses are returned in prefixed arrays. | ||
// See https://www.dictionaryapi.com/products/json#sec-2.sense | ||
if len(apiSenseContainer) < 2 { | ||
continue | ||
} | ||
|
||
var senseData apiSenseData | ||
|
||
switch apiSenseContainer[0] { | ||
case "sense": | ||
senseData = apiSenseData(apiSenseContainer[1].(map[string]any)) | ||
case "bs": | ||
// See https://www.dictionaryapi.com/products/json#sec-2.bs | ||
bindingSubstitute := apiSenseContainer[1].(map[string]any) | ||
senseData = apiSenseData(bindingSubstitute["sense"].(map[string]any)) | ||
default: | ||
continue | ||
} | ||
|
||
senseNumber := parseSenseNumber(senseData["sn"]) | ||
|
||
sourceSense := senseData.toSense() | ||
|
||
if lastSenseNumber == nil || (senseNumber != nil && lastSenseNumber.number < senseNumber.number) { | ||
// The sense is a new sense | ||
senses = append(senses, sourceSense) | ||
} else { | ||
// The sense is a sub-sense | ||
lastSense := &(senses[len(senses)-1]) | ||
lastSense.SubSenses = append(lastSense.SubSenses, sourceSense) | ||
} | ||
|
||
lastSenseNumber = senseNumber | ||
} | ||
} | ||
|
||
return senses | ||
} | ||
|
||
// toSense converts the API sense data to a source.Sense | ||
func (d apiSenseData) toSense() source.Sense { | ||
definitions := make([]string, 0) | ||
examples := make([]string, 0) | ||
|
||
senseDefinitions := d["dt"].([]any) | ||
|
||
for _, defParts := range senseDefinitions { | ||
definition := defParts.([]any) | ||
|
||
// Webster API definition parts are returned in prefixed arrays. | ||
// See https://www.dictionaryapi.com/products/json#sec-2.dt | ||
if len(definition) < 2 { | ||
continue | ||
} | ||
|
||
switch definition[0] { | ||
case "text": | ||
definitionText := cleanTextOfTokens(definition[1].(string)) | ||
|
||
definitions = append(definitions, definitionText) | ||
case "vis": | ||
exampleTextObjects := definition[1].([]any) | ||
|
||
for _, exampleTextObject := range exampleTextObjects { | ||
exampleTextMap := exampleTextObject.(map[string]any) | ||
exampleText := cleanTextOfTokens(exampleTextMap["t"].(string)) | ||
|
||
if exampleTextMap["aq"] != nil { | ||
exampleAttribution := exampleTextMap["aq"].(map[string]any) | ||
|
||
// TODO: Handle attributions more generally... especially | ||
// presentation wise... | ||
if exampleAttribution["auth"] != nil { | ||
exampleText = fmt.Sprintf("'%s' - %s", exampleText, exampleAttribution["auth"]) | ||
} | ||
} | ||
|
||
examples = append(examples, exampleText) | ||
} | ||
} | ||
} | ||
|
||
return source.Sense{ | ||
Definitions: definitions, | ||
Examples: examples, | ||
} | ||
} | ||
|
||
func cleanHeadword(headword string) string { | ||
return strings.ReplaceAll(headword, "*", "") | ||
} | ||
|
||
func cleanTextOfTokens(text string) string { | ||
return regexpWebsterTokens.ReplaceAllString(text, "$1") | ||
} | ||
|
||
func parseSenseNumber(rawSenseNumber any) *apiSenseNumber { | ||
if rawSenseNumber == nil { | ||
return nil | ||
} | ||
|
||
parsed := regexpWebsterSenseNumber.FindStringSubmatch(rawSenseNumber.(string)) | ||
|
||
var main int | ||
if parsedMain, err := strconv.Atoi(parsed[1]); err == nil { | ||
main = parsedMain | ||
} | ||
|
||
return &apiSenseNumber{ | ||
number: main, | ||
letter: parsed[2], | ||
sub: parsed[3], | ||
} | ||
} |
Oops, something went wrong.