diff --git a/atom/parser.go b/atom/parser.go index fc61c255..2134f16f 100644 --- a/atom/parser.go +++ b/atom/parser.go @@ -30,6 +30,9 @@ var ( "src": true, "uri": true, } + + // No known explicit extension parsers for Atom, currently + emptyExtParsers = make(shared.ExtParsers) ) // Parser is an Atom Parser @@ -50,6 +53,15 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) { return ap.parseRoot(p) } +func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) { + ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs} // TODO: do we need the surrounding base for the urlstack? + entry := &Entry{} + if err := ap.parseEntryContent(p, entry); err != nil { + return nil, err + } + return entry, nil +} + func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { if err := p.Expect(xpp.StartTag, "feed"); err != nil { return nil, err @@ -81,7 +93,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } @@ -227,103 +239,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { } if tok == xpp.StartTag { - - name := strings.ToLower(p.Name) - if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } extensions = e - } else if name == "title" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Title = result - } else if name == "id" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.ID = result - } else if name == "rights" || - name == "copyright" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Rights = result - } else if name == "summary" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Summary = result - } else if name == "source" { - result, err := ap.parseSource(p) - if err != nil { - return nil, err - } - entry.Source = result - } else if name == "updated" || - name == "modified" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Updated = result - date, err := shared.ParseDate(result) - if err == nil { - utcDate := date.UTC() - entry.UpdatedParsed = &utcDate - } - } else if name == "contributor" { - result, err := ap.parsePerson("contributor", p) - if err != nil { - return nil, err - } - entry.Contributors = append(entry.Contributors, result) - } else if name == "author" { - result, err := ap.parsePerson("author", p) - if err != nil { - return nil, err - } - entry.Authors = append(entry.Authors, result) - } else if name == "category" { - result, err := ap.parseCategory(p) - if err != nil { - return nil, err - } - entry.Categories = append(entry.Categories, result) - } else if name == "link" { - result, err := ap.parseLink(p) - if err != nil { - return nil, err - } - entry.Links = append(entry.Links, result) - } else if name == "published" || - name == "issued" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Published = result - date, err := shared.ParseDate(result) - if err == nil { - utcDate := date.UTC() - entry.PublishedParsed = &utcDate - } - } else if name == "content" { - result, err := ap.parseContent(p) - if err != nil { - return nil, err - } - entry.Content = result } else { - err := p.Skip() - if err != nil { + if err := ap.parseEntryContent(p, entry); err != nil { return nil, err } } @@ -341,6 +264,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { return entry, nil } +func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error { + name := strings.ToLower(p.Name) + + if name == "title" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Title = result + } else if name == "id" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.ID = result + } else if name == "rights" || + name == "copyright" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Rights = result + } else if name == "summary" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Summary = result + } else if name == "source" { + result, err := ap.parseSource(p) + if err != nil { + return err + } + entry.Source = result + } else if name == "updated" || + name == "modified" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Updated = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.UpdatedParsed = &utcDate + } + } else if name == "contributor" { + result, err := ap.parsePerson("contributor", p) + if err != nil { + return err + } + entry.Contributors = append(entry.Contributors, result) + } else if name == "author" { + result, err := ap.parsePerson("author", p) + if err != nil { + return err + } + entry.Authors = append(entry.Authors, result) + } else if name == "category" { + result, err := ap.parseCategory(p) + if err != nil { + return err + } + entry.Categories = append(entry.Categories, result) + } else if name == "link" { + result, err := ap.parseLink(p) + if err != nil { + return err + } + entry.Links = append(entry.Links, result) + } else if name == "published" || + name == "issued" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Published = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.PublishedParsed = &utcDate + } + } else if name == "content" { + result, err := ap.parseContent(p) + if err != nil { + return err + } + entry.Content = result + } else { + err := p.Skip() + if err != nil { + return err + } + } + return nil +} + func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) { if err := p.Expect(xpp.StartTag, "source"); err != nil { @@ -370,7 +390,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } diff --git a/extensions/extensions.go b/extensions/extensions.go index 6c50d4aa..53021db8 100644 --- a/extensions/extensions.go +++ b/extensions/extensions.go @@ -12,6 +12,7 @@ type Extension struct { Value string `json:"value"` Attrs map[string]string `json:"attrs"` Children map[string][]Extension `json:"children"` + Parsed interface{} `json:"parsed,omitempty"` } func parseTextExtension(name string, extensions map[string][]Extension) (value string) { diff --git a/internal/shared/extparser.go b/internal/shared/extparser.go index 3706328a..9964a142 100644 --- a/internal/shared/extparser.go +++ b/internal/shared/extparser.go @@ -7,6 +7,12 @@ import ( "github.com/mmcdole/goxpp" ) +type ExtParser interface { + ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) +} + +type ExtParsers map[string]ExtParser + // IsExtension returns whether or not the current // XML element is an extension element (if it has a // non empty prefix) @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool { // ParseExtension parses the current element of the // XMLPullParser as an extension element and updates // the extension map -func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) { +func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) { prefix := prefixForNamespace(p.Space, p) - result, err := parseExtensionElement(p) + var result ext.Extension + var err error + if extParser, ok := extParsers[prefix]; ok { + result, err = parseExtensionFromParser(p, extParser) + } else { + result, err = parseExtensionElement(p) + } if err != nil { return nil, err } @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er return fe, nil } +func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) { + if err = p.Expect(xpp.StartTag, "*"); err != nil { + return e, err + } + + e.Name = p.Name + if e.Parsed, err = extParser.ParseAsExtension(p); err != nil { + return e, err + } + + if err = p.Expect(xpp.EndTag, e.Name); err != nil { + return e, err + } + + return e, nil +} + func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) { if err = p.Expect(xpp.StartTag, "*"); err != nil { return e, err diff --git a/parser.go b/parser.go index 273b466a..57d5c94b 100644 --- a/parser.go +++ b/parser.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/mmcdole/gofeed/atom" + "github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/rss" ) @@ -131,8 +132,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) { return f.atomTrans().Translate(af) } +func (f *Parser) BuildRSSExtParsers() shared.ExtParsers { + extParsers := make(shared.ExtParsers, 3) + + // all possible atom variants + extParsers["atom"] = f.ap + extParsers["atom10"] = f.ap + extParsers["atom03"] = f.ap + + return extParsers +} + func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) { - rf, err := f.rp.Parse(feed) + rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers()) if err != nil { return nil, err } diff --git a/rss/parser.go b/rss/parser.go index 684d160f..83201515 100644 --- a/rss/parser.go +++ b/rss/parser.go @@ -13,12 +13,14 @@ import ( // Parser is a RSS Parser type Parser struct { base *shared.XMLBase + extParsers shared.ExtParsers } // Parse parses an xml feed into an rss.Feed -func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { +func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) { p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) rp.base = &shared.XMLBase{} + rp.extParsers = extParsers _, err := rp.base.FindRoot(p) if err != nil { @@ -144,7 +146,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - ext, err := shared.ParseExtension(extensions, p) + + ext, err := shared.ParseExtension(extensions, p, rp.extParsers) if err != nil { return nil, err } @@ -335,7 +338,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - ext, err := shared.ParseExtension(extensions, p) + ext, err := shared.ParseExtension(extensions, p, rp.extParsers) if err != nil { return nil, err } diff --git a/rss/parser_test.go b/rss/parser_test.go index e46204c4..35f4872a 100644 --- a/rss/parser_test.go +++ b/rss/parser_test.go @@ -9,6 +9,7 @@ import ( "strings" "testing" + "github.com/mmcdole/gofeed" "github.com/mmcdole/gofeed/rss" "github.com/stretchr/testify/assert" ) @@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) { // Parse actual feed fp := &rss.Parser{} - actual, _ := fp.Parse(bytes.NewReader(f)) + actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers()) // Get json encoded expected feed result ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name)