diff --git a/tests/.gitkeep b/.docker/.gitkeep similarity index 100% rename from tests/.gitkeep rename to .docker/.gitkeep diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..7d547b1 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,9 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + time: "06:00" + timezone: "Europe/Amsterdam" \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..b161baa --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,29 @@ +name: Tests + +on: + pull_request: + types: [opened, reopened, synchronize] + branches: + - main + +jobs: + linter: + name: Linter check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.24' + - name: Run Linter + uses: golangci/golangci-lint-action@v8 + test: + name: Regression tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-go@v4 + with: + go-version: '1.24' + - name: Run tests + run: go test ./tests/... \ No newline at end of file diff --git a/pkg/api.go b/pkg/api.go index 49856b4..935eb5c 100644 --- a/pkg/api.go +++ b/pkg/api.go @@ -1,53 +1,62 @@ package pkg import ( - "encoding/xml" + "log" "net/http" "strings" ) type Api struct{} +// healthcheck endpoint, does nothing (useful) func (api Api) Ping() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte("roxy is running...")) + w.Write([]byte("roxy is running...")) //nolint:errcheck } } -// /add endpoint for adding rss feeds through api +func (api Api) Stats(idx *Index) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + data, header := marshall(idx.Urls, JSON) + w.Header().Set("Content-Type", header) + w.Write(data) //nolint:errcheck + } +} + +// endpoint for adding rss feeds through api func (api Api) Add(idx *Index) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { + category := getStrParam(r.URL, "category") urls := getListParam(r.URL, "urls") - tags := getListParam(r.URL, "tags") if len(urls) == 0 { http.Error(w, "no url", http.StatusBadRequest) return } for _, url := range urls { - err := idx.Add(url, tags) + err := idx.Add(url, category) if err != nil { http.Error(w, "error: "+url, http.StatusInternalServerError) idx.Clear() return } } - w.Write([]byte("added " + strings.Join(urls, ", "))) + w.Write([]byte("add " + strings.Join(urls, ","))) //nolint:errcheck } } -// /get endpoint to query the rss feeds, returns xml in the body -func (api Api) Get(idx *Index) http.HandlerFunc { +// query the rss feeds using url parameters, returns xml in the body +func (api Api) Get(idx *Index, format Format) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { query := Query{ - Urls: getListParam(r.URL, "urls"), - Tags: getListParam(r.URL, "tags"), - Keywords: getListParam(r.URL, "keywords"), - Amount: getIntParam(r.URL, "amount", 10), + Urls: getListParam(r.URL, "urls"), + Categories: getListParam(r.URL, "category"), + Keywords: getListParam(r.URL, "keywords"), + Amount: getIntParam(r.URL, "amount", 10), } result := idx.Get(query) - xmlData, _ := xml.MarshalIndent(result, "", "\t") - w.Header().Set("Content-Type", "application/xml") - w.Write(xmlData) + data, header := marshall(result, format) + w.Header().Set("Content-Type", header) + w.Write(data) //nolint:errcheck } } @@ -56,9 +65,10 @@ func (api Api) Refresh(idx *Index) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { idx.Clear() for _, url := range idx.Urls { - err := idx.Add(url, []string{}) // TODO! RESET TAGS! + log.Printf("refreshing: %s", url.Url) + err := idx.Add(url.Url, url.Category) if err != nil { - http.Error(w, "can't refresh: "+url, http.StatusInternalServerError) + http.Error(w, "fail: "+url.Url, http.StatusInternalServerError) return } } diff --git a/pkg/index.go b/pkg/index.go index ca99a54..e67d564 100644 --- a/pkg/index.go +++ b/pkg/index.go @@ -3,31 +3,32 @@ package pkg import ( "bufio" "encoding/xml" - "fmt" "log" "net/http" "os" ) // gets rss feed from a url and adds it to the index, and parses the pubdates -// TODO: No error return, just skip record and log -func (idx *Index) Add(url string, tags []string) error { +func (idx *Index) Add(url string, category string) error { var feed Feed resp, err := http.Get(url) //nolint:errcheck if err != nil { return err } - err = xml.NewDecoder(resp.Body).Decode(&feed) - if err == nil { - feed.ParseTime() - feed.Tags = tags + if err = xml.NewDecoder(resp.Body).Decode(&feed); err == nil { + feed.Category = category feed.Url = url + feed.ParseTime() for _, item := range feed.Channel.Items { item.parentFeed = &feed idx.Rank = insertSorted(idx.Rank, &item) } - idx.Urls = append(idx.Urls, url) - log.Printf("added to feed: '%s'", url) + idx.Urls = append(idx.Urls, struct { + Url string + Category string + Size int + }{url, category, len(feed.Channel.Items)}) + log.Printf("added to feed: '%s' %v", url, category) } return err } @@ -52,21 +53,23 @@ func (idx *Index) Get(query Query) Result { } } -// loags (newsboat) file rss feeds into the index +// loads (newsboat) file rss feeds into the index func (idx *Index) Load(filename string) error { if filename == "" { - return nil // nothing to open + return nil } file, err := os.Open(filename) if err != nil { - return fmt.Errorf("can't open: %s", filename) + log.Printf("can't open: %s", filename) + return err } scanner := bufio.NewScanner(file) for scanner.Scan() { line := scanner.Text() - url, tags := parseLine(line) - if len(url) > 0 { - idx.Add(url, tags) + if url, category := parseLine(line); len(url) > 0 { + if err := idx.Add(url, category); err != nil { + log.Printf("error adding url: '%s'", url) + } } } return nil @@ -75,10 +78,12 @@ func (idx *Index) Load(filename string) error { // servers all api endpoints for an index instance func (idx *Index) Serve(port string) { api := Api{} - log.Printf("serving on http://localhost%s", port) + log.Printf("serving on: http://localhost%s", port) http.HandleFunc("/", api.Ping()) + http.HandleFunc("/stats", api.Stats(idx)) http.HandleFunc("/add", api.Add(idx)) - http.HandleFunc("/get", api.Get(idx)) + http.HandleFunc("/xml", api.Get(idx, XML)) + http.HandleFunc("/json", api.Get(idx, JSON)) http.HandleFunc("/refresh", api.Refresh(idx)) log.Fatal(http.ListenAndServe(port, nil)) } @@ -90,5 +95,6 @@ func (idx *Index) Clear() { // initiate rss feed index class (enforce singleton?) func NewIndex() *Index { + log.Println("starting roxy...") return &Index{} } diff --git a/pkg/rss.go b/pkg/rss.go index 8a43e1a..94660b4 100644 --- a/pkg/rss.go +++ b/pkg/rss.go @@ -2,14 +2,13 @@ package pkg import ( "regexp" - "slices" "strings" ) // extracts keywords from title, used for querying based on keywords func (item *Item) Keywords() []string { re := regexp.MustCompile(`[a-zA-Z]+`) - words := re.FindAllString(strings.ToLower(item.Title), -1) + words := re.FindAllString(strings.ToLower(item.Description), -1) keywords := []string{} for _, w := range words { if len(w) >= 4 { @@ -23,11 +22,11 @@ func (item *Item) Keywords() []string { // ps, King Terry said case/switch are devine, hence the choice func (item *Item) QueryMatch(query Query) bool { switch { - case len(query.Urls) > 0 && !slices.Contains(query.Urls, item.parentFeed.Url): + case !contains(query.Urls, item.parentFeed.Url): return false - case len(query.Tags) > 0 && !overlap(query.Tags, item.parentFeed.Tags): + case !contains(query.Categories, item.parentFeed.Category): return false - case len(query.Keywords) > 0 && !overlap(query.Keywords, item.Keywords()): + case !overlap(query.Keywords, item.Keywords()): return false default: return true diff --git a/pkg/types.go b/pkg/types.go index 8c4faf4..c94d9c1 100644 --- a/pkg/types.go +++ b/pkg/types.go @@ -5,24 +5,31 @@ import ( "time" ) +type Format string + +const ( + JSON Format = "json" + XML Format = "xml" +) + type Item struct { - Title string `xml:"title"` - Description string `xml:"description"` - Link string `xml:"link"` - Guid string `xml:"guid"` - PubDate string `xml:"pubDate"` + Title string `xml:"title" json:"title"` + Description string `xml:"description" json:"description"` + Link string `xml:"link" json:"link"` + Guid string `xml:"guid" json:"guid"` + PubDate string `xml:"pubDate" json:"pubDate"` // generated timestamp time.Time parentFeed *Feed } type Channel struct { - Title string `xml:"title"` - Description string `xml:"description"` - Link string `xml:"link"` + Title string `xml:"title" json:"title"` + Description string `xml:"description" json:"description"` + Link string `xml:"link" json:"link"` Items []Item `xml:"item"` - PubDate string `xml:"pubDate"` - Category []string `xml:"category"` + PubDate string `xml:"pubDate" json:"pubDate"` + Category []string `xml:"category" json:"category"` Generator string `xml:"generator"` // generated timestamp time.Time @@ -33,28 +40,28 @@ type Feed struct { Version string `xml:"version,attr"` Channel Channel `xml:"channel"` // generated - Tags []string - Url string + Category string + Url string } type Index struct { Rank []*Item - Urls []string - // Urls []struct { - // Url string - // Tags []string - // } + Urls []struct { + Url string + Category string + Size int + } } type Query struct { - Urls []string - Tags []string - Keywords []string - Amount int + Urls []string + Keywords []string + Categories []string + Amount int } type Result struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - Items []*Item `xml:"channel>item"` + XMLName xml.Name `xml:"rss" json:"-"` + Version string `xml:"version,attr" json:"-"` + Items []*Item `xml:"channel>item" json:"items"` } diff --git a/pkg/utils.go b/pkg/utils.go index ecb40fa..377d2f1 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -1,6 +1,8 @@ package pkg import ( + "encoding/json" + "encoding/xml" "net/url" "slices" "sort" @@ -9,10 +11,21 @@ import ( "time" ) -// check if two lists have any overlap. Useful for querying. +// calls json/xml marshall function to format result object +func marshall(data any, format Format) ([]byte, string) { + var result []byte + if format == JSON { + result, _ = json.Marshal(data) //nolint:errcheck + return result, "application/json" + } + result, _ = xml.MarshalIndent(data, "", "\t") //nolint:errcheck + return result, "application/xml" +} + +// check if two lists have any overlap. Useful for querying func overlap[Type comparable](a []Type, b []Type) bool { if len(a) == 0 || len(b) == 0 { - return false + return true } for _, aItem := range a { if slices.Contains(b, aItem) { @@ -22,6 +35,14 @@ func overlap[Type comparable](a []Type, b []Type) bool { return false } +// test if item exists in a list of comparable items +func contains[Type comparable](list []Type, item Type) bool { + if len(list) == 0 { + return true + } + return slices.Contains(list, item) +} + // tries all mentally sane rss datetime formats and returns time object func parsePubDate(s string) time.Time { var rssDateFormats = []string{ @@ -41,18 +62,18 @@ func parsePubDate(s string) time.Time { return time.Time{} } -// parses a newsboat URL line and gets the url and tags -func parseLine(line string) (string, []string) { +// parses a newsboat URL line and gets the url and category +func parseLine(line string) (string, string) { parts := strings.Split(line, " ") url, err := url.Parse(parts[0]) - tags := "" + category := "" if err == nil { if len(parts) > 1 { - tags = parts[1] + category = parts[1] } - return url.String(), []string{tags} + return url.String(), strings.ReplaceAll(category, `"`, "") } - return "", []string{} // no valid URL found + return "", "" // no valid URL found } // inserts an item in sorted order. Also returns index it was inserted at. @@ -78,6 +99,16 @@ func getListParam(url *url.URL, param string) []string { return filteredParams } +func getStrParam(url *url.URL, param string) string { + params := url.Query().Get(param) + return strings.Map(func(r rune) rune { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') { + return r + } + return -1 + }, strings.ToLower(params)) +} + // gets integer param from url, and takes out bad values func getIntParam(url *url.URL, param string, fallback int) int { strValue := url.Query().Get(param) diff --git a/main.go b/roxy.go similarity index 50% rename from main.go rename to roxy.go index cffb8ce..d86aab9 100644 --- a/main.go +++ b/roxy.go @@ -8,10 +8,15 @@ import ( ) func main() { - filename := flag.String("filename", "", "(newsboat) file with rss feeds") + // flags + filename := flag.String("feeds", "", "(newsboat) file with rss feeds") port := flag.String("port", "2112", "port number to serve on") - log.Println("starting roxy...") + flag.Parse() + // start server idx := pkg.NewIndex() - idx.Load(*filename) + if err := idx.Load(*filename); err != nil { + log.Println("startup failed...") + return + } idx.Serve(":" + *port) } diff --git a/tests/regression_test.go b/tests/regression_test.go new file mode 100644 index 0000000..7911988 --- /dev/null +++ b/tests/regression_test.go @@ -0,0 +1,70 @@ +package tests + +import ( + "encoding/xml" + "net/http" + "testing" + "time" + + "github.com/TimoKats/roxy/pkg" +) + +func TestAdd(t *testing.T) { + idx := pkg.NewIndex() + url := "https://timokats.xyz/feed/website.xml" + if err := idx.Add(url, "test"); err != nil { + t.Errorf("Expected no error, got %v", err) + } + if len(idx.Rank) == 0 { + t.Error("Rank should not be empty after adding an item") + } +} + +func TestGet(t *testing.T) { + idx := pkg.NewIndex() + url := "https://timokats.xyz/feed/website.xml" + if err := idx.Add(url, "test3"); err != nil { + t.Errorf("Expected no error, got %v", err) + } + query := pkg.Query{Amount: 10} + result := idx.Get(query) + if len(result.Items) == 0 { + t.Error("Result should contain items after querying") + } +} + +func TestServe(t *testing.T) { + idx := pkg.NewIndex() + url := "https://timokats.xyz/feed/website.xml" + if err := idx.Add(url, "test5"); err != nil { + t.Errorf("Expected no error, got %v", err) + } + + go idx.Serve(":8080") + time.Sleep(2 * time.Second) + resp, err := http.Get("http://localhost:8080/xml?amount=10") + if err != nil { + t.Errorf("Expected no error, got %v", err) + } + + result := pkg.Result{} + err = xml.NewDecoder(resp.Body).Decode(&result) + if err != nil { + t.Errorf("Expected no error, got %v", err) + } + if len(result.Items) == 0 { + t.Error("Result should contain items after querying via API") + } +} + +func TestClear(t *testing.T) { + idx := pkg.NewIndex() + url := "https://timokats.xyz/feed/website.xml" + if err := idx.Add(url, ""); err != nil { + t.Errorf("Expected no error, got %v", err) + } + idx.Clear() + if len(idx.Rank) > 0 { + t.Error("Rank should be empty after clearing") + } +}