Skip to content

Commit

Permalink
Merge pull request #65 from Financial-Times/fix/UPPSF-4128-consistent…
Browse files Browse the repository at this point in the history
…-primary-theme

Annotations with same predicate are ordered before applying primary t…
  • Loading branch information
Eng3lFT committed Jan 23, 2023
2 parents 1c98371 + 2adf62c commit 8168cda
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 36 deletions.
64 changes: 51 additions & 13 deletions pkg/mapper/mapper.go
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/base64"
"errors"
"fmt"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -68,6 +69,7 @@ func (h *Handler) ToIndexModel(enrichedContent schema.EnrichedContent, contentTy
return model
}

var annotationsIds = map[string][]string{}
for _, annotation := range annotations {
canonicalID := strings.TrimPrefix(annotation.ID, concept.ThingURIPrefix)
concepts, found := concepts[annotation.ID]
Expand All @@ -81,16 +83,64 @@ func (h *Handler) ToIndexModel(enrichedContent schema.EnrichedContent, contentTy
} else {
log.Warnf("TME id missing for concept with id %s, using only canonical id", canonicalID)
}
annotationsIds[annotation.ID] = annIDs

h.populateAnnotationRelatedFields(annotation, &model, annIDs, canonicalID)
}

h.populatePrimaryTheme(&model, annotations, annotationsIds, "about")

return model
}

// populatePrimaryTheme sets the primary theme of model from the annotations
// that carry the predicate configured under predicateKey.
//
// Only annotations accepted by filterAnnotations (matching predicate and a
// non-nil entry in ids) are considered. The candidates are ordered by
// PrefLabel, with the annotation ID as a tie-breaker, and the first one is
// used. Its concept types are checked in order; the first type that is an
// organisation, person, topic or location decides which TME taxonomy is used
// to resolve the CMR id. If no annotation matches, or the selected annotation
// has none of those types, model is left untouched.
func (h *Handler) populatePrimaryTheme(model *schema.IndexModel, annotations []schema.Thing, ids map[string][]string, predicateKey string) {
	predicate := h.Config.Predicates.Get(predicateKey)
	annotations = filterAnnotations(annotations, ids, predicate)
	if len(annotations) == 0 {
		return
	}

	// sort.Slice is not stable, so comparing PrefLabel alone would let the
	// incoming order decide between annotations that share a label. Falling
	// back to the ID makes the selection independent of input order.
	sort.Slice(annotations, func(i, j int) bool {
		if annotations[i].PrefLabel != annotations[j].PrefLabel {
			return annotations[i].PrefLabel < annotations[j].PrefLabel
		}
		return annotations[i].ID < annotations[j].ID
	})

	annotation := annotations[0]
	annIDs := ids[annotation.ID]
	conceptTypes := h.Config.ConceptTypes

	for _, taxonomy := range annotation.Types {
		tme := ""
		switch taxonomy {
		case conceptTypes.Get("organisation"):
			tme = tmeOrganisations
		case conceptTypes.Get("person"):
			tme = tmePeople
		case conceptTypes.Get("topic"):
			tme = tmeTopics
		case conceptTypes.Get("location"):
			tme = tmeRegions
		}
		if tme != "" {
			setPrimaryTheme(model, annotation.PrefLabel, getCmrIDWithFallback(tme, annIDs))
			// Stop at the first recognised type: this break leaves the
			// for loop, not the switch above.
			break
		}
	}
}

// filterAnnotations returns, in their original order, the annotations whose
// Predicate equals predicate and for which ids holds a non-nil slice under
// the annotation's ID. The result is nil when nothing qualifies.
func filterAnnotations(annotations []schema.Thing, ids map[string][]string, predicate string) []schema.Thing {
	var kept []schema.Thing

	for idx := range annotations {
		candidate := annotations[idx]
		if candidate.Predicate != predicate {
			continue
		}
		if ids[candidate.ID] == nil {
			continue
		}
		kept = append(kept, candidate)
	}

	return kept
}

func (h *Handler) populateAnnotationRelatedFields(annotation schema.Thing, model *schema.IndexModel, annIDs []string, canonicalID string) {
h.handleSectionMapping(annotation, model, annIDs)

about := h.Config.Predicates.Get("about")
hasAuthor := h.Config.Predicates.Get("hasAuthor")
hasContributor := h.Config.Predicates.Get("hasContributor")
for _, taxonomy := range annotation.Types {
Expand All @@ -99,9 +149,6 @@ func (h *Handler) populateAnnotationRelatedFields(annotation schema.Thing, model
case conceptTypes.Get("organisation"):
model.CmrOrgnames = appendIfNotExists(model.CmrOrgnames, annotation.PrefLabel)
model.CmrOrgnamesIds = prepareElasticField(model.CmrOrgnamesIds, annIDs)
if annotation.Predicate == about {
setPrimaryTheme(model, annotation.PrefLabel, getCmrIDWithFallback(tmeOrganisations, annIDs))
}
case conceptTypes.Get("person"):
_, personFound := getCmrID(tmePeople, annIDs)
authorCmrID, authorFound := getCmrID(tmeAuthors, annIDs)
Expand All @@ -117,9 +164,6 @@ func (h *Handler) populateAnnotationRelatedFields(annotation schema.Thing, model
model.CmrAuthorsIds = appendIfNotExists(model.CmrAuthorsIds, canonicalID)
}
}
if annotation.Predicate == about {
setPrimaryTheme(model, annotation.PrefLabel, getCmrIDWithFallback(tmePeople, annIDs))
}
case conceptTypes.Get("company"):
model.CmrCompanynames = appendIfNotExists(model.CmrCompanynames, annotation.PrefLabel)
model.CmrCompanynamesIds = prepareElasticField(model.CmrCompanynamesIds, annIDs)
Expand All @@ -129,15 +173,9 @@ func (h *Handler) populateAnnotationRelatedFields(annotation schema.Thing, model
case conceptTypes.Get("topic"):
model.CmrTopics = appendIfNotExists(model.CmrTopics, annotation.PrefLabel)
model.CmrTopicsIds = prepareElasticField(model.CmrTopicsIds, annIDs)
if annotation.Predicate == about {
setPrimaryTheme(model, annotation.PrefLabel, getCmrIDWithFallback(tmeTopics, annIDs))
}
case conceptTypes.Get("location"):
model.CmrRegions = appendIfNotExists(model.CmrRegions, annotation.PrefLabel)
model.CmrRegionsIds = prepareElasticField(model.CmrRegionsIds, annIDs)
if annotation.Predicate == about {
setPrimaryTheme(model, annotation.PrefLabel, getCmrIDWithFallback(tmeRegions, annIDs))
}
case conceptTypes.Get("genre"):
model.CmrGenres = appendIfNotExists(model.CmrGenres, annotation.PrefLabel)
model.CmrGenreIds = prepareElasticField(model.CmrGenreIds, annIDs)
Expand Down
10 changes: 2 additions & 8 deletions test/testdata/exampleElasticModel.json
Expand Up @@ -30,11 +30,11 @@
"model_excerpt": null,
"model_resource_uri": null,
"cmr_primarysection": "Equities",
"cmr_primarytheme": "Investor activism",
"cmr_primarytheme": null,
"cmr_mediatype": null,
"cmr_metadataupdatetime": null,
"cmr_primarysection_id": "OTg=-U2VjdGlvbnM=",
"cmr_primarytheme_id": "OWIwMDQ1MTEtOWIxYi00MmEzLWFjOGQtY2VhMDM0MjJlZjI3-VG9waWNz",
"cmr_primarytheme_id": null,
"cmr_mediatype_id": null,
"cmr_brands": [
"Markets Insight",
Expand All @@ -49,7 +49,6 @@
"cmr_specialreports_ids": null,
"cmr_sections": [
"Equities",
"Investor activism",
"Markets Insight",
"Opinion",
"Markets",
Expand All @@ -60,8 +59,6 @@
"cmr_sections_ids": [
"b7ea3c33-ea8c-432e-bb7e-e3bbc8fdc2bb",
"OTg=-U2VjdGlvbnM=",
"aeada5a9-39cb-4bb6-b795-5baba8acf3fb",
"OWIwMDQ1MTEtOWIxYi00MmEzLWFjOGQtY2VhMDM0MjJlZjI3-VG9waWNz",
"f021ef0a-e3a5-3530-9394-21a1eaf9f3f6",
"QnJhbmRzXzExNg==-QnJhbmRz",
"6da31a37-691f-4908-896f-2829ebe2309e",
Expand All @@ -79,15 +76,12 @@
"cmr_subjects_ids": null,
"cmr_topics": [
"Equities",
"Investor activism",
"Markets",
"Companies"
],
"cmr_topics_ids": [
"b7ea3c33-ea8c-432e-bb7e-e3bbc8fdc2bb",
"OTg=-U2VjdGlvbnM=",
"aeada5a9-39cb-4bb6-b795-5baba8acf3fb",
"OWIwMDQ1MTEtOWIxYi00MmEzLWFjOGQtY2VhMDM0MjJlZjI3-VG9waWNz",
"c91b1fad-1097-468b-be82-9a8ff717d54c",
"NzE=-U2VjdGlvbnM=",
"c47f4dfc-6879-4e95-accf-ca8cbe6a1f69",
Expand Down
13 changes: 0 additions & 13 deletions test/testdata/exampleEnrichedContentModel.json
Expand Up @@ -96,19 +96,6 @@
"apiUrl": "http://api.ft.com/things/b7ea3c33-ea8c-432e-bb7e-e3bbc8fdc2bb"
}
},
{
"thing": {
"id": "http://api.ft.com/things/aeada5a9-39cb-4bb6-b795-5baba8acf3fb",
"prefLabel": "Investor activism",
"types": [
"http://www.ft.com/ontology/core/Thing",
"http://www.ft.com/ontology/concept/Concept",
"http://www.ft.com/ontology/Topic"
],
"predicate": "http://www.ft.com/ontology/annotation/about",
"apiUrl": "http://api.ft.com/things/aeada5a9-39cb-4bb6-b795-5baba8acf3fb"
}
},
{
"thing": {
"id": "http://api.ft.com/things/f021ef0a-e3a5-3530-9394-21a1eaf9f3f6",
Expand Down
4 changes: 2 additions & 2 deletions test/testdata/testElasticModel1.json
Expand Up @@ -30,11 +30,11 @@
"model_excerpt": null,
"model_resource_uri": null,
"cmr_primarysection": null,
"cmr_primarytheme": "Tencent Holdings Ltd",
"cmr_primarytheme": "'Îlayh",
"cmr_mediatype": null,
"cmr_metadataupdatetime": null,
"cmr_primarysection_id": null,
"cmr_primarytheme_id": "ZjhiNGI0YjUtOTFjNC00NzY3LTk0NGQtMDEyNGI0ZTdiZTdj-T04=",
"cmr_primarytheme_id": "TnN0ZWluX0dMX0FGVE1fR0xfMTY0OTA2-R0w=",
"cmr_mediatype_id": null,
"cmr_brands": null,
"cmr_brands_ids": null,
Expand Down

0 comments on commit 8168cda

Please sign in to comment.