Skip to content

Commit

Permalink
fix: removed TME IDs encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
peterschubert committed Apr 27, 2017
1 parent 317b9ed commit 97b3733
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 36 deletions.
54 changes: 33 additions & 21 deletions model.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,13 @@ type esContentModel struct {
PublishReference string `json:"publishReference"`
}

type ContentType struct {
type contentType struct {
collection string
format string
category string
}

var contentTypeMap = map[string]ContentType{
var contentTypeMap = map[string]contentType{
"article": {
collection: "FTCom",
format: "Articles",
Expand Down Expand Up @@ -256,9 +256,9 @@ func convertToESContentModel(enrichedContent enrichedContentModel, contentType s
esModel.PublishReference = enrichedContent.Content.PublishReference

for _, annotation := range enrichedContent.Metadata {
tmeID := annotation.Thing.ID
tmeIDs := []string{annotation.Thing.ID}
if len(annotation.Thing.TmeIDs) != 0 {
tmeID = annotation.Thing.TmeIDs[0]
tmeIDs = append(tmeIDs, annotation.Thing.TmeIDs...)
} else {
log.Warnf("Indexing content with uuid %s - TME id missing for concept with id %s, using thing id instead",
&(enrichedContent.Content.UUID), annotation.Thing.ID)
Expand All @@ -267,55 +267,67 @@ func convertToESContentModel(enrichedContent enrichedContentModel, contentType s
switch taxonomy {
case "http://www.ft.com/ontology/organisation/Organisation":
esModel.CmrOrgnames = append(esModel.CmrOrgnames, annotation.Thing.PrefLabel)
esModel.CmrOrgnamesIds = append(esModel.CmrOrgnamesIds, getCmrID(tmeOrganisations, tmeID))
esModel.CmrOrgnamesIds = append(esModel.CmrOrgnamesIds, getCmrID(tmeOrganisations, tmeIDs))
case "http://www.ft.com/ontology/person/Person":
esModel.CmrPeople = append(esModel.CmrPeople, annotation.Thing.PrefLabel)
esModel.CmrPeopleIds = append(esModel.CmrPeopleIds, getCmrID(tmePeople, tmeID))
cmrID := getCmrID(tmePeople, tmeIDs)
if cmrID != annotation.Thing.ID {
esModel.CmrPeople = append(esModel.CmrPeople, annotation.Thing.PrefLabel)
esModel.CmrPeopleIds = append(esModel.CmrPeopleIds, cmrID)
}
if annotation.Thing.Predicate == hasAuthor {
esModel.CmrAuthors = append(esModel.CmrAuthors, annotation.Thing.PrefLabel)
esModel.CmrAuthorsIds = append(esModel.CmrAuthorsIds, getCmrID(tmeAuthors, tmeID))
cmrID := getCmrID(tmeAuthors, tmeIDs)
if cmrID != annotation.Thing.ID {
esModel.CmrAuthors = append(esModel.CmrAuthors, annotation.Thing.PrefLabel)
esModel.CmrAuthorsIds = append(esModel.CmrAuthorsIds, cmrID)
}
}
case "http://www.ft.com/ontology/company/Company":
esModel.CmrCompanynames = append(esModel.CmrCompanynames, annotation.Thing.PrefLabel)
esModel.CmrCompanynamesIds = append(esModel.CmrCompanynamesIds, getCmrID(tmeOrganisations, tmeID))
esModel.CmrCompanynamesIds = append(esModel.CmrCompanynamesIds, getCmrID(tmeOrganisations, tmeIDs))
case "http://www.ft.com/ontology/product/Brand":
esModel.CmrBrands = append(esModel.CmrBrands, annotation.Thing.PrefLabel)
esModel.CmrBrandsIds = append(esModel.CmrBrandsIds, getCmrID(tmeBrands, tmeID))
esModel.CmrBrandsIds = append(esModel.CmrBrandsIds, getCmrID(tmeBrands, tmeIDs))
case "http://www.ft.com/ontology/Subject":
esModel.CmrSubjects = append(esModel.CmrSubjects, annotation.Thing.PrefLabel)
esModel.CmrSubjectsIds = append(esModel.CmrSubjectsIds, getCmrID(tmeSubjects, tmeID))
esModel.CmrSubjectsIds = append(esModel.CmrSubjectsIds, getCmrID(tmeSubjects, tmeIDs))
case "http://www.ft.com/ontology/Section":
esModel.CmrSections = append(esModel.CmrSections, annotation.Thing.PrefLabel)
esModel.CmrSectionsIds = append(esModel.CmrSectionsIds, getCmrID(tmeSections, tmeID))
esModel.CmrSectionsIds = append(esModel.CmrSectionsIds, getCmrID(tmeSections, tmeIDs))
if annotation.Thing.Predicate == primaryClassification {
esModel.CmrPrimarysection = new(string)
*esModel.CmrPrimarysection = annotation.Thing.PrefLabel
esModel.CmrPrimarysectionID = new(string)
*esModel.CmrPrimarysectionID = getCmrID(tmeSections, tmeID)
*esModel.CmrPrimarysectionID = getCmrID(tmeSections, tmeIDs)
}
case "http://www.ft.com/ontology/Topic":
esModel.CmrTopics = append(esModel.CmrTopics, annotation.Thing.PrefLabel)
esModel.CmrTopicsIds = append(esModel.CmrTopicsIds, getCmrID(tmeTopics, tmeID))
esModel.CmrTopicsIds = append(esModel.CmrTopicsIds, getCmrID(tmeTopics, tmeIDs))
if annotation.Thing.Predicate == about {
esModel.CmrPrimarytheme = new(string)
*esModel.CmrPrimarytheme = annotation.Thing.PrefLabel
esModel.CmrPrimarythemeID = new(string)
*esModel.CmrPrimarythemeID = getCmrID(tmeTopics, tmeID)
*esModel.CmrPrimarythemeID = getCmrID(tmeTopics, tmeIDs)
}
case "http://www.ft.com/ontology/Location":
esModel.CmrRegions = append(esModel.CmrRegions, annotation.Thing.PrefLabel)
esModel.CmrRegionsIds = append(esModel.CmrRegionsIds, getCmrID(tmeRegions, tmeID))
esModel.CmrRegionsIds = append(esModel.CmrRegionsIds, getCmrID(tmeRegions, tmeIDs))
case "http://www.ft.com/ontology/Genre":
esModel.CmrGenres = append(esModel.CmrGenres, annotation.Thing.PrefLabel)
esModel.CmrGenreIds = append(esModel.CmrGenreIds, getCmrID(tmeGenres, tmeID))
esModel.CmrGenreIds = append(esModel.CmrGenreIds, getCmrID(tmeGenres, tmeIDs))
case "http://www.ft.com/ontology/SpecialReport":
esModel.CmrSpecialreports = append(esModel.CmrSpecialreports, annotation.Thing.PrefLabel)
esModel.CmrSpecialreportsIds = append(esModel.CmrSpecialreportsIds, getCmrID(tmeSpecialReports, tmeID))
esModel.CmrSpecialreportsIds = append(esModel.CmrSpecialreportsIds, getCmrID(tmeSpecialReports, tmeIDs))
}
}
}
return esModel
}
func getCmrID(taxonomy string, tmeID string) string {
return base64.StdEncoding.EncodeToString([]byte(tmeID)) + "-" + base64.StdEncoding.EncodeToString([]byte(taxonomy))
// getCmrID picks, from tmeIDs, the identifier that belongs to the given
// taxonomy.
//
// Identifiers are matched by their suffix: an ID belongs to the taxonomy when
// it ends with "-" followed by the standard base64 encoding of the taxonomy
// name. The "-" separator is part of the match so that a taxonomy whose
// encoding merely ends with the same characters (for example "abcON" vs "ON")
// is not selected by mistake; "-" cannot occur inside standard base64 output.
//
// When no identifier matches, the first element of tmeIDs is returned as a
// fallback. When tmeIDs is empty, the empty string is returned instead of
// panicking on the out-of-range index.
func getCmrID(taxonomy string, tmeIDs []string) string {
	if len(tmeIDs) == 0 {
		return ""
	}

	taxonomySuffix := "-" + base64.StdEncoding.EncodeToString([]byte(taxonomy))
	for _, tmeID := range tmeIDs {
		if strings.HasSuffix(tmeID, taxonomySuffix) {
			return tmeID
		}
	}

	// No ID carries this taxonomy: fall back to the first candidate.
	return tmeIDs[0]
}
3 changes: 2 additions & 1 deletion model_mapping_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ func TestConvertToESContentModel(t *testing.T) {

func TestCmrID(t *testing.T) {
assert := assert.New(t)
assert.Equal("NzE0ZThkZGItNDAyMC00MDRjLTlkNzMtY2I5MzRmZDVhOWM2-T04=", getCmrID("ON", "714e8ddb-4020-404c-9d73-cb934fd5a9c6"), "Wrong CMR ID encoding")
assert.Equal("NzE0ZThkZGItNDAyMC00MDRjLTlkNzMtY2I5MzRmZDVhOWM2-T04=",
getCmrID("ON", []string{"YzcxMTcyNGYtMzQyZC00ZmU2LTk0ZGYtYWI2Y2YxMDMwMTQy-QXV0aG9ycw==", "NzE0ZThkZGItNDAyMC00MDRjLTlkNzMtY2I5MzRmZDVhOWM2-T04="}), "Wrong CMR ID encoding")
}
20 changes: 16 additions & 4 deletions testdata/exampleElasticModel.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,28 @@
"cmr_subjects_ids": ["YWU2Zjg3ODMtNjNhNS00ZjUwLTg3MDktZTVkZjU1NDJjOWNj-U3ViamVjdHM="],
"cmr_topics": ["Life"],
"cmr_topics_ids": ["MWI3OGNhMzctZGMzYi00OTE3LWFhMGMtNGIwZTg4ZGJmZjc5-VG9waWNz"],
"cmr_people": ["John Doe"],
"cmr_people_ids": ["NzZlY2VlNTItOTk3ZC00M2EwLTljMzItNDlmMDQxNzlkZDI0-UE4="],
"cmr_people": [
"Joe Smith",
"Martin Testerman"
],
"cmr_people_ids": [
"NmFmNDU4NWQtZDBkYS00YjEyLWEwMmYtOWE0YTYwYTA2ZDRj-UE4=",
"NGI4YWNhOTktOTc0Ny00MTdiLTkzNzMtZTUwMDM4MGIyODdh-UE4="
],
"cmr_regions": ["Europe"],
"cmr_regions_ids": ["MjA1NWUzN2YtZmIzMC00ZTU2LWEwNTMtYTQ4NDQ2NDFlMzI3-R0w="],
"cmr_icb": null,
"cmr_icb_ids": null,
"cmr_iptc": null,
"cmr_iptc_ids": null,
"cmr_authors_ids": ["NzZlY2VlNTItOTk3ZC00M2EwLTljMzItNDlmMDQxNzlkZDI0-QXV0aG9ycw=="],
"cmr_authors": ["John Doe"],
"cmr_authors_ids": [
"NzZlY2VlNTItOTk3ZC00M2EwLTljMzItNDlmMDQxNzlkZDI0-QXV0aG9ycw==",
"YzcxMTcyNGYtMzQyZC00ZmU2LTk0ZGYtYWI2Y2YxMDMwMTQy-QXV0aG9ycw=="
],
"cmr_authors": [
"John Doe",
"Joe Smith"
],
"cmr_companynames": [
"Pomegranate",
"ACME Corp"
Expand Down
82 changes: 72 additions & 10 deletions testdata/exampleEnrichedContentModel.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
],
"predicate": "isClassifiedBy",
"tmeIDs": [
"05d58033-0456-4ee7-9408-0fef202a0bf7"
"MDVkNTgwMzMtMDQ1Ni00ZWU3LTk0MDgtMGZlZjIwMmEwYmY3-T04="
]
},
"provenances": [
Expand Down Expand Up @@ -63,7 +63,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"a356b264-ab94-4453-8155-44638d14a4f0"
"YTM1NmIyNjQtYWI5NC00NDUzLTgxNTUtNDQ2MzhkMTRhNGYw-T04="
]
},
"provenances": [
Expand Down Expand Up @@ -94,7 +94,69 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"76ecee52-997d-43a0-9c32-49f04179dd24"
"NzZlY2VlNTItOTk3ZC00M2EwLTljMzItNDlmMDQxNzlkZDI0-QXV0aG9ycw=="
]
},
"provenances": [
{
"scores": [
{
"scoringSystem": "http://api.ft.com/scoringsystem/FT-RELEVANCE-SYSTEM",
"value": 0.375
},
{
"scoringSystem": "http://api.ft.com/scoringsystem/FT-CONFIDENCE-SYSTEM",
"value": 0.9996836123273414
}
],
"atTime": "2016-01-20T19:43:47.314Z",
"agentRole": "http://api.ft.com/things/286b1285-737b-4fac-87dc-30a37575df80"
}
]
},
{
"thing": {
"id": "http://api.ft.com/things/bec5a464-bdf8-4f5a-95f0-8e2ea253c589",
"prefLabel": "Joe Smith",
"predicate": "hasAuthor",
"types": [
"http://www.ft.com/ontology/person/Person",
"http://www.ft.com/ontology/core/Thing",
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"NmFmNDU4NWQtZDBkYS00YjEyLWEwMmYtOWE0YTYwYTA2ZDRj-UE4=",
"YzcxMTcyNGYtMzQyZC00ZmU2LTk0ZGYtYWI2Y2YxMDMwMTQy-QXV0aG9ycw=="
]
},
"provenances": [
{
"scores": [
{
"scoringSystem": "http://api.ft.com/scoringsystem/FT-RELEVANCE-SYSTEM",
"value": 0.375
},
{
"scoringSystem": "http://api.ft.com/scoringsystem/FT-CONFIDENCE-SYSTEM",
"value": 0.9996836123273414
}
],
"atTime": "2016-01-20T19:43:47.314Z",
"agentRole": "http://api.ft.com/things/286b1285-737b-4fac-87dc-30a37575df80"
}
]
},
{
"thing": {
"id": "http://api.ft.com/things/bec5a464-bdf8-4f5a-95f0-8e2ea253c589",
"prefLabel": "Martin Testerman",
"types": [
"http://www.ft.com/ontology/person/Person",
"http://www.ft.com/ontology/core/Thing",
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"NGI4YWNhOTktOTc0Ny00MTdiLTkzNzMtZTUwMDM4MGIyODdh-UE4="
]
},
"provenances": [
Expand Down Expand Up @@ -124,7 +186,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"f99e9429-09c5-48c5-b018-6cab93a2c003"
"Zjk5ZTk0MjktMDljNS00OGM1LWIwMTgtNmNhYjkzYTJjMDAz-QnJhbmRz"
]
},
"provenances": [
Expand Down Expand Up @@ -154,7 +216,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"ae6f8783-63a5-4f50-8709-e5df5542c9cc"
"YWU2Zjg3ODMtNjNhNS00ZjUwLTg3MDktZTVkZjU1NDJjOWNj-U3ViamVjdHM="
]
},
"provenances": [
Expand Down Expand Up @@ -185,7 +247,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"1554cc6e-f981-4fda-8eba-02cb26fd70f9"
"MTU1NGNjNmUtZjk4MS00ZmRhLThlYmEtMDJjYjI2ZmQ3MGY5-U2VjdGlvbnM="
]
},
"provenances": [
Expand Down Expand Up @@ -216,7 +278,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"1b78ca37-dc3b-4917-aa0c-4b0e88dbff79"
"MWI3OGNhMzctZGMzYi00OTE3LWFhMGMtNGIwZTg4ZGJmZjc5-VG9waWNz"
]
},
"provenances": [
Expand Down Expand Up @@ -246,7 +308,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"2055e37f-fb30-4e56-a053-a4844641e327"
"MjA1NWUzN2YtZmIzMC00ZTU2LWEwNTMtYTQ4NDQ2NDFlMzI3-R0w="
]
},
"provenances": [
Expand Down Expand Up @@ -276,7 +338,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"44b1d0db-da34-45bf-9b6a-df9d33df6dee"
"NDRiMWQwZGItZGEzNC00NWJmLTliNmEtZGY5ZDMzZGY2ZGVl-R2VucmVz"
]
},
"provenances": [
Expand Down Expand Up @@ -306,7 +368,7 @@
"http://www.ft.com/ontology/concept/Concept"
],
"tmeIDs": [
"3afa56fc-1004-4f39-b9e4-72bed5f1fb88"
"M2FmYTU2ZmMtMTAwNC00ZjM5LWI5ZTQtNzJiZWQ1ZjFmYjg4-U3BlY2lhbFJlcG9ydHM="
]
},
"provenances": [
Expand Down

0 comments on commit 97b3733

Please sign in to comment.