diff --git a/content_indexer.go b/content_indexer.go index c692cdc..162c57a 100644 --- a/content_indexer.go +++ b/content_indexer.go @@ -20,16 +20,18 @@ const ( transactionIDHeader = "X-Request-Id" blogsAuthority = "http://api.ft.com/system/FT-LABS-WP" articleAuthority = "http://api.ft.com/system/FTCOM-METHODE" - videoAuthority = "http://api.ft.com/system/BRIGHTCOVE" + videoAuthority = "http://api.ft.com/system/NEXT-VIDEO-EDITOR" originHeader = "Origin-System-Id" methodeOrigin = "methode-web-pub" wordpressOrigin = "wordpress" - brightcoveOrigin = "brightcove" + videoOrigin = "next-video-editor" blogPostType = "blogPost" articleType = "article" videoType = "video" ) +var allowedTypes = []string{"Article", "Video"} + type contentIndexer struct { esServiceInstance esServiceI server *http.Server @@ -142,6 +144,11 @@ func (indexer *contentIndexer) handleMessage(msg consumer.Message) { return } + if !contains(allowedTypes, combinedPostPublicationEvent.Content.Type) { + log.Infof("[%s] Ignoring message of type %s", tid, combinedPostPublicationEvent.Content.Type) + return + } + uuid := combinedPostPublicationEvent.Content.UUID log.Printf("[%s] Processing combined post publication event for uuid [%s]", tid, uuid) @@ -163,7 +170,7 @@ func (indexer *contentIndexer) handleMessage(msg consumer.Message) { contentType = articleType } else if strings.Contains(origin, wordpressOrigin) { contentType = blogPostType - } else if strings.Contains(origin, brightcoveOrigin) { + } else if strings.Contains(origin, videoOrigin) { contentType = videoType } else { log.Errorf("[%s] Failed to index content with UUID %s. Could not infer type of content.", tid, uuid) @@ -187,3 +194,12 @@ func (indexer *contentIndexer) handleMessage(msg consumer.Message) { } } } + +func contains(list []string, elem string) bool { + for _, a := range list { + if a == elem { + return true + } + } + return false +} diff --git a/content_indexer_test.go b/content_indexer_test.go index c0e6684..bcf0ab2 100644 --- a/content_indexer_test.go +++ b/content_indexer_test.go @@ -232,7 +232,7 @@ func TestHandleWriteMessageVideo(t *testing.T) { inputJSON, err := ioutil.ReadFile("testdata/exampleEnrichedContentModel.json") assert.NoError(err, "Unexpected error") - input := strings.Replace(string(inputJSON), "FTCOM-METHODE", "BRIGHTCOVE", 1) + input := strings.Replace(string(inputJSON), "FTCOM-METHODE", "NEXT-VIDEO-EDITOR", 1) serviceMock := &esServiceMock{} @@ -244,6 +244,23 @@ func TestHandleWriteMessageVideo(t *testing.T) { serviceMock.AssertExpectations(t) } +func TestHandleWriteMessageUnknownType(t *testing.T) { + assert := assert.New(t) + + inputJSON, err := ioutil.ReadFile("testdata/exampleEnrichedContentModel.json") + assert.NoError(err, "Unexpected error") + input := strings.Replace(string(inputJSON), `"Article"`, `"Content"`, 1) + + serviceMock := &esServiceMock{} + + indexer := contentIndexer{esServiceInstance: serviceMock} + indexer.handleMessage(consumer.Message{Body: input}) + + serviceMock.AssertNotCalled(t, "writeData", mock.Anything, "aae9611e-f66c-4fe4-a6c6-2e2bdea69060", mock.Anything) + serviceMock.AssertNotCalled(t, "deleteData", mock.Anything, "aae9611e-f66c-4fe4-a6c6-2e2bdea69060") + serviceMock.AssertExpectations(t) +} + func TestHandleWriteMessageNoType(t *testing.T) { assert := assert.New(t) diff --git a/model.go b/model.go index d9b4969..ec48ab6 100644 --- a/model.go +++ b/model.go @@ -30,7 +30,7 @@ const ( type enrichedContentModel struct { UUID string `json:"uuid"` Content contentModel `json:"content"` - Metadata annotations `json:"v1-metadata"` + Metadata annotations `json:"metadata"` } type contentModel struct { @@ -47,6 +47,7 @@ type contentModel struct { Description string `json:"description"` MainImage string `json:"mainImage"` PublishReference string `json:"publishReference"` + Type string `json:"type"` } type identifier struct { diff --git a/model_mapping_test.go b/model_mapping_test.go index 5442bad..521d61b 100644 --- a/model_mapping_test.go +++ b/model_mapping_test.go @@ -17,6 +17,8 @@ func TestConvertToESContentModel(t *testing.T) { }{ {"testdata/exampleEnrichedContentModel.json", "testdata/exampleElasticModel.json"}, {"testdata/testInput1.json", "testdata/testOutput1.json"}, + {"testdata/testInput2.json", "testdata/testOutput2.json"}, + {"testdata/testInput3.json", "testdata/testOutput3.json"}, } for _, test := range tests { diff --git a/testdata/exampleEnrichedContentModel.json b/testdata/exampleEnrichedContentModel.json index 212e606..9b14528 100644 --- a/testdata/exampleEnrichedContentModel.json +++ b/testdata/exampleEnrichedContentModel.json @@ -17,9 +17,10 @@ "description": null, "mainImage": "7d696608-e032-4c61-a977-bdcfceb64291", "publishReference": "tid_f7k7nexpop", - "lastModified": "2017-02-01T03:57:55.505Z" + "lastModified": "2017-02-01T03:57:55.505Z", + "type": "Article" }, - "v1-metadata": [ + "metadata": [ { "thing": { "id": "http://api.ft.com/things/173f9769-2085-48e7-9927-3a71366f2ea3", diff --git a/testdata/testInput1.json b/testdata/testInput1.json index a72815e..115f969 100644 --- a/testdata/testInput1.json +++ b/testdata/testInput1.json @@ -19,9 +19,10 @@ "standfirst": "standfirst", "description": "description", "mainImage": "", - "publishReference": "tid_riega1hr5w" + "publishReference": "tid_riega1hr5w", + "type": "Article" }, - "v1-metadata": [ + "metadata": [ { "thing": { "id": "http://api.ft.com/things/bb923fea-82df-3d33-8fe1-e52292721258", diff --git a/testdata/testInput2.json b/testdata/testInput2.json new file mode 100644 index 0000000..7d870f2 --- /dev/null +++ b/testdata/testInput2.json @@ -0,0 +1,90 @@ +{ + "uuid": "62c53650-68d6-4ed9-9932-e76c53bebd35", + "content": { + "uuid": "62c53650-68d6-4ed9-9932-e76c53bebd35", + "title": "test title", + "body": "test body", + "identifiers": [ + { + "authority": "http://api.ft.com/system/FTCOM-METHODE", + "identifierValue": "62c53650-68d6-4ed9-9932-e76c53bebd35" + } + ], + "publishedDate": "", + "lastModified": "", + "firstPublishedDate": "", + "mediaType": "", + "marked_deleted": false, + "byline": "John Doe in London", + "standfirst": "standfirst", + "description": "description", + "mainImage": "", + "publishReference": "tid_riega1hr5w", + "type": "Article" + }, + "metadata": [ + { + "thing": { + "id": "http://api.ft.com/things/bec5a464-bdf8-4f5a-95f0-8e2ea253c589", + "prefLabel": "Martin Testerman", + "predicate": "http://www.ft.com/ontology/annotation/about", + "types": [ + "http://www.ft.com/ontology/person/Person", + "http://www.ft.com/ontology/core/Thing", + "http://www.ft.com/ontology/concept/Concept" + ], + "tmeIDs": [ + "NGI4YWNhOTktOTc0Ny00MTdiLTkzNzMtZTUwMDM4MGIyODdh-UE4=" + ] + }, + "provenances": [ + { + "scores": [ + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-RELEVANCE-SYSTEM", + "value": 0.375 + }, + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-CONFIDENCE-SYSTEM", + "value": 0.9996836123273414 + } + ], + "atTime": "2016-01-20T19:43:47.314Z", + "agentRole": "http://api.ft.com/things/286b1285-737b-4fac-87dc-30a37575df80" + } + ] + }, + { + "thing": { + "id": "http://api.ft.com/things/1d216512-3d7c-4757-bb25-824f3a21170b", + "prefLabel": "Special", + "predicate": "http://www.ft.com/ontology/classification/isPrimarilyClassifiedBy", + "types": [ + "http://www.ft.com/ontology/SpecialReport", + "http://www.ft.com/ontology/core/Thing", + "http://www.ft.com/ontology/concept/Concept" + ], + "apiUrl": "http://api.ft.com/things/1d216512-3d7c-4757-bb25-824f3a21170b", + "tmeIDs": [ + "M2FmYTU2ZmMtMTAwNC00ZjM5LWI5ZTQtNzJiZWQ1ZjFmYjg4-U3BlY2lhbFJlcG9ydHM=" + ] + }, + "provenances": [ + { + "scores": [ + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-RELEVANCE-SYSTEM", + "value": 0.375 + }, + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-CONFIDENCE-SYSTEM", + "value": 0.9996836123273414 + } + ], + "atTime": "2016-01-20T19:43:47.314Z", + "agentRole": "http://api.ft.com/things/286b1285-737b-4fac-87dc-30a37575df80" + } + ] + } + ] +} \ No newline at end of file diff --git a/testdata/testInput3.json b/testdata/testInput3.json new file mode 100644 index 0000000..ff7adb2 --- /dev/null +++ b/testdata/testInput3.json @@ -0,0 +1,55 @@ +{ + "uuid": "b17756fe-0f62-4cf1-9deb-ca7a2ff80172", + "content": { + "uuid": "b17756fe-0f62-4cf1-9deb-ca7a2ff80172", + "title": "test title", + "body": "test body", + "identifiers": [ + { + "authority": "http://api.ft.com/system/FTCOM-METHODE", + "identifierValue": "b17756fe-0f62-4cf1-9deb-ca7a2ff80172" + } + ], + "publishedDate": "", + "lastModified": "", + "firstPublishedDate": "", + "mediaType": "", + "marked_deleted": false, + "byline": "John Doe in London", + "standfirst": "standfirst", + "description": "description", + "mainImage": "", + "publishReference": "tid_riega1hr5w", + "type": "Article" + }, + "metadata": [ + { + "thing": { + "id": "http://api.ft.com/things/173f9769-2085-48e7-9927-3a71366f2ea3", + "prefLabel": "Pomegranate", + "types": [ + "http://www.ft.com/ontology/organisation/Organisation", + "http://www.ft.com/ontology/core/Thing", + "http://www.ft.com/ontology/concept/Concept" + ], + "predicate": "http://www.ft.com/ontology/annotation/about" + }, + "provenances": [ + { + "scores": [ + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-RELEVANCE-SYSTEM", + "value": 1 + }, + { + "scoringSystem": "http://api.ft.com/scoringsystem/FT-CONFIDENCE-SYSTEM", + "value": 0.9932743203464962 + } + ], + "atTime": "2016-01-20T19:43:47.314Z", + "agentRole": "http://api.ft.com/things/286b1285-737b-4fac-87dc-30a37575df80" + } + ] + } + ] +} \ No newline at end of file diff --git a/testdata/testOutput2.json b/testdata/testOutput2.json new file mode 100644 index 0000000..2cf79ee --- /dev/null +++ b/testdata/testOutput2.json @@ -0,0 +1,91 @@ +{ + "uid": "62c53650-68d6-4ed9-9932-e76c53bebd35", + "last_metadata_publish": null, + "index_date": null, + "mark_deleted": false, + "story_id": null, + "lead_headline": "test title", + "byline": "John Doe in London", + "body": "test body", + "url": "https://www.ft.com/content/62c53650-68d6-4ed9-9932-e76c53bebd35", + "initial_publish": null, + "last_publish": null, + "content_type": "article", + "provider_name": null, + "length_millis": 0, + "short_description": null, + "thumbnail_url": null, + "section_link": null, + "secondary_image_id": null, + "contributor_rights": null, + "source_code": null, + "storymodel_id": null, + "model_api_url": null, + "model_master_source": null, + "model_master_id": null, + "model_excerpt": null, + "model_resource_uri": null, + "cmr_primarysection": "Special", + "cmr_primarytheme": "Martin Testerman", + "cmr_mediatype": null, + "cmr_metadataupdatetime": null, + "cmr_primarysection_id": "M2FmYTU2ZmMtMTAwNC00ZjM5LWI5ZTQtNzJiZWQ1ZjFmYjg4-U3BlY2lhbFJlcG9ydHM=", + "cmr_primarytheme_id": "NGI4YWNhOTktOTc0Ny00MTdiLTkzNzMtZTUwMDM4MGIyODdh-UE4=", + "cmr_mediatype_id": null, + "cmr_brands": null, + "cmr_brands_ids": null, + "cmr_specialreports": [ + "Special" + ], + "cmr_specialreports_ids": [ + "M2FmYTU2ZmMtMTAwNC00ZjM5LWI5ZTQtNzJiZWQ1ZjFmYjg4-U3BlY2lhbFJlcG9ydHM=" + ], + "cmr_sections": null, + "cmr_sections_ids": null, + "cmr_subjects": null, + "cmr_subjects_ids": null, + "cmr_topics": null, + "cmr_topics_ids": null, + "cmr_people": [ + "Martin Testerman" + ], + "cmr_people_ids": [ + "NGI4YWNhOTktOTc0Ny00MTdiLTkzNzMtZTUwMDM4MGIyODdh-UE4=" + ], + "cmr_regions": null, + "cmr_regions_ids": null, + "cmr_icb": null, + "cmr_icb_ids": null, + "cmr_iptc": null, + "cmr_iptc_ids": null, + "cmr_authors_ids": null, + "cmr_authors": null, + "cmr_companynames": null, + "cmr_companynames_ids": null, + "cmr_orgnames": null, + "cmr_orgnames_ids": null, + "bestStory": false, + "internalContentType": "article", + "category": "article", + "lookupFailure": false, + "format": "Articles", + "cmr_genre": null, + "cmr_genre_id": null, + "region": null, + "topics": null, + "displayCodes": null, + "displayCodeNames": null, + "naicsNames": null, + "editorsTags": null, + "countryCodes": null, + "countryNames": null, + "subjects": null, + "companyNamesAuto": null, + "organisationNamesAuto": null, + "companyNamesEditorial": null, + "companyTickerCodeAuto": null, + "companyTickerCodeEditorial": null, + "articleTypes": null, + "articleBrands": null, + "publishReference": "tid_riega1hr5w" +} \ No newline at end of file diff --git a/testdata/testOutput3.json b/testdata/testOutput3.json new file mode 100644 index 0000000..10e789e --- /dev/null +++ b/testdata/testOutput3.json @@ -0,0 +1,87 @@ +{ + "uid": "b17756fe-0f62-4cf1-9deb-ca7a2ff80172", + "last_metadata_publish": null, + "index_date": null, + "mark_deleted": false, + "story_id": null, + "lead_headline": "test title", + "byline": "John Doe in London", + "body": "test body", + "url": "https://www.ft.com/content/b17756fe-0f62-4cf1-9deb-ca7a2ff80172", + "initial_publish": null, + "last_publish": null, + "content_type": "article", + "provider_name": null, + "length_millis": 0, + "short_description": null, + "thumbnail_url": null, + "section_link": null, + "secondary_image_id": null, + "contributor_rights": null, + "source_code": null, + "storymodel_id": null, + "model_api_url": null, + "model_master_source": null, + "model_master_id": null, + "model_excerpt": null, + "model_resource_uri": null, + "cmr_primarysection": null, + "cmr_primarytheme": "Pomegranate", + "cmr_mediatype": null, + "cmr_metadataupdatetime": null, + "cmr_primarysection_id": null, + "cmr_primarytheme_id": "http://api.ft.com/things/173f9769-2085-48e7-9927-3a71366f2ea3", + "cmr_mediatype_id": null, + "cmr_brands": null, + "cmr_brands_ids": null, + "cmr_specialreports": null, + "cmr_specialreports_ids": null, + "cmr_sections": null, + "cmr_sections_ids": null, + "cmr_subjects": null, + "cmr_subjects_ids": null, + "cmr_topics": null, + "cmr_topics_ids": null, + "cmr_people": null, + "cmr_people_ids": null, + "cmr_regions": null, + "cmr_regions_ids": null, + "cmr_icb": null, + "cmr_icb_ids": null, + "cmr_iptc": null, + "cmr_iptc_ids": null, + "cmr_authors_ids": null, + "cmr_authors": null, + "cmr_companynames": null, + "cmr_companynames_ids": null, + "cmr_orgnames": [ + "Pomegranate" + ], + "cmr_orgnames_ids": [ + "http://api.ft.com/things/173f9769-2085-48e7-9927-3a71366f2ea3" + ], + "bestStory": false, + "internalContentType": "article", + "category": "article", + "lookupFailure": false, + "format": "Articles", + "cmr_genre": null, + "cmr_genre_id": null, + "region": null, + "topics": null, + "displayCodes": null, + "displayCodeNames": null, + "naicsNames": null, + "editorsTags": null, + "countryCodes": null, + "countryNames": null, + "subjects": null, + "companyNamesAuto": null, + "organisationNamesAuto": null, + "companyNamesEditorial": null, + "companyTickerCodeAuto": null, + "companyTickerCodeEditorial": null, + "articleTypes": null, + "articleBrands": null, + "publishReference": "tid_riega1hr5w" +} \ No newline at end of file