Skip to content
This repository has been archived by the owner on Oct 24, 2022. It is now read-only.

Commit

Permalink
Detects unhandled collections in OAI feed
Browse files Browse the repository at this point in the history
If an OAI Harvest encounters any Collections that are not handled,
an email is sent to sysadmins (eventually this may change to email
the appropriate Analysts instead, but for now notifying sysadmins
works well for us).

This also adds the ability to create Inactive Collections. Marking a
Collection as inactive states that you have no interest in it, which
prevents a notification when it is detected again on a future Harvest.

closes #81
  • Loading branch information
JPrevost committed Jun 18, 2015
1 parent 55ed2a9 commit 790eca7
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 33 deletions.
5 changes: 3 additions & 2 deletions app/controllers/Application.scala
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ object Application extends Controller with Security {
"policy" -> nonEmptyText,
"created" -> ignored(new Date),
"updated" -> ignored(new Date),
"deposits" -> ignored(0)
"deposits" -> ignored(0),
"active" -> boolean
)(Collection.apply)(Collection.unapply)
)

Expand All @@ -349,7 +350,7 @@ object Application extends Controller with Security {
collForm.bindFromRequest.fold(
errors => BadRequest(views.html.collection.create(pub, errors)),
value => {
val coll = Collection.make(id, value.ctypeId, value.resmapId, value.tag, value.description, value.policy)
val coll = Collection.make(id, value.ctypeId, value.resmapId, value.tag, value.description, value.policy, value.active)
Redirect(routes.Application.publisher(id))
}
)
Expand Down
35 changes: 24 additions & 11 deletions app/models/Collection.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ case class Collection(id: Int,
policy: String,
created: Date,
updated: Date,
deposits: Int) {
deposits: Int,
active: Boolean) {

def recordDeposit {
val newDep = deposits + 1
Expand All @@ -46,9 +47,9 @@ object Collection {
val coll = {
get[Int]("id") ~ get[Int]("publisher_id") ~ get[Int]("content_type_id") ~ get[Int]("resource_map_id") ~
get[String]("tag") ~ get[String]("description") ~ get[String]("policy") ~
get[Date]("created") ~ get[Date]("updated") ~ get[Int]("deposits") map {
case id ~ publisherId ~ ctypeId ~ resmapId ~ tag ~ description ~ policy ~ created ~ updated ~ deposits =>
Collection(id, publisherId, ctypeId, resmapId, tag, description, policy, created, updated, deposits)
get[Date]("created") ~ get[Date]("updated") ~ get[Int]("deposits") ~get[Boolean]("active") map {
case id ~ publisherId ~ ctypeId ~ resmapId ~ tag ~ description ~ policy ~ created ~ updated ~ deposits ~ active =>
Collection(id, publisherId, ctypeId, resmapId, tag, description, policy, created, updated, deposits, active)
}
}

Expand All @@ -64,9 +65,13 @@ object Collection {
}
}

def findByTag(tag: String): Option[Collection] = {
def findByTag(tag: String, active: Boolean = true): Option[Collection] = {
DB.withConnection { implicit c =>
SQL("select * from collection where tag = {tag}").on('tag -> tag).as(coll.singleOpt)
SQL("""
SELECT * FROM collection
WHERE tag = {tag}
AND active = {active}
""").on('tag -> tag, 'active -> active).as(coll.singleOpt)
}
}

Expand All @@ -76,16 +81,24 @@ object Collection {
}
}

def create(publisherId: Int, ctypeId: Int, resmapId: Int, tag: String, description: String, policy: String) = {
def create(publisherId: Int, ctypeId: Int, resmapId: Int, tag: String, description: String, policy: String, active: Boolean = true) = {
val created = new Date
val updated = created
DB.withConnection { implicit c =>
SQL("insert into collection (publisher_id, content_type_id, resource_map_id, tag, description, policy, created, updated, deposits) values ({publisher_id}, {ctype_id}, {resmap_id}, {tag}, {description}, {policy}, {created}, {updated}, {deposits})")
.on('publisher_id -> publisherId, 'ctype_id -> ctypeId, 'resmap_id -> resmapId, 'tag -> tag, 'description -> description, 'policy -> policy, 'created -> created, 'updated -> updated, 'deposits -> 0).executeInsert()
SQL("""
insert into collection (publisher_id, content_type_id, resource_map_id, tag,
description, policy, created, updated, deposits, active)
values ({publisher_id}, {ctype_id}, {resmap_id}, {tag}, {description}, {policy},
{created}, {updated}, {deposits}, {active})
""").on('publisher_id -> publisherId, 'ctype_id -> ctypeId, 'resmap_id -> resmapId,
'tag -> tag, 'description -> description, 'policy -> policy,
'created -> created, 'updated -> updated, 'deposits -> 0, 'active -> active)
.executeInsert()
}
}

def make(publisherId: Int, ctypeId: Int, resmapId: Int, tag: String, description: String, policy: String): Collection = {
findById(create(publisherId, ctypeId, resmapId, tag, description, policy).get.toInt).get
def make(publisherId: Int, ctypeId: Int, resmapId: Int, tag: String, description: String,
policy: String, active: Boolean = true): Collection = {
findById(create(publisherId, ctypeId, resmapId, tag, description, policy, active).get.toInt).get
}
}
1 change: 1 addition & 0 deletions app/views/collection/create.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
@inputText(collForm("tag"))
@textarea(collForm("description"))
@inputText(collForm("policy"))
@select(collForm("active"), options(Map("true" -> "true", "false" -> "false")))
<input id="submit" type="submit" value="Create">
}
</div>
Expand Down
22 changes: 22 additions & 0 deletions app/views/email/unhandled_collections.scala.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@*****************************************************************************
* Email template used to notify of any unhandled collections in a harvest   *
*                                                                           *
* harvest    - the Harvest being reported on                                *
* collection - identifiers of the collections that were not handled         *
*                                                                           *
* Publisher falls back to "Unknown" when the harvest has none.              *
*                                                                           *
* Copyright (c) 2015 MIT Libraries                                          *
*****************************************************************************@
@(harvest: Harvest, collection: List[String])
The following collections were not handled during a Harvest:

Harvest: @harvest.name
Publisher: @harvest.publisher.map(_.name).getOrElse("Unknown")
Link to Harvest: http://scoap3.topichub.org/harvest/@harvest.id

Unhandled Collections:
@collection.map {c =>
* @c
}

You should add this Collection to the Publisher.

If you are not interested in a Collection listed above, you can stop
receiving these notices by adding it as an Inactive Collection.

If you are interested in a Collection, you should add it as an active Collection.
23 changes: 13 additions & 10 deletions app/workers/Cataloger.scala
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Cataloger(resmap: ResourceMap, content: StoredContent) {
val (idHits, lblHits) = findValues(Finder.forSchemeAndFormat(scheme.id, format), source)
// add cardinality checking here
var idx = 0
println("IDHits size: " + idHits.size)
// println("IDHits size: " + idHits.size)
for (id <- idHits) {
// check for and utilize existing topics
val topic = Topic.forSchemeAndTag(scheme.tag, id).getOrElse(createTopic(scheme, id, lblHits(idx)))
Expand All @@ -68,13 +68,13 @@ class Cataloger(resmap: ResourceMap, content: StoredContent) {
var lblHits: Seq[String] = null
if (doc != null) {
// do Id & label
println("in process about to evaluate: " + finder.idKey)
// println("in process about to evaluate: " + finder.idKey)
var keyParts = finder.idKey.split(" ")
// new way
println("keyParts0: " + keyParts(0))
// println("keyParts0: " + keyParts(0))
val xp = new ScalesXPath(keyParts(0)).withNameConversion(ScalesXPath.localOnly)
val hits = xp.evaluate(top(doc))
println("Post eval num hits: " + hits.size)
// println("Post eval num hits: " + hits.size)
if (hits.size > 0) {
if (keyParts.length == 2) {
val regX = keyParts(1).r
Expand Down Expand Up @@ -102,16 +102,16 @@ class Cataloger(resmap: ResourceMap, content: StoredContent) {
}
}
// also stow in infoCache
idHits.foreach(println)
//idHits.foreach(println)
//infoCache += ("id" -> idHits)
if (idHits.size > 0) {
val idl = finder.idLabel
// if idl is an XPath, evaluate it
if (idl != null && idl.length > 0 && idl.indexOf("/") >= 0) {
println("in process about to evaluate label: " + idl)
// println("in process about to evaluate label: " + idl)
lblHits = xpathFind(idl, doc)
} else if (idl != null && idl.length > 0) {
println("process filtered value; " + filteredValue(idl, 0))
// println("process filtered value; " + filteredValue(idl, 0))
var lblList = List[String]()
var count = 0
for (a <- idHits) {
Expand Down Expand Up @@ -197,7 +197,7 @@ class Cataloger(resmap: ResourceMap, content: StoredContent) {
// is value cached?
var value = infoCache.get(token) match {
case Some(x) =>
println("In filter token: " + token + " index: " + index + " size: " + x.size)
// println("In filter token: " + token + " index: " + index + " size: " + x.size)
x(index)
case _ => null
}
Expand Down Expand Up @@ -232,7 +232,7 @@ class Cataloger(resmap: ResourceMap, content: StoredContent) {

def docToParse(name: String) = {
val fname = filteredValue(name, 0)
println("doc2p: fname: " + fname)
// println("doc2p: fname: " + fname)
// check doc cache first
docCache.get(fname) match {
case Some(x) => x
Expand Down Expand Up @@ -267,9 +267,12 @@ object Cataloger {
val resmap = ResourceMap.findById(coll.resmapId).get
val cataloger = new Cataloger(resmap, Store.content(item))
val ctype = ContentType.findById(item.ctypeId).get

println(s"Cataloging Item: ${item.objKey}")

// start with metadata schemes
ctype.schemes("meta").foreach( sch => {
println("Found scheme:" + sch.tag)
// println("Found scheme:" + sch.tag)
cataloger.metadata(sch, item) }
)
// next topic schemes
Expand Down
21 changes: 20 additions & 1 deletion app/workers/Harvester.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class Harvester {
}

def oaiHarvest(harvest: Harvest) = {
var unhandledCollections = scala.collection.mutable.ListBuffer.empty[String]

def parse(xml: XMLEventReader) = {
var objId: Option[String] = None
Expand All @@ -80,6 +81,16 @@ class Harvester {
case _ => if (oaiDetected == false && counter > 2) { abortHarvest("OAI xml not detected."); break } else { counter = counter + 1 }
}
}
if (unhandledCollections.toList.distinct.size > 0) {
notifyUnhandledCollections(unhandledCollections.toList.distinct)
}
}

/**
 * Emails every user with the "sysadmin" role the list of collections that
 * were not handled during this harvest.
 *
 * The rendered message is always printed; the email itself is skipped when
 * there is no sysadmin user, so the mailer is never given an empty
 * recipient string.
 *
 * @param collections identifiers of the unhandled collections to report
 */
def notifyUnhandledCollections(collections: List[String]) = {
  val msg = views.txt.email.unhandled_collections(harvest, collections).body
  println(msg)

  val sysadminEmails = User.allByRole("sysadmin").map(_.email)
  if (sysadminEmails.nonEmpty) {
    Emailer.notify(sysadminEmails.mkString(","), "SCOAP3Hub: An unhandled collection was detected", msg)
  } else {
    println("DEBUG: no sysadmin users found; unhandled collection notice was not emailed")
  }
}

def handleOaiError(errorText: String, errorCode: String) = {
Expand All @@ -94,7 +105,7 @@ class Harvester {
def processItem(objId: Option[String], collectionKey: Option[String]) = {
println("Got OID:" + objId.getOrElse("Unknown") + " in coll: " + collectionKey.getOrElse("Unknown"))
// look up collection, and process if known & item not already created
val collOpt = Collection.findByTag(collectionKey.get);
val collOpt = Collection.findByTag(collectionKey.get)
if (collOpt.isDefined && Item.findByKey(objId.get).isEmpty) {
// create an Item and send to cataloger worker
val coll = collOpt.get
Expand All @@ -104,6 +115,14 @@ class Harvester {
val item = Item.make(coll.id, coll.ctypeId, "remote:" + resUrl, oid)
coll.recordDeposit
Harvester.cataloger ! item
} else if (collOpt.isDefined) {
println("DEBUG: collection is defined but Item is already cataloged")
} else if (Collection.findByTag(collectionKey.get, false).isDefined) {
println(s"DEBUG: Collection is ignored: ${collectionKey.get}")
} else {
// keep track so we can send an email so someone knows a new collection was found
println(s"DEBUG: Collection is not handled: ${collectionKey.get}")
unhandledCollections += collectionKey.get
}
}

Expand Down
18 changes: 9 additions & 9 deletions app/workers/Indexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ object Indexer {
def reindex(dtype: String) = {
// delete current index type
if (indexSvc.contains("bonsai.io")) {
println("DEBUG: use basic auth for WS elasticsearch call")
// println("DEBUG: use basic auth for WS elasticsearch call")
WS.url(indexSvc + dtype)
.withAuth(extractCredentials("username", indexSvc),
extractCredentials("password", indexSvc),
WSAuthScheme.BASIC).delete()
} else {
println("DEBUG: no auth for WS elasticsearch call")
// println("DEBUG: no auth for WS elasticsearch call")
WS.url(indexSvc + dtype).delete()
}

Expand All @@ -66,16 +66,16 @@ object Indexer {
val jdata = stringify(toJson(data))
val elastic_url = indexSvc.concat("topic/").concat(topic.id.toString)
// debug
println("Topic index: " + jdata)
// println("Topic index: " + jdata)

if (indexSvc.contains("bonsai.io")) {
println("DEBUG: use basic auth for WS elasticsearch call")
// println("DEBUG: use basic auth for WS elasticsearch call")
WS.url(elastic_url)
.withAuth(extractCredentials("username", indexSvc),
extractCredentials("password", indexSvc),
WSAuthScheme.BASIC).put(jdata)
} else {
println("DEBUG: no auth for WS elasticsearch call")
// println("DEBUG: no auth for WS elasticsearch call")
WS.url(elastic_url).put(jdata)
}
}
Expand All @@ -91,17 +91,17 @@ object Indexer {
dataMap += "topicSchemeTag" -> toJson(item.topics.map(_.scheme.tag))
dataMap += "topicTag" -> toJson(item.topics.map(_.tag))
val jdata = stringify(toJson(dataMap))
println("Item index: " + dataMap)
println(indexSvc + "item/" + item.id)
// println("Item index: " + dataMap)
// println(indexSvc + "item/" + item.id)

if (indexSvc.contains("bonsai.io")) {
println("DEBUG: use basic auth for WS elasticsearch call")
// println("DEBUG: use basic auth for WS elasticsearch call")
WS.url(elastic_url)
.withAuth(extractCredentials("username", indexSvc),
extractCredentials("password", indexSvc),
WSAuthScheme.BASIC).put(jdata)
} else {
println("DEBUG: no auth for WS elasticsearch call")
// println("DEBUG: no auth for WS elasticsearch call")
WS.url(elastic_url).put(jdata)
}
}
Expand Down
7 changes: 7 additions & 0 deletions conf/evolutions/default/11.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Collection "active" flag
# Ups adds a boolean "active" column to the collection table with a default of true.
# Downs drops that column again.

# --- !Ups

ALTER TABLE collection ADD COLUMN active boolean DEFAULT true;

# --- !Downs

ALTER TABLE collection DROP COLUMN active;
29 changes: 29 additions & 0 deletions test/unit/CollectionSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,35 @@ class CollectionSpec extends Specification {
}
}

// findByTag is called without the `active` argument here, so only the
// collection created as active may be returned.
"#findByTag only returns active Collections by default" in {
  running(FakeApplication(additionalConfiguration = inMemoryDatabase())) {
    val u = User.make("bob", "bob@example.com", "pass", "roley")
    val ct = ContentType.make("tag", "label", "desc", Some("logo"))
    val rm = ResourceMap.make("tag", "desc", Some("swordurl"))
    val pub1 = Publisher.make(u.id, "pubtag", "pubname", "pubdesc", "pubcat", "pubstatus", Some(""), Some(""))
    val pub2 = Publisher.make(u.id, "pubtag2", "pubname2", "pubdesc", "pubcat", "pubstatus", Some(""), Some(""))
    val c1 = Collection.make(pub1.id, ct.id, rm.id, "coll1", "desc", "open")
    // inactive collection: created only so the lookup below has something to exclude
    Collection.make(pub2.id, ct.id, rm.id, "coll2", "desc", "open", active = false)

    Collection.findByTag("coll1") must beSome(c1)
    Collection.findByTag("coll2") must beNone
  }
}

// An inactive collection is invisible to the default lookup and is only
// returned when the caller explicitly asks for active = false.
"#findByTag returns only inactive Collections if requested" in {
  running(FakeApplication(additionalConfiguration = inMemoryDatabase())) {
    val u = User.make("bob", "bob@example.com", "pass", "roley")
    val ct = ContentType.make("tag", "label", "desc", Some("logo"))
    val rm = ResourceMap.make("tag", "desc", Some("swordurl"))
    val pub1 = Publisher.make(u.id, "pubtag", "pubname", "pubdesc", "pubcat", "pubstatus", Some(""), Some(""))
    val c1 = Collection.make(pub1.id, ct.id, rm.id, "coll1", "desc", "open", active = false)

    Collection.findByTag("coll1") must beNone
    Collection.findByTag("coll1", active = false) must beSome(c1)
  }
}

"#findById" in {
running(FakeApplication(additionalConfiguration = inMemoryDatabase())) {
User.create("bob", "bob@example.com", "pass", "roley")
Expand Down

0 comments on commit 790eca7

Please sign in to comment.