Skip to content
This repository has been archived by the owner on Oct 24, 2022. It is now read-only.

Commit

Permalink
Handle non-oai response during Harvest
Browse files Browse the repository at this point in the history
closes #214
  • Loading branch information
JPrevost committed Jun 4, 2015
1 parent 36af447 commit f5d5a59
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions app/workers/Harvester.scala
Expand Up @@ -16,6 +16,7 @@ import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import scala.io.Source
import scala.xml.pull._
import scala.util.control.Breaks._

import akka.actor.Props

Expand Down Expand Up @@ -64,16 +65,19 @@ class Harvester {
var readingSpec = false
var onError = false
var attributes = ""
var oaiDetected = false
var counter = 0
while (xml.hasNext) {
xml.next match {
case EvElemStart(_,"OAI-PMH",_,_) => oaiDetected = true
case EvElemStart(_,"error",attrs,_) => onError = true; attributes = attrs.toString
case EvElemStart(_,"identifier",_,_) => readingId = true
case EvElemStart(_,"setSpec",_,_) => readingSpec = true
case EvText(text) if onError => onError = false;
handleOaiError(text, attributes); attributes = ""
case EvText(text) if readingId => objId = Some(text); readingId = false
case EvText(text) if readingSpec => processItem(objId, Some(text)); readingSpec = false
case _ =>
case _ => if (oaiDetected == false && counter > 2) { abortHarvest("OAI xml not detected."); break } else { counter = counter + 1 }
}
}
}
Expand Down Expand Up @@ -123,14 +127,24 @@ class Harvester {
"&until=" + HubUtils.fmtDate(until)
// To test handling of no records found, you can set url as below.
// val url = "http://repo.scoap3.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc&from=2012-05-04&until=2012-05-04"
// To test non-xml
// val url = "http://www.google.com"
// To test xml with no OAI
// val url = "http://api.npr.org/query?id=1029&apiKey=MDE5MDcxMDkzMDE0MzA5MjI3NDgxNDRkMA001"
// debug
println("About to call: " + url)

val request = WS.url(url).get().map { response =>
response.status match {
case 200 => {
parse(new XMLEventReader(Source.fromInputStream(
new ByteArrayInputStream(response.body.getBytes))))
if (response.allHeaders("Content-Type").mkString.contains("text/xml")) {
parse(new XMLEventReader(Source.fromInputStream(
new ByteArrayInputStream(response.body.getBytes))))
} else {
abortHarvest(s"""Got Content-Type ${response.allHeaders("Content-Type").mkString}.
|Expected that should contain 'text/xml' when requesting ${url}"""
.stripMargin)
}
}
case _ => {
abortHarvest(s"${response.status} response code received when requesting ${url}")
Expand Down

0 comments on commit f5d5a59

Please sign in to comment.