From a10df1a512b2be46e04d0ce2e6685ef364d67c5c Mon Sep 17 00:00:00 2001 From: Apil Tamang Date: Tue, 12 Jun 2018 17:40:00 -0400 Subject: [PATCH] Refactor library to use AWS-sdk-java 1.7.4 --- build.sbt | 2 +- .../johnsnowlabs/collections/SearchTrie.scala | 2 +- .../nlp/pretrained/S3ResourceDownloader.scala | 41 +++++++++++++------ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/build.sbt b/build.sbt index 03853347b13f43..c8968902896dfd 100644 --- a/build.sbt +++ b/build.sbt @@ -88,7 +88,7 @@ lazy val utilDependencies = Seq( "org.slf4j" % "slf4j-api" % "1.7.25", "org.apache.commons" % "commons-compress" % "1.15", "org.tensorflow" % "tensorflow" % "1.8.0", - "com.amazonaws" % "aws-java-sdk-s3" % "1.11.313" + "com.amazonaws" % "aws-java-sdk" % "1.7.4" ) lazy val root = (project in file(".")) diff --git a/src/main/scala/com/johnsnowlabs/collections/SearchTrie.scala b/src/main/scala/com/johnsnowlabs/collections/SearchTrie.scala index 79bd60e11ff015..36f887b208b982 100644 --- a/src/main/scala/com/johnsnowlabs/collections/SearchTrie.scala +++ b/src/main/scala/com/johnsnowlabs/collections/SearchTrie.scala @@ -85,7 +85,7 @@ case class SearchTrie object SearchTrie { - def apply(phrases: Array[Array[String]], caseSensitive: Boolean): SearchTrie = { + def apply(phrases: Array[Array[String]], caseSensitive: Boolean = false): SearchTrie = { // Have only root at the beginning val vocab = mutable.Map[String, Int]() diff --git a/src/main/scala/com/johnsnowlabs/nlp/pretrained/S3ResourceDownloader.scala b/src/main/scala/com/johnsnowlabs/nlp/pretrained/S3ResourceDownloader.scala index 86c9714140ee9f..50fe9923a3c29e 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/pretrained/S3ResourceDownloader.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/pretrained/S3ResourceDownloader.scala @@ -6,12 +6,13 @@ import java.sql.Timestamp import java.util.Calendar import java.util.zip.ZipInputStream -import org.apache.hadoop.fs.Path -import com.amazonaws.ClientConfiguration -import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider} -import com.amazonaws.services.s3.AmazonS3ClientBuilder +import com.amazonaws.auth.AWSCredentials +import com.amazonaws.regions.RegionUtils +import com.amazonaws.services.s3.AmazonS3Client import com.amazonaws.services.s3.model.GetObjectRequest +import com.amazonaws.{AmazonServiceException, ClientConfiguration} import com.johnsnowlabs.util.ConfigHelper +import org.apache.hadoop.fs.Path import scala.collection.mutable @@ -33,20 +34,23 @@ class S3ResourceDownloader(bucket: String, } lazy val client = { + val regionObj = RegionUtils.getRegion(region) - val builder = AmazonS3ClientBuilder.standard() - if (credentials.isDefined) - builder.setCredentials(new AWSStaticCredentialsProvider(credentials.get)) - - builder.setRegion(region) val config = new ClientConfiguration() val timeout = ConfigHelper.getConfigValue(ConfigHelper.s3SocketTimeout).map(_.toInt).getOrElse(0) config.setSocketTimeout(timeout) - builder.setClientConfiguration(config) - builder.build() - } + val s3Client = { + if (credentials.isDefined) { + new AmazonS3Client(credentials.get, config) + } else { + new AmazonS3Client(config) + } + } + s3Client.setRegion(regionObj) + s3Client + } private def downloadMetadataIfNeed(folder: String): List[ResourceMetadata] = { val lastState = repoFolder2Metadata.get(folder) @@ -171,4 +175,17 @@ class S3ResourceDownloader(bucket: String, .filter(part => part.nonEmpty) .mkString("/") } + + implicit class S3ClientWrapper(client: AmazonS3Client) { + + def doesObjectExist(bucket: String, key: String): Boolean = { + try { + client.getObjectMetadata(bucket, key) + true + } catch { + case e: AmazonServiceException => if (e.getStatusCode == 404) return false else throw e + } + } + } + }