Skip to content

Commit

Permalink
#415 Extract ECS API routines as utility methods.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Jun 4, 2024
1 parent 0fce741 commit 78c4835
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 74 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Copyright 2022 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.pramen.extras.notification

import com.typesafe.config.Config
import org.apache.http.HttpStatus
import org.apache.http.config.RegistryBuilder
import org.apache.http.conn.socket.{ConnectionSocketFactory, PlainConnectionSocketFactory}
import org.apache.http.conn.ssl.{NoopHostnameVerifier, SSLConnectionSocketFactory}
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.{CloseableHttpClient, HttpClients}
import org.apache.http.impl.conn.BasicHttpClientConnectionManager
import org.apache.http.ssl.{SSLContexts, TrustStrategy}
import org.apache.http.util.EntityUtils
import org.slf4j.LoggerFactory
import za.co.absa.pramen.api.{NotificationTarget, TaskNotification}
import za.co.absa.pramen.core.utils.Emoji
import za.co.absa.pramen.extras.sink.HttpDeleteWithBody

import java.security.cert.X509Certificate

/**
* Runs the ECS cleanup API against the target partition after the job jas completed.
*/
class EcsNotificationTarget(conf: Config) extends NotificationTarget {
override def config: Config = conf

override def sendNotification(notification: TaskNotification): Unit = {
// ToDo implementation
}
}

object EcsNotificationTarget {
private val log = LoggerFactory.getLogger(this.getClass)

def cleanUpS3VersionsForPath(partitionPath: String,
apiUrl: String,
apiKey: String,
trustAllSslCerts: Boolean): Unit = {

val body = s"""{"ecs_path":"$partitionPath"}"""
log.info(s"Sending: $body")

// Using Apache HTTP Client.
// Tried using com.lihaoyi:requests:0.8.0,
// but for some strange reason the EnceladusSink class can't be found/loaded
// when this library is used.
val httpClient = getHttpClient(trustAllSslCerts)
val httpDelete = new HttpDeleteWithBody(apiUrl)

httpDelete.addHeader("x-api-key", apiKey)
httpDelete.setEntity(new StringEntity(body))

try {
val response = httpClient.execute(httpDelete)
val statusCode = response.getStatusLine.getStatusCode
val responseBody = EntityUtils.toString(response.getEntity)

if (statusCode != HttpStatus.SC_OK) {
log.error(s"${Emoji.FAILURE} Failed to clean up S3 versions for $partitionPath. Response: $statusCode $responseBody")
} else {
log.info(s"${Emoji.SUCCESS} S3 versions cleanup for $partitionPath was successful. Response: $responseBody")
}
httpClient.close()
} catch {
case ex: Throwable =>
log.error(s"${Emoji.FAILURE} Unable to call the cleanup API via URL: $apiUrl.", ex)
}
}

private[extras] def getHttpClient(trustAllSslCerts: Boolean): CloseableHttpClient = {
if (trustAllSslCerts) {
log.warn("Trusting all SSL certificates for the cleanup API.")
val trustStrategy = new TrustStrategy {
override def isTrusted(x509Certificates: Array[X509Certificate], s: String): Boolean = true
}

val sslContext = SSLContexts.custom.loadTrustMaterial(null, trustStrategy).build
val sslsf = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE)

val socketFactoryRegistry =
RegistryBuilder.create[ConnectionSocketFactory]()
.register("https", sslsf)
.register("http", new PlainConnectionSocketFactory())
.build()

val connectionManager = new BasicHttpClientConnectionManager(socketFactoryRegistry)

HttpClients.custom()
.setSSLSocketFactory(sslsf)
.setConnectionManager(connectionManager)
.build()
} else {
HttpClients.createDefault()
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,18 @@ package za.co.absa.pramen.extras.sink

import com.typesafe.config.Config
import org.apache.hadoop.fs.Path
import org.apache.http.HttpStatus
import org.apache.http.config.RegistryBuilder
import org.apache.http.conn.socket.{ConnectionSocketFactory, PlainConnectionSocketFactory}
import org.apache.http.conn.ssl.{NoopHostnameVerifier, SSLConnectionSocketFactory}
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.{CloseableHttpClient, HttpClients}
import org.apache.http.impl.conn.BasicHttpClientConnectionManager
import org.apache.http.ssl.{SSLContexts, TrustStrategy}
import org.apache.http.util.EntityUtils
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.LoggerFactory
import za.co.absa.pramen.api.{ExternalChannelFactory, MetastoreReader, Sink, SinkResult}
import za.co.absa.pramen.core.utils.ConfigUtils
import za.co.absa.pramen.core.utils.hive.HiveQueryTemplates.TEMPLATES_DEFAULT_PREFIX
import za.co.absa.pramen.core.utils.hive._
import za.co.absa.pramen.core.utils.{ConfigUtils, Emoji}
import za.co.absa.pramen.extras.infofile.InfoFileGeneration
import za.co.absa.pramen.extras.notification.EcsNotificationTarget
import za.co.absa.pramen.extras.query.{QueryExecutor => EnceladusQueryExecutor, QueryExecutorSpark => EnceladusQueryExecutorSpark}
import za.co.absa.pramen.extras.sink.EnceladusConfig.DEFAULT_PUBLISH_PARTITION_TEMPLATE
import za.co.absa.pramen.extras.utils.{FsUtils, MainRunner, PartitionUtils}

import java.security.cert.X509Certificate
import java.time.{Instant, LocalDate}
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}
Expand Down Expand Up @@ -418,75 +409,16 @@ class EnceladusSink(sinkConfig: Config,
val rawPartitionPath = getOutputPartitionPath(rawBasePath, infoDate, infoVersion)
val publishPartitionPath = getPublishPartitionPath(publishBase, infoDate, infoVersion)

cleanUpS3VersionsForPath(removeAuthority(rawPartitionPath), apiUrl, apiKey)
cleanUpS3VersionsForPath(removeAuthority(publishPartitionPath), apiUrl, apiKey)
}

private[extras] def cleanUpS3VersionsForPath(partitionPath: String,
apiUrl: String,
apiKey: String): Unit = {

val body = s"""{"ecs_path":"$partitionPath"}"""
log.info(s"Sending: $body")
val trustAllSslCerts = sinkConfig.hasPath(CLEANUP_API_TRUST_SSL_KEY) && sinkConfig.getBoolean(CLEANUP_API_TRUST_SSL_KEY)

// Using Apache HTTP Client.
// Tried using com.lihaoyi:requests:0.8.0,
// but for some strange reason the EnceladusSink class can't be found/loaded
// when this library is used.
val httpClient = getHttpClient
val httpDelete = new HttpDeleteWithBody(apiUrl)

httpDelete.addHeader("x-api-key", apiKey)
httpDelete.setEntity(new StringEntity(body))

try {
val response = httpClient.execute(httpDelete)
val statusCode = response.getStatusLine.getStatusCode
val responseBody = EntityUtils.toString(response.getEntity)

if (statusCode != HttpStatus.SC_OK) {
log.error(s"${Emoji.FAILURE} Failed to clean up S3 versions for $partitionPath. Response: $statusCode $responseBody")
} else {
log.info(s"${Emoji.SUCCESS} S3 versions cleanup for $partitionPath was successful. Response: $responseBody")
}
httpClient.close()
} catch {
case ex: Throwable =>
log.error(s"${Emoji.FAILURE} Unable to call the cleanup API via URL: $apiUrl.", ex)
}
}
EcsNotificationTarget.cleanUpS3VersionsForPath(removeAuthority(rawPartitionPath), apiUrl, apiKey, trustAllSslCerts)
EcsNotificationTarget.cleanUpS3VersionsForPath(removeAuthority(publishPartitionPath), apiUrl, apiKey, trustAllSslCerts)
}

private[extras] def removeAuthority(path: Path): String = {
s"${path.toUri.getHost}${path.toUri.getPath}"
}

private[extras] def getHttpClient: CloseableHttpClient = {
if (sinkConfig.hasPath(CLEANUP_API_TRUST_SSL_KEY) && sinkConfig.getBoolean(CLEANUP_API_TRUST_SSL_KEY)) {
log.warn("Trusting all SSL certificates for the cleanup API.")
val trustStrategy = new TrustStrategy {
override def isTrusted(x509Certificates: Array[X509Certificate], s: String): Boolean = true
}

val sslContext = SSLContexts.custom.loadTrustMaterial(null, trustStrategy).build
val sslsf = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE)

val socketFactoryRegistry =
RegistryBuilder.create[ConnectionSocketFactory]()
.register("https", sslsf)
.register("http", new PlainConnectionSocketFactory())
.build()

val connectionManager = new BasicHttpClientConnectionManager(socketFactoryRegistry)

HttpClients.custom()
.setSSLSocketFactory(sslsf)
.setConnectionManager(connectionManager)
.build()
} else {
HttpClients.createDefault()
}
}

private[extras] def updateTable(hiveTable: String, publishBase: String, infoDate: LocalDate, infoVersion: Int)(implicit spark: SparkSession): Unit = {
if (hiveHelper.doesTableExist(enceladusConfig.hiveDatabase, hiveTable)) {
if (enceladusConfig.preferAddPartition) {
Expand Down

0 comments on commit 78c4835

Please sign in to comment.