### A Data Warehouse on top of Delta lake using Spark
### Scientific Publications Dataset

Authors: Jose Rodrigo Flores Espinosa & Marta Napa  
email: rodrigo.flores@ut.ee & marta.napa@ut.ee

### Objective

- The purpose is to extract data about scientific publications from JSON data that describe a huge number of papers and populate a data warehouse in order to issue analytics queries using SQL.  
- We will use Spark DataFrames to extract and transform the data.  
- We will use also Spark tables (delta tables) to be used for dimensions and fact table

### Dataset

Data set The source of the data is [https://www.aminer.org/citation](https://www.aminer.org/citation), version 13 as it is the most detailed one in JSON format.

### Overview

- Load required libraries
- Download of JSON data into Databricks
- Pre-processing of JSON data
- ETL processing
- Data Warehouse building
  - Save into persistent Delta tables
- Data Warehouse querying

### Spark Session

In [0]:
from pyspark.conf import SparkConf
from pyspark.context import SparkContext

conf = SparkConf()
display(conf.getAll())

_1,_2
spark.executor.memory,8278m
spark.ui.port,40001
spark.executor.extraClassPath,/databricks/spark/dbconf/log4j/executor:/databricks/spark/dbconf/jets3t/:/databricks/spark/dbconf/hadoop:/databricks/hive/conf:/databricks/jars/----com_google_protobuf--timestamp_proto-spark_3.2_2.12-scalabp.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-client-common_deploy.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-hive2-client_deploy.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-shim-common_deploy.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-shim-hive1_deploy.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-shim-hive2_deploy.jar:/databricks/jars/----glue-catalog-spark3.2-client--glue-catalog-shim-loader_deploy.jar:/databricks/jars/----jackson_annotations_shaded--libjackson-annotations.jar:/databricks/jars/----jackson_core_shaded--libjackson-core.jar:/databricks/jars/----jackson_databind_shaded--libjackson-databind.jar:/databricks/jars/----jackson_datatype_joda_shaded--libjackson-datatype-joda.jar:/databricks/jars/----scalapb_090--com.lihaoyi__fastparse_2.12__2.1.3_shaded.jar:/databricks/jars/----scalapb_090--com.lihaoyi__sourcecode_2.12__0.1.7_shaded.jar:/databricks/jars/----scalapb_090--runtime-unshaded-jetty9-hadoop1_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--common--kvstore--kvstore-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--common--network-common--network-common-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--common--network-shuffle--network-shuffle-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--common--sketch--sketch-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--common--tags--tags-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--common--unsafe--unsafe-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--core--core-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--core--libcore_generated_resources.jar:/databricks/jars/----workspace_spark_3_2--core--libcore_resources.jar:/databricks/jars/----workspace_spark_3_2--core--proto-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--graphx--graphx-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--launcher--launcher-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--antlr--antlr--antlr__antlr__2.7.7.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--amazon-kinesis-client--com.amazonaws__amazon-kinesis-client__1.12.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-autoscaling--com.amazonaws__aws-java-sdk-autoscaling__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudformation--com.amazonaws__aws-java-sdk-cloudformation__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudfront--com.amazonaws__aws-java-sdk-cloudfront__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudhsm--com.amazonaws__aws-java-sdk-cloudhsm__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudsearch--com.amazonaws__aws-java-sdk-cloudsearch__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudtrail--com.amazonaws__aws-java-sdk-cloudtrail__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudwatch--com.amazonaws__aws-java-sdk-cloudwatch__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cloudwatchmetrics--com.amazonaws__aws-java-sdk-cloudwatchmetrics__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-codedeploy--com.amazonaws__aws-java-sdk-codedeploy__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cognitoidentity--com.amazonaws__aws-java-sdk-cognitoidentity__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-cognitosync--com.amazonaws__aws-java-sdk-cognitosync__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-config--com.amazonaws__aws-java-sdk-config__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-core--com.amazonaws__aws-java-sdk-core__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-datapipeline--com.amazonaws__aws-java-sdk-datapipeline__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-directconnect--com.amazonaws__aws-java-sdk-directconnect__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-directory--com.amazonaws__aws-java-sdk-directory__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-dynamodb--com.amazonaws__aws-java-sdk-dynamodb__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-ec2--com.amazonaws__aws-java-sdk-ec2__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-ecs--com.amazonaws__aws-java-sdk-ecs__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-efs--com.amazonaws__aws-java-sdk-efs__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-elasticache--com.amazonaws__aws-java-sdk-elasticache__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-elasticbeanstalk--com.amazonaws__aws-java-sdk-elasticbeanstalk__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-elasticloadbalancing--com.amazonaws__aws-java-sdk-elasticloadbalancing__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-elastictranscoder--com.amazonaws__aws-java-sdk-elastictranscoder__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-emr--com.amazonaws__aws-java-sdk-emr__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-glacier--com.amazonaws__aws-java-sdk-glacier__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-glue--com.amazonaws__aws-java-sdk-glue__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-iam--com.amazonaws__aws-java-sdk-iam__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-importexport--com.amazonaws__aws-java-sdk-importexport__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-kinesis--com.amazonaws__aws-java-sdk-kinesis__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-kms--com.amazonaws__aws-java-sdk-kms__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-lambda--com.amazonaws__aws-java-sdk-lambda__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-logs--com.amazonaws__aws-java-sdk-logs__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-machinelearning--com.amazonaws__aws-java-sdk-machinelearning__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-opsworks--com.amazonaws__aws-java-sdk-opsworks__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-rds--com.amazonaws__aws-java-sdk-rds__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-redshift--com.amazonaws__aws-java-sdk-redshift__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-route53--com.amazonaws__aws-java-sdk-route53__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-s3--com.amazonaws__aws-java-sdk-s3__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-ses--com.amazonaws__aws-java-sdk-ses__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-simpledb--com.amazonaws__aws-java-sdk-simpledb__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-simpleworkflow--com.amazonaws__aws-java-sdk-simpleworkflow__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-sns--com.amazonaws__aws-java-sdk-sns__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-sqs--com.amazonaws__aws-java-sdk-sqs__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-ssm--com.amazonaws__aws-java-sdk-ssm__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-storagegateway--com.amazonaws__aws-java-sdk-storagegateway__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-sts--com.amazonaws__aws-java-sdk-sts__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-support--com.amazonaws__aws-java-sdk-support__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-swf-libraries--com.amazonaws__aws-java-sdk-swf-libraries__1.11.22.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--aws-java-sdk-workspaces--com.amazonaws__aws-java-sdk-workspaces__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.amazonaws--jmespath-java--com.amazonaws__jmespath-java__1.12.189.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.chuusai--shapeless_2.12--com.chuusai__shapeless_2.12__2.3.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.clearspring.analytics--stream--com.clearspring.analytics__stream__2.9.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.databricks--Rserve--com.databricks__Rserve__1.8-3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.databricks.scalapb--compilerplugin_2.12--com.databricks.scalapb__compilerplugin_2.12__0.4.15-10.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.databricks.scalapb--scalapb-runtime_2.12--com.databricks.scalapb__scalapb-runtime_2.12__0.4.15-10.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.esotericsoftware--kryo-shaded--com.esotericsoftware__kryo-shaded__4.0.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.esotericsoftware--minlog--com.esotericsoftware__minlog__1.3.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml--classmate--com.fasterxml__classmate__1.3.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.core--jackson-annotations--com.fasterxml.jackson.core__jackson-annotations__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.core--jackson-core--com.fasterxml.jackson.core__jackson-core__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.core--jackson-databind--com.fasterxml.jackson.core__jackson-databind__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.dataformat--jackson-dataformat-cbor--com.fasterxml.jackson.dataformat__jackson-dataformat-cbor__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.datatype--jackson-datatype-joda--com.fasterxml.jackson.datatype__jackson-datatype-joda__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.module--jackson-module-paranamer--com.fasterxml.jackson.module__jackson-module-paranamer__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.fasterxml.jackson.module--jackson-module-scala_2.12--com.fasterxml.jackson.module__jackson-module-scala_2.12__2.12.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.ben-manes.caffeine--caffeine--com.github.ben-manes.caffeine__caffeine__2.3.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil--jniloader--com.github.fommil__jniloader__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--core--com.github.fommil.netlib__core__1.1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--native_ref-java--com.github.fommil.netlib__native_ref-java__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--native_ref-java-natives--com.github.fommil.netlib__native_ref-java-natives__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--native_system-java--com.github.fommil.netlib__native_system-java__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--native_system-java-natives--com.github.fommil.netlib__native_system-java-natives__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--netlib-native_ref-linux-x86_64-natives--com.github.fommil.netlib__netlib-native_ref-linux-x86_64-natives__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.fommil.netlib--netlib-native_system-linux-x86_64-natives--com.github.fommil.netlib__netlib-native_system-linux-x86_64-natives__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.luben--zstd-jni--com.github.luben__zstd-jni__1.5.0-4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.github.wendykierp--JTransforms--com.github.wendykierp__JTransforms__3.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.code.findbugs--jsr305--com.google.code.findbugs__jsr305__3.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.code.gson--gson--com.google.code.gson__gson__2.8.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.crypto.tink--tink--com.google.crypto.tink__tink__1.6.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.flatbuffers--flatbuffers-java--com.google.flatbuffers__flatbuffers-java__1.9.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.guava--guava--com.google.guava__guava__15.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.google.protobuf--protobuf-java--com.google.protobuf__protobuf-java__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.h2database--h2--com.h2database__h2__1.4.195.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.helger--profiler--com.helger__profiler__1.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.jcraft--jsch--com.jcraft__jsch__0.1.50.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.jolbox--bonecp--com.jolbox__bonecp__0.8.0.RELEASE.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.lihaoyi--sourcecode_2.12--com.lihaoyi__sourcecode_2.12__0.1.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.microsoft.azure--azure-data-lake-store-sdk--com.microsoft.azure__azure-data-lake-store-sdk__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.ning--compress-lzf--com.ning__compress-lzf__1.0.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.sun.istack--istack-commons-runtime--com.sun.istack__istack-commons-runtime__3.0.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.sun.mail--javax.mail--com.sun.mail__javax.mail__1.5.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.tdunning--json--com.tdunning__json__1.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.thoughtworks.paranamer--paranamer--com.thoughtworks.paranamer__paranamer__2.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.trueaccord.lenses--lenses_2.12--com.trueaccord.lenses__lenses_2.12__0.4.12.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--chill-java--com.twitter__chill-java__0.10.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--chill_2.12--com.twitter__chill_2.12__0.10.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-app_2.12--com.twitter__util-app_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-core_2.12--com.twitter__util-core_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-function_2.12--com.twitter__util-function_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-jvm_2.12--com.twitter__util-jvm_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-lint_2.12--com.twitter__util-lint_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-registry_2.12--com.twitter__util-registry_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.twitter--util-stats_2.12--com.twitter__util-stats_2.12__7.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.typesafe--config--com.typesafe__config__1.2.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.typesafe.scala-logging--scala-logging_2.12--com.typesafe.scala-logging__scala-logging_2.12__3.7.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.univocity--univocity-parsers--com.univocity__univocity-parsers__2.9.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--com.zaxxer--HikariCP--com.zaxxer__HikariCP__4.0.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-cli--commons-cli--commons-cli__commons-cli__1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-codec--commons-codec--commons-codec__commons-codec__1.15.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-collections--commons-collections--commons-collections__commons-collections__3.2.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-dbcp--commons-dbcp--commons-dbcp__commons-dbcp__1.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-fileupload--commons-fileupload--commons-fileupload__commons-fileupload__1.3.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-httpclient--commons-httpclient--commons-httpclient__commons-httpclient__3.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-io--commons-io--commons-io__commons-io__2.8.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-lang--commons-lang--commons-lang__commons-lang__2.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-logging--commons-logging--commons-logging__commons-logging__1.1.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-net--commons-net--commons-net__commons-net__3.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--commons-pool--commons-pool--commons-pool__commons-pool__1.5.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--dev.ludovic.netlib--arpack--dev.ludovic.netlib__arpack__2.2.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--dev.ludovic.netlib--blas--dev.ludovic.netlib__blas__2.2.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--dev.ludovic.netlib--lapack--dev.ludovic.netlib__lapack__2.2.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--info.ganglia.gmetric4j--gmetric4j--info.ganglia.gmetric4j__gmetric4j__1.0.10.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.airlift--aircompressor--io.airlift__aircompressor__0.21.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.delta--delta-sharing-spark_2.12--io.delta__delta-sharing-spark_2.12__0.4.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-core--io.dropwizard.metrics__metrics-core__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-graphite--io.dropwizard.metrics__metrics-graphite__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-healthchecks--io.dropwizard.metrics__metrics-healthchecks__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-jetty9--io.dropwizard.metrics__metrics-jetty9__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-jmx--io.dropwizard.metrics__metrics-jmx__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-json--io.dropwizard.metrics__metrics-json__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-jvm--io.dropwizard.metrics__metrics-jvm__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.dropwizard.metrics--metrics-servlets--io.dropwizard.metrics__metrics-servlets__4.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.netty--netty-all--io.netty__netty-all__4.1.68.Final.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus--simpleclient--io.prometheus__simpleclient__0.7.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus--simpleclient_common--io.prometheus__simpleclient_common__0.7.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus--simpleclient_dropwizard--io.prometheus__simpleclient_dropwizard__0.7.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus--simpleclient_pushgateway--io.prometheus__simpleclient_pushgateway__0.7.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus--simpleclient_servlet--io.prometheus__simpleclient_servlet__0.7.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--io.prometheus.jmx--collector--io.prometheus.jmx__collector__0.12.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jakarta.annotation--jakarta.annotation-api--jakarta.annotation__jakarta.annotation-api__1.3.5.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jakarta.servlet--jakarta.servlet-api--jakarta.servlet__jakarta.servlet-api__4.0.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jakarta.validation--jakarta.validation-api--jakarta.validation__jakarta.validation-api__2.0.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jakarta.ws.rs--jakarta.ws.rs-api--jakarta.ws.rs__jakarta.ws.rs-api__2.1.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.activation--activation--javax.activation__activation__1.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.annotation--javax.annotation-api--javax.annotation__javax.annotation-api__1.3.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.el--javax.el-api--javax.el__javax.el-api__2.2.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.jdo--jdo-api--javax.jdo__jdo-api__3.0.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.transaction--jta--javax.transaction__jta__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.transaction--transaction-api--javax.transaction__transaction-api__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.xml.bind--jaxb-api--javax.xml.bind__jaxb-api__2.2.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javax.xml.stream--stax-api--javax.xml.stream__stax-api__1.0-2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--javolution--javolution--javolution__javolution__5.5.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jets3t-0.7--com.databricks--jets3t--com.databricks__jets3t__0.7.1-0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jets3t-0.7--liball_deps_2.12.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--jline--jline--jline__jline__2.14.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--joda-time--joda-time--joda-time__joda-time__2.10.10.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--liball_deps_2.12.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--log4j--apache-log4j-extras--log4j__apache-log4j-extras__1.2.17.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--log4j--log4j--log4j__log4j__1.2.17.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.java.dev.jna--jna--net.java.dev.jna__jna__5.8.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.razorvine--pyrolite--net.razorvine__pyrolite__4.30.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.sf.jpam--jpam--net.sf.jpam__jpam__1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.sf.opencsv--opencsv--net.sf.opencsv__opencsv__2.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.sf.supercsv--super-csv--net.sf.supercsv__super-csv__2.2.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.snowflake--snowflake-ingest-sdk--net.snowflake__snowflake-ingest-sdk__0.9.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.snowflake--snowflake-jdbc--net.snowflake__snowflake-jdbc__3.13.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.snowflake--spark-snowflake_2.12--net.snowflake__spark-snowflake_2.12__2.9.0-spark_3.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--net.sourceforge.f2j--arpack_combined_all--net.sourceforge.f2j__arpack_combined_all__0.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.acplt.remotetea--remotetea-oncrpc--org.acplt.remotetea__remotetea-oncrpc__1.1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.antlr--ST4--org.antlr__ST4__4.0.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.antlr--antlr-runtime--org.antlr__antlr-runtime__3.5.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.antlr--antlr4-runtime--org.antlr__antlr4-runtime__4.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.antlr--stringtemplate--org.antlr__stringtemplate__3.2.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.ant--ant--org.apache.ant__ant__1.9.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.ant--ant-jsch--org.apache.ant__ant-jsch__1.9.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.ant--ant-launcher--org.apache.ant__ant-launcher__1.9.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.arrow--arrow-format--org.apache.arrow__arrow-format__2.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.arrow--arrow-memory-core--org.apache.arrow__arrow-memory-core__2.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.arrow--arrow-memory-netty--org.apache.arrow__arrow-memory-netty__2.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.arrow--arrow-vector--org.apache.arrow__arrow-vector__2.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.avro--avro--org.apache.avro__avro__1.10.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.avro--avro-ipc--org.apache.avro__avro-ipc__1.10.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.avro--avro-mapred--org.apache.avro__avro-mapred__1.10.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.commons--commons-compress--org.apache.commons__commons-compress__1.21.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.commons--commons-crypto--org.apache.commons__commons-crypto__1.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.commons--commons-lang3--org.apache.commons__commons-lang3__3.12.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.commons--commons-math3--org.apache.commons__commons-math3__3.4.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.commons--commons-text--org.apache.commons__commons-text__1.6.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.curator--curator-client--org.apache.curator__curator-client__2.13.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.curator--curator-framework--org.apache.curator__curator-framework__2.13.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.curator--curator-recipes--org.apache.curator__curator-recipes__2.13.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.derby--derby--org.apache.derby__derby__10.14.2.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hadoop--hadoop-client-api--org.apache.hadoop__hadoop-client-api__3.3.1-databricks.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hadoop--hadoop-client-runtime--org.apache.hadoop__hadoop-client-runtime__3.3.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-beeline--org.apache.hive__hive-beeline__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-cli--org.apache.hive__hive-cli__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-jdbc--org.apache.hive__hive-jdbc__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-llap-client--org.apache.hive__hive-llap-client__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-llap-common--org.apache.hive__hive-llap-common__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-serde--org.apache.hive__hive-serde__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-shims--org.apache.hive__hive-shims__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive--hive-storage-api--org.apache.hive__hive-storage-api__2.7.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive.shims--hive-shims-0.23--org.apache.hive.shims__hive-shims-0.23__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive.shims--hive-shims-common--org.apache.hive.shims__hive-shims-common__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.hive.shims--hive-shims-scheduler--org.apache.hive.shims__hive-shims-scheduler__2.3.9.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.htrace--htrace-core4--org.apache.htrace__htrace-core4__4.1.0-incubating.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.httpcomponents--httpclient--org.apache.httpcomponents__httpclient__4.5.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.httpcomponents--httpcore--org.apache.httpcomponents__httpcore__4.4.12.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.ivy--ivy--org.apache.ivy__ivy__2.5.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.mesos--mesos-shaded-protobuf--org.apache.mesos__mesos-shaded-protobuf__1.4.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.orc--orc-core--org.apache.orc__orc-core__1.6.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.orc--orc-mapreduce--org.apache.orc__orc-mapreduce__1.6.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.orc--orc-shims--org.apache.orc__orc-shims__1.6.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-column--org.apache.parquet__parquet-column__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-common--org.apache.parquet__parquet-common__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-encoding--org.apache.parquet__parquet-encoding__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-format-structures--org.apache.parquet__parquet-format-structures__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-hadoop--org.apache.parquet__parquet-hadoop__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.parquet--parquet-jackson--org.apache.parquet__parquet-jackson__1.12.0-databricks-0004.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.thrift--libfb303--org.apache.thrift__libfb303__0.9.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.thrift--libthrift--org.apache.thrift__libthrift__0.12.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.xbean--xbean-asm9-shaded--org.apache.xbean__xbean-asm9-shaded__4.20.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.yetus--audience-annotations--org.apache.yetus__audience-annotations__0.5.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.zookeeper--zookeeper--org.apache.zookeeper__zookeeper__3.6.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.apache.zookeeper--zookeeper-jute--org.apache.zookeeper__zookeeper-jute__3.6.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.checkerframework--checker-qual--org.checkerframework__checker-qual__3.5.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.codehaus.jackson--jackson-core-asl--org.codehaus.jackson__jackson-core-asl__1.9.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.codehaus.jackson--jackson-mapper-asl--org.codehaus.jackson__jackson-mapper-asl__1.9.13.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.codehaus.janino--commons-compiler--org.codehaus.janino__commons-compiler__3.0.16.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.codehaus.janino--janino--org.codehaus.janino__janino__3.0.16.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.datanucleus--datanucleus-api-jdo--org.datanucleus__datanucleus-api-jdo__4.2.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.datanucleus--datanucleus-core--org.datanucleus__datanucleus-core__4.1.17.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.datanucleus--datanucleus-rdbms--org.datanucleus__datanucleus-rdbms__4.1.19.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.datanucleus--javax.jdo--org.datanucleus__javax.jdo__3.2.0-m3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-client--org.eclipse.jetty__jetty-client__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-continuation--org.eclipse.jetty__jetty-continuation__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-http--org.eclipse.jetty__jetty-http__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-io--org.eclipse.jetty__jetty-io__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-jndi--org.eclipse.jetty__jetty-jndi__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-plus--org.eclipse.jetty__jetty-plus__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-proxy--org.eclipse.jetty__jetty-proxy__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-security--org.eclipse.jetty__jetty-security__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-server--org.eclipse.jetty__jetty-server__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-servlet--org.eclipse.jetty__jetty-servlet__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-servlets--org.eclipse.jetty__jetty-servlets__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-util--org.eclipse.jetty__jetty-util__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-util-ajax--org.eclipse.jetty__jetty-util-ajax__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-webapp--org.eclipse.jetty__jetty-webapp__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty--jetty-xml--org.eclipse.jetty__jetty-xml__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty.websocket--websocket-api--org.eclipse.jetty.websocket__websocket-api__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty.websocket--websocket-client--org.eclipse.jetty.websocket__websocket-client__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty.websocket--websocket-common--org.eclipse.jetty.websocket__websocket-common__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty.websocket--websocket-server--org.eclipse.jetty.websocket__websocket-server__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.eclipse.jetty.websocket--websocket-servlet--org.eclipse.jetty.websocket__websocket-servlet__9.4.43.v20210629.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.fusesource.leveldbjni--leveldbjni-all--org.fusesource.leveldbjni__leveldbjni-all__1.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2--hk2-api--org.glassfish.hk2__hk2-api__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2--hk2-locator--org.glassfish.hk2__hk2-locator__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2--hk2-utils--org.glassfish.hk2__hk2-utils__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2--osgi-resource-locator--org.glassfish.hk2__osgi-resource-locator__1.0.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2.external--aopalliance-repackaged--org.glassfish.hk2.external__aopalliance-repackaged__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.hk2.external--jakarta.inject--org.glassfish.hk2.external__jakarta.inject__2.6.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jaxb--jaxb-runtime--org.glassfish.jaxb__jaxb-runtime__2.3.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.containers--jersey-container-servlet--org.glassfish.jersey.containers__jersey-container-servlet__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.containers--jersey-container-servlet-core--org.glassfish.jersey.containers__jersey-container-servlet-core__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.core--jersey-client--org.glassfish.jersey.core__jersey-client__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.core--jersey-common--org.glassfish.jersey.core__jersey-common__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.core--jersey-server--org.glassfish.jersey.core__jersey-server__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.glassfish.jersey.inject--jersey-hk2--org.glassfish.jersey.inject__jersey-hk2__2.34.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.hibernate.validator--hibernate-validator--org.hibernate.validator__hibernate-validator__6.1.0.Final.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.javassist--javassist--org.javassist__javassist__3.25.0-GA.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.jboss.logging--jboss-logging--org.jboss.logging__jboss-logging__3.3.2.Final.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.jdbi--jdbi--org.jdbi__jdbi__2.63.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.jetbrains--annotations--org.jetbrains__annotations__17.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.joda--joda-convert--org.joda__joda-convert__1.7.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.jodd--jodd-core--org.jodd__jodd-core__3.5.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.json4s--json4s-ast_2.12--org.json4s__json4s-ast_2.12__3.7.0-M11.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.json4s--json4s-core_2.12--org.json4s__json4s-core_2.12__3.7.0-M11.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.json4s--json4s-jackson_2.12--org.json4s__json4s-jackson_2.12__3.7.0-M11.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.json4s--json4s-scalap_2.12--org.json4s__json4s-scalap_2.12__3.7.0-M11.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.lz4--lz4-java--org.lz4__lz4-java__1.7.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.mariadb.jdbc--mariadb-java-client--org.mariadb.jdbc__mariadb-java-client__2.2.5.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.objenesis--objenesis--org.objenesis__objenesis__2.5.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.postgresql--postgresql--org.postgresql__postgresql__42.2.19.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.roaringbitmap--RoaringBitmap--org.roaringbitmap__RoaringBitmap__0.9.14.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.roaringbitmap--shims--org.roaringbitmap__shims__0.9.14.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.rocksdb--rocksdbjni--org.rocksdb__rocksdbjni__6.24.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.rosuda.REngine--REngine--org.rosuda.REngine__REngine__2.1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang--scala-compiler_2.12--org.scala-lang__scala-compiler__2.12.14.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang--scala-library_2.12--org.scala-lang__scala-library__2.12.14.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang--scala-reflect_2.12--org.scala-lang__scala-reflect__2.12.14.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang.modules--scala-collection-compat_2.12--org.scala-lang.modules__scala-collection-compat_2.12__2.4.3.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang.modules--scala-parser-combinators_2.12--org.scala-lang.modules__scala-parser-combinators_2.12__1.1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-lang.modules--scala-xml_2.12--org.scala-lang.modules__scala-xml_2.12__1.2.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scala-sbt--test-interface--org.scala-sbt__test-interface__1.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scalacheck--scalacheck_2.12--org.scalacheck__scalacheck_2.12__1.14.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scalactic--scalactic_2.12--org.scalactic__scalactic_2.12__3.0.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scalanlp--breeze-macros_2.12--org.scalanlp__breeze-macros_2.12__1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scalanlp--breeze_2.12--org.scalanlp__breeze_2.12__1.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.scalatest--scalatest_2.12--org.scalatest__scalatest_2.12__3.0.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.slf4j--jcl-over-slf4j--org.slf4j__jcl-over-slf4j__1.7.30.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.slf4j--jul-to-slf4j--org.slf4j__jul-to-slf4j__1.7.30.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.slf4j--slf4j-api--org.slf4j__slf4j-api__1.7.30.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.slf4j--slf4j-log4j12--org.slf4j__slf4j-log4j12__1.7.30.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.spark-project.spark--unused--org.spark-project.spark__unused__1.0.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.threeten--threeten-extra--org.threeten__threeten-extra__1.5.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.tukaani--xz--org.tukaani__xz__1.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--algebra_2.12--org.typelevel__algebra_2.12__2.0.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--cats-kernel_2.12--org.typelevel__cats-kernel_2.12__2.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--macro-compat_2.12--org.typelevel__macro-compat_2.12__1.1.1.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--spire-macros_2.12--org.typelevel__spire-macros_2.12__0.17.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--spire-platform_2.12--org.typelevel__spire-platform_2.12__0.17.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--spire-util_2.12--org.typelevel__spire-util_2.12__0.17.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.typelevel--spire_2.12--org.typelevel__spire_2.12__0.17.0.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.wildfly.openssl--wildfly-openssl--org.wildfly.openssl__wildfly-openssl__1.0.7.Final.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.xerial--sqlite-jdbc--org.xerial__sqlite-jdbc__3.8.11.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.xerial.snappy--snappy-java--org.xerial.snappy__snappy-java__1.1.8.4.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--org.yaml--snakeyaml--org.yaml__snakeyaml__1.24.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--oro--oro--oro__oro__2.0.8.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--pl.edu.icm--JLargeArrays--pl.edu.icm__JLargeArrays__1.5.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--software.amazon.ion--ion-java--software.amazon.ion__ion-java__1.0.2.jar:/databricks/jars/----workspace_spark_3_2--maven-trees--hive-2.3__hadoop-3.2--stax--stax-api--stax__stax-api__1.0.1.jar:/databricks/jars/----workspace_spark_3_2--mllib--libmllib_resources.jar:/databricks/jars/----workspace_spark_3_2--mllib--mllib-hive-2.3__hadoop-3.2_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--mllib--org.jpmml__pmml-model__1.4.8_shaded-for-mllib-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--mllib-local--mllib-local-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--patched-hive-with-glue--hive-12679-patch-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--patched-hive-with-glue--hive-common_shaded.jar:/databricks/jars/----workspace_spark_3_2--patched-hive-with-glue--hive-exec-core_shaded.jar:/databricks/jars/----workspace_spark_3_2--patched-hive-with-glue--hive-metastore_shaded.jar:/databricks/jars/----workspace_spark_3_2--py4j--py4j-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--repl--repl-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--catalyst--catalyst-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--catalyst--libcatalyst_resources.jar:/databricks/jars/----workspace_spark_3_2--sql--catalyst--libspark-sql-parser-compiled.jar:/databricks/jars/----workspace_spark_3_2--sql--catalyst--proto-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--core--core-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--core--libcore_resources.jar:/databricks/jars/----workspace_spark_3_2--sql--core--proto_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--core--spark-sql-databricks-command-parser_java_out.srcjar:/databricks/jars/----workspace_spark_3_2--sql--hive--hive-hive-2.3__hadoop-3.2_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--sql--hive--libhive_resources.jar:/databricks/jars/----workspace_spark_3_2--sql--hive--org.apache.commons__commons-pool2__2.6.2_shaded-for-hive-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--sql--hive-thriftserver--hive-thriftserver-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--sql--hive-thriftserver--hive-thriftserver-protocol-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--streaming--libstreaming_resources.jar:/databricks/jars/----workspace_spark_3_2--streaming--streaming-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--third_party--bigquery-connector--fatJar-assembly-0.22.2-SNAPSHOT.jar_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--bigquery-connector--gcs-connector-hadoop2-2.0.0-shaded.jar_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.azure__azure-core-http-netty__1.6.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.azure__azure-core__1.8.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.azure__azure-identity__1.1.3_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.azure__azure-security-keyvault-keys__4.2.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.fasterxml.jackson.dataformat__jackson-dataformat-xml__2.11.2_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.fasterxml.jackson.datatype__jackson-datatype-jsr310__2.11.2_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.fasterxml.jackson.module__jackson-module-jaxb-annotations__2.11.2_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.fasterxml.woodstox__woodstox-core__6.2.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.fasterxml__aalto-xml__1.0.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.github.stephenc.jcip__jcip-annotations__1.0-1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.madgag.spongycastle__core__1.54.0.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.microsoft.azure__msal4j-persistence-extension__1.0.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.microsoft.azure__msal4j__1.7.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.microsoft.sqlserver__mssql-jdbc__9.2.1.jre8_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.nimbusds__content-type__2.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.nimbusds__lang-tag__1.4.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.nimbusds__nimbus-jose-jwt__8.8_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--com.nimbusds__oauth2-oidc-sdk__7.1.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-buffer__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-codec-http2__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-codec-http__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-codec-socks__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-codec__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-common__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-handler-proxy__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-handler__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-resolver__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-tcnative-boringssl-static__2.0.31.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-transport-native-epoll-linux-x86_64__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-transport-native-kqueue-osx-x86_64__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-transport-native-unix-common__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.netty__netty-transport__4.1.51.Final_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.projectreactor.netty__reactor-netty__0.9.11.RELEASE_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--io.projectreactor__reactor-core__3.3.9.RELEASE_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--jakarta.activation__jakarta.activation-api__1.2.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--jakarta.xml.bind__jakarta.xml.bind-api__2.3.2_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--libmssql-shaded-meta-services_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--net.java.dev.jna__jna-platform__5.6.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--net.minidev__accessors-smart__1.2_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--net.minidev__json-smart__2.3_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.bouncycastle__bcprov-jdk15on__1.69_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.codehaus.woodstox__stax2-api__4.2.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2-dom__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2-jaxb__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2-kdb__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2-kdbx__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2-simple__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__KeePassJava2__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.linguafranca.pwdb__database__2.1.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.nanohttpd__nanohttpd__2.3.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.ow2.asm__asm__5.0.4_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.reactivestreams__reactive-streams__1.0.3_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--org.simpleframework__simple-xml__2.7.1_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--stax__stax__1.2.0_shaded.jar:/databricks/jars/----workspace_spark_3_2--third_party--mssql--xpp3__xpp3__1.1.3.3_shaded.jar:/databricks/jars/----workspace_spark_3_2--vendor--avro--avro-hive-2.3__hadoop-3.2_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--vendor--avro--io.confluent__common-utils__4.0.0_shaded-for-avro-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--avro--io.confluent__kafka-schema-registry-client__4.0.0_shaded-for-avro-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--avro--libavro_resources_shaded-for-avro-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--file-notification-common--file-notification-common-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--file-notification-common--libfile-notification-common_resources.jar:/databricks/jars/----workspace_spark_3_2--vendor--iceberg--iceberg-converter_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--vendor--iceberg--iceberg-spark3-runtime_shaded_shaded-for-iceberg-converter.jar:/databricks/jars/----workspace_spark_3_2--vendor--kafka-0-10-hive-2.3__hadoop-3.2_2.12_deploy_shaded.jar:/databricks/jars/----workspace_spark_3_2--vendor--kafka-0-10-token-provider--libkafka-0-10-token-provider_resources.jar:/databricks/jars/----workspace_spark_3_2--vendor--kafka-0-10-token-provider-unshaded-hive-2.3__hadoop-3.2_2.12_deploy_shaded-for-kafka-0-10-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--libkafka-0-10-resources_shaded-for-kafka-0-10-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--org.apache.kafka__kafka-clients__2.8.0_shaded-for-kafka-0-10-hive-2.3__hadoop-3.2.jar:/databricks/jars/----workspace_spark_3_2--vendor--redshift--redshift-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--spark-ganglia-lgpl--libmetrics-ganglia.jar:/databricks/jars/----workspace_spark_3_2--vendor--spark-ganglia-lgpl--spark-ganglia-lgpl-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-aws-connectors--libsql-aws-connectors_resources.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-aws-connectors--sql-aws-connectors-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-azure-connectors--libsql-azure-connectors_resources.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-azure-connectors--sql-azure-connectors-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-dw--sql-dw-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-gcp-connectors--libsql-gcp-connectors_resources.jar:/databricks/jars/----workspace_spark_3_2--vendor--sql-gcp-connectors--sql-gcp-connectors-hive-2.3__hadoop-3.2_2.12_deploy.jar:/databricks/jars/acl--api--helpers--helpers-spark_3.2_2.12_deploy.jar:/databricks/jars/acl--auth--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/api--common--workspace-spark_3.2_2.12_deploy.jar:/databricks/jars/api--rpc--rpc_parser-spark_3.2_2.12_deploy.jar:/databricks/jars/api-base--api-base-spark_3.2_2.12_deploy.jar:/databricks/jars/api-base--api-base_java-spark_3.2_2.12_deploy.jar:/databricks/jars/armeria-reference-service--logging--request_proto-spark_3.2_2.12-scalabp.jar:/databricks/jars/central--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/chauffeur-api--api--endpoints--endpoints-spark_3.2_2.12_deploy.jar:/databricks/jars/chauffeur-api--chauffeur-api-spark_3.2_2.12_deploy.jar:/databricks/jars/cluster-common--storage-context-type-spark_3.2_2.12_deploy.jar:/databricks/jars/cmdexec--api--api-experimental-proto-spark_3.2_2.12_deploy.jar:/databricks/jars/cmdexec--api--api-experimental-spark_3.2_2.12_deploy.jar:/databricks/jars/common--attribution-context--attribution-context-spark_3.2_2.12_deploy.jar:/databricks/jars/common--build-info--build-info-spark_3.2_2.12_deploy.jar:/databricks/jars/common--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/common--cloudstorage--bucketversioningstatus-spark_3.2_2.12_deploy.jar:/databricks/jars/common--cloudstorage--presigned-url-spark_3.2_2.12_deploy.jar:/databricks/jars/common--command-exceptions--command-exceptions-spark_3.2_2.12_deploy.jar:/databricks/jars/common--command-result--command-result-spark_3.2_2.12_deploy.jar:/databricks/jars/common--common-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--base-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--blacklist--blacklist-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--deployer--deployer-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--materializer--materializer-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--project--project-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--project-conf--project-conf-spark_3.2_2.12_deploy.jar:/databricks/jars/common--conf--trusted--trusted-spark_3.2_2.12_deploy.jar:/databricks/jars/common--credentials--credentials-spark_3.2_2.12_deploy.jar:/databricks/jars/common--crypto-providers--amazon-corretto-crypto-provider--libamazon-corretto-crypto-provider.jar:/databricks/jars/common--database--metrics--metrics-spark_3.2_2.12_deploy.jar:/databricks/jars/common--dbsql-config--dbsql-config-spark_3.2_2.12_deploy.jar:/databricks/jars/common--destroyable--destroyable-spark_3.2_2.12_deploy.jar:/databricks/jars/common--doughnut-buffer--doughnut-buffer-spark_3.2_2.12_deploy.jar:/databricks/jars/common--encryption--com.amazonaws__aws-encryption-sdk-java__2.3.2_shaded.jar:/databricks/jars/common--encryption--com.azure__azure-core-http-netty__1.10.2_shaded.jar:/databricks/jars/common--encryption--com.azure__azure-core__1.19.0_shaded.jar:/databricks/jars/common--encryption--com.azure__azure-identity__1.3.5_shaded.jar:/databricks/jars/common--encryption--com.azure__azure-security-keyvault-keys__4.3.2_shaded.jar:/databricks/jars/common--encryption--com.azure__azure-security-keyvault-secrets__4.3.2_shaded.jar:/databricks/jars/common--encryption--com.fasterxml.jackson.core__jackson-annotations__2.12.4_shaded.jar:/databricks/jars/common--encryption--com.fasterxml.jackson.core__jackson-core__2.6.0_shaded.jar:/databricks/jars/common--encryption--com.fasterxml.jackson.core__jackson-databind__2.12.4_shaded.jar:/databricks/jars/common--encryption--com.fasterxml.jackson.dataformat__jackson-dataformat-xml__2.12.4_shaded.jar:/databricks/jars/common--encryption--com.fasterxml.jackson.datatype__jackson-datatype-jsr310__2.12.4_shaded.jar:/databricks/jars/common--encryption--com.microsoft.azure__azure-keyvault-core__1.0.0_shaded.jar:/databricks/jars/common--encryption--com.microsoft.azure__azure-storage__7.0.1_shaded.jar:/databricks/jars/common--encryption--com.microsoft.azure__msal4j__1.10.1_shaded.jar:/databricks/jars/common--encryption--com.nimbusds__content-type__2.1_shaded.jar:/databricks/jars/common--encryption--com.nimbusds__lang-tag__1.5_shaded.jar:/databricks/jars/common--encryption--com.nimbusds__nimbus-jose-jwt__9.9.3_shaded.jar:/databricks/jars/common--encryption--com.nimbusds__oauth2-oidc-sdk__9.7_shaded.jar:/databricks/jars/common--encryption--cpk_encryption_utils-spark_3.2_2.12_deploy.jar:/databricks/jars/common--encryption--io.netty__netty-buffer__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-codec-dns__4.1.65.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-codec-http2__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-codec-http__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-codec-socks__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-codec__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-common__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-handler-proxy__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-handler__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-resolver-dns-native-macos-osx-x86_64__4.1.65.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-resolver-dns__4.1.65.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-resolver__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-tcnative-boringssl-static__2.0.40.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-transport-native-epoll-linux-x86_64__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-transport-native-kqueue-osx-x86_64__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-transport-native-unix-common__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.netty__netty-transport__4.1.66.Final_shaded.jar:/databricks/jars/common--encryption--io.projectreactor.netty__reactor-netty-core__1.0.9_shaded.jar:/databricks/jars/common--encryption--io.projectreactor.netty__reactor-netty-http-brave__1.0.9_shaded.jar:/databricks/jars/common--encryption--io.projectreactor.netty__reactor-netty-http__1.0.9_shaded.jar:/databricks/jars/common--encryption--io.projectreactor.netty__reactor-netty__1.0.9_shaded.jar:/databricks/jars/common--encryption--io.projectreactor__reactor-core__3.4.8_shaded.jar:/databricks/jars/common--encryption--libmeta-services.jar:/databricks/jars/common--encryption--net.minidev__accessors-smart__2.4.7_shaded.jar:/databricks/jars/common--encryption--net.minidev__json-smart__2.4.7_shaded.jar:/databricks/jars/common--encryption--org.json__json__20170516_shaded.jar:/databricks/jars/common--encryption--org.reactivestreams__reactive-streams__1.0.3_shaded.jar:/databricks/jars/common--encryption--types--types-spark_3.2_2.12_deploy.jar:/databricks/jars/common--entities--entities-spark_3.2_2.12_deploy.jar:/databricks/jars/common--error-code--error-code-spark_3.2_2.12_deploy.jar:/databricks/jars/common--hadoop--hadoop-spark_3.2_2.12_deploy.jar:/databricks/jars/common--http--headers--headers-spark_3.2_2.12_deploy.jar:/databricks/jars/common--http--multipart-form--multipart-form-spark_3.2_2.12_deploy.jar:/databricks/jars/common--http--service--service-spark_3.2_2.12_deploy.jar:/databricks/jars/common--http--util--util-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--cache-exporter--cache-exporter-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--error-state-recorder--error-state-recorder-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--instrumentation-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--servlets--servlets-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--stack-trace-reporter--stack-trace-reporter-spark_3.2_2.12_deploy.jar:/databricks/jars/common--instrumentation--thread-pool--thread-pool-spark_3.2_2.12_deploy.jar:/databricks/jars/common--java-flight-recorder--java-flight-recorder-spark_3.2_2.12_deploy.jar:/databricks/jars/common--jetty--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/common--jetty--server--server-spark_3.2_2.12_deploy.jar:/databricks/jars/common--jetty--shared--shared-spark_3.2_2.12_deploy.jar:/databricks/jars/common--jupyter-utils--jupyter_utils-spark_3.2_2.12_deploy.jar:/databricks/jars/common--jws-data--jws-data-spark_3.2_2.12_deploy.jar:/databricks/jars/common--lazy--lazy-spark_3.2_2.12_deploy.jar:/databricks/jars/common--libcommon_resources.jar:/databricks/jars/common--logging--all--all-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--appenders--appenders-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--appenders--service--service-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--audit--audit-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--audit-event--audit-event-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--billing--billing-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--filters--filters-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--filters--service--service-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--lineage--lineage-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--logging-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--metrics--metrics-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--optypes--optypes-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--redactor--redactor-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--stats--stats-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--tags--tags-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--throttled-logger--throttled-logger-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--topic--topic-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--usage--async--async-spark_3.2_2.12_deploy.jar:/databricks/jars/common--logging--usage--usage-spark_3.2_2.12_deploy.jar:/databricks/jars/common--login-credentials--login-credentials-spark_3.2_2.12_deploy.jar:/databricks/jars/common--main--main-spark_3.2_2.12_deploy.jar:/databricks/jars/common--manifest-info--manifest-info-spark_3.2_2.12_deploy.jar:/databricks/jars/common--network--network-spark_3.2_2.12_deploy.jar:/databricks/jars/common--node-types--node-types-spark_3.2_2.12_deploy.jar:/databricks/jars/common--org-id-conf--org-id-conf-spark_3.2_2.12_deploy.jar:/databricks/jars/common--path--path-spark_3.2_2.12_deploy.jar:/databricks/jars/common--pricing--pricing-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rate-limiter--rate-limiter-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rcp--rcp-spark_3.2_2.12_deploy.jar:/databricks/jars/common--reflection--reflection-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--checksum--checksum-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--clienttype--clienttype-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--draining--draining-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--exceptions--exceptions-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--hooks--hooks-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--metrics--metrics-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--metrics--server_metrics_recorder-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--metrics--tls_metrics-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--proxy--proxy-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--rpc-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--server--server-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--ssl--ssl-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--util--util-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc--validator--validator-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc-conf-spark_3.2_2.12_deploy.jar:/databricks/jars/common--rpc-context--rpc-context-spark_3.2_2.12_deploy.jar:/databricks/jars/common--scalalogging--scalalogging-spark_3.2_2.12_deploy.jar:/databricks/jars/common--shutdown-hook-manager--shutdown-hook-manager-spark_3.2_2.12_deploy.jar:/databricks/jars/common--storage--storage-spark_3.2_2.12_deploy.jar:/databricks/jars/common--storage-driver-utils--com.github.vladimir-bukhtoyarov__rolling-metrics__2.0.5_shaded.jar:/databricks/jars/common--storage-driver-utils--org.hdrhistogram__HdrHistogram__2.1.12_shaded.jar:/databricks/jars/common--storage-driver-utils--storage_driver_utils-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--executor--executor-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--future--future-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--idle-indicator--idle-indicator-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--parallel-run-helper--parallel-run-helper-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--singleton-job--singleton-job-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--threading-spark_3.2_2.12_deploy.jar:/databricks/jars/common--threading--timer--timer-spark_3.2_2.12_deploy.jar:/databricks/jars/common--tracing--tracing-spark_3.2_2.12_deploy.jar:/databricks/jars/common--tracing--util--span_util-spark_3.2_2.12_deploy.jar:/databricks/jars/common--tracing--util--util-spark_3.2_2.12_deploy.jar:/databricks/jars/common--user-context--user-context-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--cache-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--command-line-helper-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--emailclient-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--file-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--guid-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--locks-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--math-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--named-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--network-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--random-string-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--remote-command-helper-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--service-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--shell-command-executor-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--simple-path-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--sliding-window-rate-limiter-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--string-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--time-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--untrusted-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--urihelper-spark_3.2_2.12_deploy.jar:/databricks/jars/common--util--with-spark_3.2_2.12_deploy.jar:/databricks/jars/common--workflow-progress--workflow-progress-spark_3.2_2.12_deploy.jar:/databricks/jars/credentials-manager--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/daemon--data--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/daemon--data--client--conf--conf-spark_3.2_2.12_deploy.jar:/databricks/jars/daemon--data--client--utils-spark_3.2_2.12_deploy.jar:/databricks/jars/daemon--data--data-common--data-common-spark_3.2_2.12_deploy.jar:/databricks/jars/dbfs--exceptions--exceptions-spark_3.2_2.12_deploy.jar:/databricks/jars/dbfs--utils--dbfs-utils-spark_3.2_2.12_deploy.jar:/databricks/jars/delta-sharing--api--api-base-spark_3.2_2.12_deploy.jar:/databricks/jars/delta-sharing--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/dummyservice--logging--request_proto-spark_3.2_2.12-scalabp.jar:/databricks/jars/extern--acl--auth--auth-spark_3.2_2.12_deploy.jar:/databricks/jars/extern--extern-spark_3.2_2.12_deploy.jar:/databricks/jars/extern--libaws-regions.jar:/databricks/jars/feature-flag--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/feature-flag--client--experimentation_lib-spark_3.2_2.12_deploy.jar:/databricks/jars/feature-flag--client--experimentation_protos-spark_3.2_2.12-scalabp.jar:/databricks/jars/jsonutil--base-object-mapper-spark_3.2_2.12_deploy.jar:/databricks/jars/jsonutil--jsonutil-spark_3.2_2.12_deploy.jar:/databricks/jars/libraries--api--managedLibraries--managedLibraries-spark_3.2_2.12_deploy.jar:/databricks/jars/libraries--api--typemappers--typemappers-spark_3.2_2.12_deploy.jar:/databricks/jars/libraries--libraries-spark_3.2_2.12_deploy.jar:/databricks/jars/logging--activity--builder-spark_3.2_2.12_deploy.jar:/databricks/jars/logging--activity--context-spark_3.2_2.12_deploy.jar:/databricks/jars/logging--activity--logger-spark_3.2_2.12_deploy.jar:/databricks/jars/logging--activity--proto--request_activity_proto-spark_3.2_2.12-scalabp.jar:/databricks/jars/logging--log4j-mod--log4j-mod-spark_3.2_2.12_deploy.jar:/databricks/jars/logging--proto--eventid_proto-spark_3.2_2.12-scalabp.jar:/databricks/jars/logging--utils--logging-utils-spark_3.2_2.12_deploy.jar:/databricks/jars/macros--ratelimitedlogger--ratelimitedlogger-spark_3.2_2.12_deploy.jar:/databricks/jars/macros--sourcecode--sourcecode-spark_3.2_2.12_deploy.jar:/databricks/jars/managed-catalog--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/managed-catalog--api--api_common-spark_3.2_2.12_deploy.jar:/databricks/jars/manifest.jar:/databricks/jars/maven-trees--amazon-corretto-crypto-provider--software.amazon.cryptools--AmazonCorrettoCryptoProvider-linux-x86_64--software.amazon.cryptools__AmazonCorrettoCryptoProvider-linux-x86_64__1.4.0.jar:/databricks/jars/rcp--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/rcp--common-api--common-api-spark_3.2_2.12_deploy.jar:/databricks/jars/s3--s3-spark_3.2_2.12_deploy.jar:/databricks/jars/s3commit--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/s3commit--client--client-spark_3.2_2.12_deploy.jar:/databricks/jars/s3commit--common--common-spark_3.2_2.12_deploy.jar:/databricks/jars/secret-manager--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/secret-manager--libsecret-manager-crypto.jar:/databricks/jars/spark--command--command-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--common--spark-common-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--common-utils--utils-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--conf-reader--conf-reader_lib-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--dbutils--dbutils-api-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--antlr--parser-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--common--driver-common-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--dbutils_impl--data--rendering--rendering-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--display--display-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--driver-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--events-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--ml--ml-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--secret-redaction-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--driver--spark--resources-resources.jar:/databricks/jars/spark--logging--logging-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--sql-extension--sql-extension-spark_3.2_2.12_deploy.jar:/databricks/jars/spark--versions--3.2--shim_2.12_deploy.jar:/databricks/jars/spark--versions--3.2--spark_2.12_deploy.jar:/databricks/jars/spark--warmuptracing--libinterceptor_3.2.jar:/databricks/jars/spark--warmuptracing--traced_proxies_3.2.jar:/databricks/jars/sqlgateway--common--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/sqlgateway--common--endpoint_id-spark_3.2_2.12_deploy.jar:/databricks/jars/sqlgateway--history--api--api-spark_3.2_2.12_deploy.jar:/databricks/jars/sqlgateway--history--api--client-spark_3.2_2.12_deploy.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.aayushatharva.brotli4j_brotli4j_com.aayushatharva.brotli4j__brotli4j__1.6.0_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.fasterxml.jackson.core_jackson-annotations_com.fasterxml.jackson.core__jackson-annotations__2.13.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.fasterxml.jackson.core_jackson-core_com.fasterxml.jackson.core__jackson-core__2.13.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.fasterxml.jackson.core_jackson-databind_com.fasterxml.jackson.core__jackson-databind__2.13.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.android_annotations_com.google.android__annotations__4.1.1.4_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.api.grpc_proto-google-common-protos_com.google.api.grpc__proto-google-common-protos__2.0.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.code.findbugs_jsr305_com.google.code.findbugs__jsr305__3.0.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.code.gson_gson_com.google.code.gson__gson__2.8.9_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.errorprone_error_prone_annotations_com.google.errorprone__error_prone_annotations__2.9.0_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.guava_failureaccess_com.google.guava__failureaccess__1.0.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.guava_guava_com.google.guava__guava__30.1.1-android_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.guava_listenablefuture_com.google.guava__listenablefuture__9999.0-empty-to-avoid-conflict-with-guava_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.j2objc_j2objc-annotations_com.google.j2objc__j2objc-annotations__1.3_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.protobuf_protobuf-java-util_com.google.protobuf__protobuf-java-util__3.19.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.google.protobuf_protobuf-java_com.google.protobuf__protobuf-java__3.19.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.linecorp.armeria_armeria-grpc-protocol_com.linecorp.armeria__armeria-grpc-protocol__1.14.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.linecorp.armeria_armeria-grpc_com.linecorp.armeria__armeria-grpc__1.14.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_com.linecorp.armeria_armeria_com.linecorp.armeria__armeria__1.14.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-api_io.grpc__grpc-api__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-context_io.grpc__grpc-context__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-core_io.grpc__grpc-core__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-protobuf-lite_io.grpc__grpc-protobuf-lite__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-protobuf_io.grpc__grpc-protobuf__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-services_io.grpc__grpc-services__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.grpc_grpc-stub_io.grpc__grpc-stub__1.43.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.micrometer_micrometer-core_io.micrometer__micrometer-core__1.8.2_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-buffer_io.netty__netty-buffer__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec-dns_io.netty__netty-codec-dns__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec-haproxy_io.netty__netty-codec-haproxy__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec-http2_io.netty__netty-codec-http2__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec-http_io.netty__netty-codec-http__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec-socks_io.netty__netty-codec-socks__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-codec_io.netty__netty-codec__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-common_io.netty__netty-common__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-handler-proxy_io.netty__netty-handler-proxy__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-handler_io.netty__netty-handler__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-resolver-dns_io.netty__netty-resolver-dns__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-resolver_io.netty__netty-resolver__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-tcnative-classes_io.netty__netty-tcnative-classes__2.0.46.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.netty_netty-transport_io.netty__netty-transport__4.1.73.Final_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_io.perfmark_perfmark-api_io.perfmark__perfmark-api__0.23.0_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_liball_deps_2.12_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_net.bytebuddy_byte-buddy_net.bytebuddy__byte-buddy__1.12.6_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.checkerframework_checker-compat-qual_org.checkerframework__checker-compat-qual__2.5.5_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.curioswitch.curiostack_protobuf-jackson_org.curioswitch.curiostack__protobuf-jackson__2.0.0_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.hdrhistogram_HdrHistogram_org.hdrhistogram__HdrHistogram__2.1.12_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.joda_joda-convert_org.joda__joda-convert__2.2.1_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.latencyutils_LatencyUtils_org.latencyutils__LatencyUtils__2.0.3_shaded.jar:/databricks/jars/third_party--armeria--maven-trees_armeria_org.reactivestreams_reactive-streams_org.reactivestreams__reactive-streams__1.0.3_shaded.jar:/databricks/jars/third_party--armeria--service_discovery-resources.jar:/databricks/jars/third_party--azure--com.fasterxml.jackson.core__jackson-core__2.7.2_shaded.jar:/databricks/jars/third_party--azure--com.microsoft.azure__azure-client-runtime__1.7.12_container_shaded.jar:/databricks/jars/third_party--azure--com.microsoft.azure__azure-keyvault-core__1.0.0_shaded.jar:/databricks/jars/third_party--azure--com.microsoft.azure__azure-storage__8.6.4_shaded.jar:/databricks/jars/third_party--azure--com.microsoft.rest__client-runtime__1.7.12_container_shaded.jar:/databricks/jars/third_party--azure--org.apache.commons__commons-lang3__3.4_shaded.jar:/databricks/jars/third_party--datalake--datalake-spark_3.2_2.12_deploy.jar:/databricks/jars/third_party--dropwizard-metrics-log4j-v3.2.6--metrics-log4j-spark_3.2_2.12_deploy.jar:/databricks/jars/third_party--gcp-java--animal-sniffer-annotations_shaded.jar:/databricks/jars/third_party--gcp-java--api-common_shaded.jar:/databricks/jars/third_party--gcp-java--auto-value-annotations_shaded.jar:/databricks/jars/third_party--gcp-java--checker-compat-qual_shaded.jar:/databricks/jars/third_party--gcp-java--commons-codec_shaded.jar:/databricks/jars/third_party--gcp-java--commons-lang3_shaded.jar:/databricks/jars/third_party--gcp-java--commons-logging_shaded.jar:/databricks/jars/third_party--gcp-java--error_prone_annotations_shaded.jar:/databricks/jars/third_party--gcp-java--failureaccess_shaded.jar:/databricks/jars/third_party--gcp-java--gax-grpc_shaded.jar:/databricks/jars/third_party--gcp-java--gax-httpjson_shaded.jar:/databricks/jars/third_party--gcp-java--gax_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-client_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-cloudkms_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-cloudresourcemanager_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-compute_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-container_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-deploymentmanager_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-dns_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-iam_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-networkmanagement_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-servicecontrol_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-serviceusage_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-storage_shaded.jar:/databricks/jars/third_party--gcp-java--google-api-services-sts_shaded.jar:/databricks/jars/third_party--gcp-java--google-auth-library-credentials_shaded.jar:/databricks/jars/third_party--gcp-java--google-auth-library-oauth2-http_shaded.jar:/databricks/jars/third_party--gcp-java--google-cloud-core-http_shaded.jar:/databricks/jars/third_party--gcp-java--google-cloud-core_shaded.jar:/databricks/jars/third_party--gcp-java--google-cloud-notification_shaded.jar:/databricks/jars/third_party--gcp-java--google-cloud-pubsub_shaded.jar:/databricks/jars/third_party--gcp-java--google-cloud-storage_shaded.jar:/databricks/jars/third_party--gcp-java--google-http-client-apache-v2_shaded.jar:/databricks/jars/third_party--gcp-java--google-http-client-appengine_shaded.jar:/databricks/jars/third_party--gcp-java--google-http-client-jackson2_shaded.jar:/databricks/jars/third_party--gcp-java--google-http-client_shaded.jar:/databricks/jars/third_party--gcp-java--google-oauth-client_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-alts_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-api_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-auth_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-context_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-core_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-grpclb_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-netty-shaded_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-protobuf-lite_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-protobuf_shaded.jar:/databricks/jars/third_party--gcp-java--grpc-stub_shaded.jar:/databricks/jars/third_party--gcp-java--gson_shaded.jar:/databricks/jars/third_party--gcp-java--guava_shaded.jar:/databricks/jars/third_party--gcp-java--httpclient_shaded.jar:/databricks/jars/third_party--gcp-java--httpcore_shaded.jar:/databricks/jars/third_party--gcp-java--j2objc-annotations_shaded.jar:/databricks/jars/third_party--gcp-java--jackson-core_shaded.jar:/databricks/jars/third_party--gcp-java--javax.annotation-api_shaded.jar:/databricks/jars/third_party--gcp-java--jsr305_shaded.jar:/databricks/jars/third_party--gcp-java--listenablefuture_shaded.jar:/databricks/jars/third_party--gcp-java--opencensus-api_shaded.jar:/databricks/jars/third_party--gcp-java--opencensus-contrib-http-util_shaded.jar:/databricks/jars/third_party--gcp-java--perfmark-api_shaded.jar:/databricks/jars/third_party--gcp-java--proto-google-cloud-pubsub-v1_shaded.jar:/databricks/jars/third_party--gcp-java--proto-google-common-protos_shaded.jar:/databricks/jars/third_party--gcp-java--proto-google-iam-v1_shaded.jar:/databricks/jars/third_party--gcp-java--protobuf-java-util_shaded.jar:/databricks/jars/third_party--gcp-java--protobuf-java_shaded.jar:/databricks/jars/third_party--gcp-java--threetenbp_shaded.jar:/databricks/jars/third_party--gcs-private--animal-sniffer-annotations_shaded.jar:/databricks/jars/third_party--gcs-private--annotations_shaded.jar:/databricks/jars/third_party--gcs-private--auto-value-annotations_shaded.jar:/databricks/jars/third_party--gcs-private--checker-compat-qual_shaded.jar:/databricks/jars/third_party--gcs-private--checker-qual_shaded.jar:/databricks/jars/third_party--gcs-private--commons-codec_shaded.jar:/databricks/jars/third_party--gcs-private--commons-logging_shaded.jar:/databricks/jars/third_party--gcs-private--conscrypt-openjdk-uber_shaded.jar:/databricks/jars/third_party--gcs-private--error_prone_annotations_shaded.jar:/databricks/jars/third_party--gcs-private--failureaccess_shaded.jar:/databricks/jars/third_party--gcs-private--flogger-system-backend_shaded.jar:/databricks/jars/third_party--gcs-private--flogger_shaded.jar:/databricks/jars/third_party--gcs-private--gcs-connector-hadoop3_shaded.jar:/databricks/jars/third_party--gcs-private--gcs-shaded-spark_3.2_2.12_deploy.jar:/databricks/jars/third_party--gcs-private--gcsio-hadoop3_shaded.jar:/databricks/jars/third_party--gcs-private--google-api-client-jackson2_shaded.jar:/databricks/jars/third_party--gcs-private--google-api-client_shaded.jar:/databricks/jars/third_party--gcs-private--google-api-services-iamcredentials_shaded.jar:/databricks/jars/third_party--gcs-private--google-api-services-storage_shaded.jar:/databricks/jars/third_party--gcs-private--google-auth-library-credentials_shaded.jar:/databricks/jars/third_party--gcs-private--google-auth-library-oauth2-http_shaded.jar:/databricks/jars/third_party--gcs-private--google-extensions_shaded.jar:/databricks/jars/third_party--gcs-private--google-http-client-apache-v2_shaded.jar:/databricks/jars/third_party--gcs-private--google-http-client-gson_shaded.jar:/databricks/jars/third_party--gcs-private--google-http-client-jackson2_shaded.jar:/databricks/jars/third_party--gcs-private--google-http-client_shaded.jar:/databricks/jars/third_party--gcs-private--google-oauth-client_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-alts_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-api_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-auth_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-context_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-core_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-netty-shaded_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-protobuf-lite_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-protobuf_shaded.jar:/databricks/jars/third_party--gcs-private--grpc-stub_shaded.jar:/databricks/jars/third_party--gcs-private--gson_shaded.jar:/databricks/jars/third_party--gcs-private--guava_shaded.jar:/databricks/jars/third_party--gcs-private--httpclient_shaded.jar:/databricks/jars/third_party--gcs-private--httpcore_shaded.jar:/databricks/jars/third_party--gcs-private--j2objc-annotations_shaded.jar:/databricks/jars/third_party--gcs-private--jackson-core_shaded.jar:/databricks/jars/third_party--gcs-private--libmeta-services.jar:/databricks/jars/third_party--gcs-private--listenablefuture_shaded.jar:/databricks/jars/third_party--gcs-private--opencensus-api_shaded.jar:/databricks/jars/third_party--gcs-private--opencensus-contrib-http-util_shaded.jar:/databricks/jars/third_party--gcs-private--perfmark-api_shaded.jar:/databricks/jars/third_party--gcs-private--proto-google-common-protos_shaded.jar:/databricks/jars/third_party--gcs-private--proto-google-iam-v1_shaded.jar:/databricks/jars/third_party--gcs-private--protobuf-java-util_shaded.jar:/databricks/jars/third_party--gcs-private--protobuf-java_shaded.jar:/databricks/jars/third_party--gcs-private--util-hadoop-hadoop3_shaded.jar:/databricks/jars/third_party--gcs-private--util-hadoop3_shaded.jar:/databricks/jars/third_party--hadoop--hadoop-tools--hadoop-aws--lib-spark_3.2_2.12_deploy_shaded.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--aopalliance__aopalliance__1.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.fasterxml.jackson.core__jackson-annotations__2.7.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.fasterxml.jackson.core__jackson-core__2.7.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.fasterxml.jackson.core__jackson-core__2.7.2_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.fasterxml.jackson.core__jackson-databind__2.7.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.fasterxml.jackson.datatype__jackson-datatype-joda__2.7.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.google.code.findbugs__jsr305__1.3.9_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.google.guava__guava__11.0.2_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.google.guava__guava__16.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.google.inject__guice__3.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.azure__azure-annotations__1.2.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.azure__azure-keyvault-core__1.0.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.azure__azure-keyvault-core__1.0.0_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.azure__azure-storage__7.0.0_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.azure__azure-storage__8.6.4_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.microsoft.rest__client-runtime__1.1.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.okhttp3__logging-interceptor__3.3.1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.okhttp3__okhttp-urlconnection__3.3.1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.okhttp3__okhttp__3.3.1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.okio__okio__1.8.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.retrofit2__adapter-rxjava__2.1.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.retrofit2__converter-jackson__2.1.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.squareup.retrofit2__retrofit__2.1.0_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--com.sun.xml.bind__jaxb-impl__2.2.3-1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--commons-codec__commons-codec__1.9_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--commons-codec__commons-codec__1.9_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--commons-logging__commons-logging__1.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--commons-logging__commons-logging__1.2_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--hadoop-azure-2.7.3-abfs-external-20180625_3682417-spark_3.2_2.12_deploy_shaded.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--hadoop-azure-2.7.3-abfs-external-20180920_b33d810-spark_3.2_2.12_deploy_shaded.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--io.netty__netty-all__4.0.52.Final_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--io.reactivex__rxjava__1.2.4_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--javax.activation__activation__1.1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--javax.inject__javax.inject__1_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--javax.xml.bind__jaxb-api__2.2.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--javax.xml.stream__stax-api__1.0-2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--joda-time__joda-time__2.4_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--org.apache.htrace__htrace-core__3.1.0-incubating_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--org.apache.httpcomponents__httpclient__4.5.2_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--org.apache.httpcomponents__httpclient__4.5.2_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--org.apache.httpcomponents__httpcore__4.4.4_2.12_shaded_20180625_3682417_spark_3.2.jar:/databricks/jars/third_party--hadoop-azure-2.7.3-abfs--org.apache.httpcomponents__httpcore__4.4.4_2.12_shaded_20180920_b33d810_spark_3.2.jar:/databricks/jars/third_party--hadoop_azure_abfs--hadoop-tools--hadoop-azure--lib-spark_3.2_2.12_deploy.jar_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--animal-sniffer-annotations_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--annotations_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--auto-value-annotations_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--checker-compat-qual_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--checker-qual_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--commons-codec_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--commons-logging_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--conscrypt-openjdk-uber_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--error_prone_annotations_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--failureaccess_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--flogger-log4j-backend_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--flogger-system-backend-shaded_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--flogger_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--gcs-shaded-spark_3.2_2.12_deploy.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--gcsio_proto_library-speed-src_shaded_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-api-client-jackson2_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-api-client-java6_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-api-client_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-api-services-iamcredentials_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-api-services-storage_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-auth-library-credentials_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-auth-library-oauth2-http_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-extensions_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-http-client-apache-v2_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-http-client-gson_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-http-client-jackson2_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-http-client_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-oauth-client-java6_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--google-oauth-client_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-alts_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-api_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-auth_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-context_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-core_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-grpclb_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-netty-shaded_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-protobuf-lite_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-protobuf_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--grpc-stub_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--gson_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--guava_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--httpclient_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--httpcore_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--j2objc-annotations_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--jackson-core_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--jsr305_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libgcs-connector-spark_3.2_2.12_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libgcsio-spark_3.2_2.12_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libgcsio_iam_proto_library-speed_shaded_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libgcsio_proto_library-speed_shaded_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libmeta-services.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libutil-hadoop-spark_3.2_2.12_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--libutil-spark_3.2_2.12_spark_3.2_2.12_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--listenablefuture_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--opencensus-api_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--opencensus-contrib-http-util_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--perfmark-api_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--proto-google-common-protos_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--proto-google-iam-v1_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--protobuf-java-util_shaded.jar:/databricks/jars/third_party--hadoop_gcs--hadoop-connectors--protobuf-java_shaded.jar:/databricks/jars/third_party--jackson--guava_only_shaded.jar:/databricks/jars/third_party--jackson--jackson-module-scala-shaded_2.12_deploy.jar:/databricks/jars/third_party--jackson--jsr305_only_shaded.jar:/databricks/jars/third_party--jackson--paranamer_only_shaded.jar:/databricks/jars/third_party--jetty-client--jetty-client_shaded.jar:/databricks/jars/third_party--jetty-client--jetty-http_shaded.jar:/databricks/jars/third_party--jetty-client--jetty-io_shaded.jar:/databricks/jars/third_party--jetty-client--jetty-util_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jackson-annotations_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jackson-core_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jackson-databind_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jjwt-api_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jjwt-impl_shaded.jar:/databricks/jars/third_party--jsonwebtoken--jjwt-jackson_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.google.code.findbugs__jsr305__3.0.2_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.google.code.gson__gson__2.8.2_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.google.errorprone__error_prone_annotations__2.1.3_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.google.guava__guava__26.0-android_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.google.j2objc__j2objc-annotations__1.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.lmax__disruptor__3.4.2_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.squareup.okhttp3__okhttp__3.9.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--com.squareup.okio__okio__1.13.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--commons-codec__commons-codec__1.9_shaded.jar:/databricks/jars/third_party--opencensus-shaded--commons-logging__commons-logging__1.2_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.grpc__grpc-context__1.19.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.jaegertracing__jaeger-client__0.33.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.jaegertracing__jaeger-core__0.33.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.jaegertracing__jaeger-thrift__0.33.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.jaegertracing__jaeger-tracerresolver__0.33.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opencensus__opencensus-api__0.22.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opencensus__opencensus-exporter-trace-jaeger__0.22.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opencensus__opencensus-exporter-trace-util__0.22.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opencensus__opencensus-impl-core__0.22.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opencensus__opencensus-impl__0.22.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opentracing.contrib__opentracing-tracerresolver__0.1.5_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opentracing__opentracing-api__0.31.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opentracing__opentracing-noop__0.31.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--io.opentracing__opentracing-util__0.31.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--org.apache.httpcomponents__httpclient__4.4.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--org.apache.httpcomponents__httpcore__4.4.1_shaded.jar:/databricks/jars/third_party--opencensus-shaded--org.apache.thrift__libthrift__0.11.0_shaded.jar:/databricks/jars/third_party--opencensus-shaded--org.checkerframework__checker-compat-qual__2.5.2_shaded.jar:/databricks/jars/third_party--opencensus-shaded--org.codehaus.mojo__animal-sniffer-annotations__1.14_shaded.jar:/databricks/jars/third_party--zeromq--jeromq_shaded.jar:/databricks/jars/third_party--zeromq--jnacl_shaded.jar:/databricks/jars/utils--process_utils-spark_3.2_2.12_deploy.jar:/databricks/jars/workflow--workflow-spark_3.2_2.12_deploy.jar
spark.executor.extraJavaOptions,-Djava.io.tmpdir=/local_disk0/tmp -XX:ReservedCodeCacheSize=512m -XX:+UseCodeCacheFlushing -Djava.security.properties=/databricks/spark/dbconf/java/extra.security -XX:-UseContainerSupport -XX:+PrintFlagsFinal -XX:+PrintGCDateStamps -XX:+PrintGCDetails -verbose:gc -Xss4m -Djava.library.path=/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib:/usr/lib/x86_64-linux-gnu/jni:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/usr/lib/jni -Djavax.xml.datatype.DatatypeFactory=com.sun.org.apache.xerces.internal.jaxp.datatype.DatatypeFactoryImpl -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -Djavax.xml.validation.SchemaFactory:http://www.w3.org/2001/XMLSchema=com.sun.org.apache.xerces.internal.jaxp.validation.XMLSchemaFactory -Dorg.xml.sax.driver=com.sun.org.apache.xerces.internal.parsers.SAXParser -Dorg.w3c.dom.DOMImplementationSourceList=com.sun.org.apache.xerces.internal.dom.DOMXSImplementationSourceImpl -Djavax.net.ssl.sessionCacheSize=10000 -Dscala.reflect.runtime.disable.typetag.cache=true -Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true -Dlog4j2.formatMsgNoLookups=true -Ddatabricks.serviceName=spark-executor-1


#### Spark Configuration

In [0]:
# partitions
spark.conf.set("spark.sql.shuffle.partitions", sc.defaultParallelism) # reduce parallelism for aggregations and joins, set it to amount of cores. Faster processing
# spark.conf.set("spark.sql.shuffle.partitions", 2)

In [0]:
# spark.conf.get('spark.sql.files.maxPartitionBytes')
spark.conf.get('spark.sql.shuffle.partitions')


Out[3]: '8'

### Libraries

In [0]:
%sh 

# installing libraries
pip install py7zr unidecode --quiet --disable-pip-version-check


In [0]:
# libraries

import pyspark.sql.functions as F
from pyspark.sql.types import *

from IPython.display import Image 
from IPython.core.display import HTML 

import numpy as np
import pandas as pd

from string import ascii_letters, punctuation
from unidecode import unidecode
from urllib.request import urlopen
import urllib 
import json
import py7zr
import re
import os

### Data importing

- Downloading of main JSON zipped data (~2.4 Gb)  
- Decompression of JSON data (~17 Gb)  
- Fetching of manually curated data from [EGI Confluence](https://confluence.egi.eu/display/EGIG/Scientific+Disciplines) on normalized disciplines classification

In [0]:
%sh

ls -lh /mnt/

total 8.0K
drwxr-xr-x 2 root root 4.0K May 27 07:02 driver-daemon
drwxr-xr-x 2 root root 4.0K May 27 07:01 readonly


In [0]:
# local variables

local_path = '/mnt/'

In [0]:
# Retrieve the zip file
# https://docs.databricks.com/data/data-sources/zip-files.html
file_url = 'https://originalstatic.aminer.cn/misc/dblp.v13.7z'
file = local_path + 'dblp.v13.7z'

print("Downloading file from source")
urllib.request.urlretrieve(file_url, file)

# Unzip compressed file  
print("Unzipping 7z file")
archive = py7zr.SevenZipFile(file, mode='r')
archive.extractall(path=local_path)
archive.close()

Downloading file from source
Unzipping 7z file


In [0]:
%sh 

echo 'Deleting zipped JSON file'
rm /mnt/dblp.v13.7z


Deleting zipped JSON file


In [0]:
%sh

echo 'Fetching 'field of study' (fos) data'
wget --quiet -P /mnt https://raw.githubusercontent.com/JRodrigoF/bdm_g6/main/p1/data/dictionary_fos.txt

ls -lsh /mnt/

Fetching field of study (fos) data
total 32K
 24K -rw-r--r-- 1 root root  21K Jun 10 07:05 dictionary_fos.txt
4.0K drwxr-xr-x 2 root root 4.0K Jun 10 07:02 driver-daemon
4.0K drwxr-xr-x 2 root root 4.0K Jun 10 07:02 readonly


In [0]:
%sh

head -50 /mnt/dictionary_fos.txt

[
{

		"mathematics" : "natural sciences",
		"computer sciences" : "natural sciences",
		"information sciences" : "natural sciences",
		"earth sciences" : "natural sciences",
		"biology science" : "natural sciences",
		"physical sciences" : "natural sciences",
		"chemical sciences" : "natural sciences",

		"civil engineering" : "engineering and technology",
		"electrical, electronic and information engineering" : "engineering and technology",
		"mechanical engineering" : "engineering and technology",
		"aerospace engineering" : "engineering and technology",
		"chemical engineering" : "engineering and technology",
		"materials engineering" : "engineering and technology",
		"bioengineering and biomedical engineering" : "engineering and technology",
		"environmental engineering" : "engineering and technology",
		"environmental biotechnology" : "engineering and technology",
		"industrial biotechnology" : "engineering and technology",
		"nano-technology" : "engineering and technology",
		"n

### Data pre-processing

- Parsing of JSON file
  - Making JSON data format-conforming
    - Fixing of 'string' fields into integer
  - Splitting file into chunks/parts with 250K entries each
  - Output gets stored in the local file system (/mnt/)

In [0]:
input_file = local_path + 'dblpv13.json'
output_file = local_path + 'dblpv13_edited_part_1.json'

out = open(output_file, 'w')
n_elements = 250000
counter = 0
file = 1
entries = 0

print('Processing JSON data')
print('Writing compliant JSON file part {}'.format(file))
with open(input_file, 'r') as f:
    for line in f:
        if 'NumberInt' in line:
            line = line.replace("NumberInt(", "").replace(")", "")
            out.write(line)
        else:
            if re.match('^},', line):
                counter += 1
                entries += 1
                if counter == n_elements:
                    out.write('}]')
                    out.close()
                    file += 1
                    print('Writing compliant JSON file part {}'.format(file))
                    output_file = local_path + 'dblpv13_edited_part_{}.json'.format(file)
                    out = open(output_file, 'w')
                    out.write('[')
                    counter = 0
                else:
                    out.write(line)
            else:
                out.write(line)
    out.close()

print('{} entries processed'.format(entries))
            

Processing JSON data
Writing compliant JSON file part 1
Writing compliant JSON file part 2
Writing compliant JSON file part 3
Writing compliant JSON file part 4
Writing compliant JSON file part 5
Writing compliant JSON file part 6
Writing compliant JSON file part 7
Writing compliant JSON file part 8
Writing compliant JSON file part 9
Writing compliant JSON file part 10
Writing compliant JSON file part 11
Writing compliant JSON file part 12
Writing compliant JSON file part 13
Writing compliant JSON file part 14
Writing compliant JSON file part 15
Writing compliant JSON file part 16
Writing compliant JSON file part 17
Writing compliant JSON file part 18
Writing compliant JSON file part 19
Writing compliant JSON file part 20
Writing compliant JSON file part 21
Writing compliant JSON file part 22
5354308 entries processed


In [0]:
%sh

echo 'Deleting JSON single file'
rm /mnt/dblpv13.json

ls -lsh /mnt/

Deleting JSON single file
total 16G
655M -rw-r--r-- 1 root root 655M Jun  8 08:29 dblpv13_edited_part_1.json
751M -rw-r--r-- 1 root root 751M Jun  8 08:34 dblpv13_edited_part_10.json
759M -rw-r--r-- 1 root root 759M Jun  8 08:35 dblpv13_edited_part_11.json
813M -rw-r--r-- 1 root root 813M Jun  8 08:35 dblpv13_edited_part_12.json
679M -rw-r--r-- 1 root root 679M Jun  8 08:36 dblpv13_edited_part_13.json
717M -rw-r--r-- 1 root root 717M Jun  8 08:36 dblpv13_edited_part_14.json
706M -rw-r--r-- 1 root root 706M Jun  8 08:36 dblpv13_edited_part_15.json
798M -rw-r--r-- 1 root root 798M Jun  8 08:37 dblpv13_edited_part_16.json
818M -rw-r--r-- 1 root root 818M Jun  8 08:37 dblpv13_edited_part_17.json
837M -rw-r--r-- 1 root root 837M Jun  8 08:38 dblpv13_edited_part_18.json
818M -rw-r--r-- 1 root root 818M Jun  8 08:38 dblpv13_edited_part_19.json
734M -rw-r--r-- 1 root root 734M Jun  8 08:30 dblpv13_edited_part_2.json
804M -rw-r--r-- 1 root root 804M Jun  8 08:39 dblpv13_edited_part_20.json
747M

### Data Extraction & Transformation

- Reading of 'field of study' (fos) data into two dictionaries
  - general fos
  - specific fos
- ETL functions    
- Schema specification
  - To be used for storing/reading data as Delta tables
- ETL processing  
  (Performed iteratively on each JSON part)  

  - Entries are discarded if:
    - title is empty or consist of only one word
    - "foreword", "editor" or "book" appears in the title 
    - if the title is contained in a pre-defined "blacklist" ("[review article]", "In this issue", "about the cover", ..)  
    - if the number of citations in the entry is 0 or not present 
  - Authors / organizations
    - entries with no authors listed are removed
    - authors present in a pre-defined "blacklist" ("anonymous", "NA", "dr.", ..) of non-valid authors are removed from the entry
    - author names are cleaned for any extra punctuation symbols both at the begging and at the end of the string
    - author organizations are cleaned for any extra punctuation symbols both at the begging and at the end of the string
  - DOI
    - entries are searched for a valid DOI pattern, if not matched, "NA" gets assigned
    - string 'https://doi.org/' is removed from the DOI string
  - ISSN
    - if ISSN is not present in the entry, an attempt is made to obtain it from the DOI, which is cleaned first
    - entries are matched to the pattern they should have and in case of mismatch "NA" is assigned
  - ISBN
    - entries are matched to the pattern and length they should have, which is not unique but more than one posibility exist. In case of mismatch "NA" is assigned
  - Abstract
    - entries are cleaned for any extra punctuation symbols or non-informative strings both at the begging and at the end of the string
  - Venue
    - Only fields "name", "type", "raw" are retained
    - Fields are cleaned for extra punctuation symbols both at the begging and at the end of the string
    - Using a combination of 1) text search based on venue fields "name" and "raw", 2) text searches in the cleaned field "abstract" from the main entry, 3) presence of valid ISBN and heuristics (e.g. venues where an acronym is given), the following document categories are determined
      - journal
      - book
      - conference
      - workshop
      - seminar
      - colloquium
      - symposium
      - unknown
  - Year
    - field is retained. Non-integer entries are turned to "NA"
  - Volume
    - values are cleaned for any ascii characters before isistance(int) is applied
  - Language
    - entries are kept. If not present or empty then "NA" is recorded
  - Field of Study
    - Two categories have been curated from [EGI Confluence](https://confluence.egi.eu/display/EGIG/Scientific+Disciplines) on normalized disciplines classifications
      - general
      - specific
    - curation involved spliting many composed unormalized terms into simpler versions more likely to be found as such. For example:  
      "electrical and electronic engineering" becomes
        "electrical engineering"  
        "electronic engineering"
    
    - all unormalized fos categories are match against the curated fos hash tables and if a specific category finds a hit, a second more general categroy defined is also recorded
  - Keywords
    - entries are cleaned for any extra punctuation symbols or non-informative strings both at the begging and at the end of the string

- Resulting Spark DataFrames are stored persistently as Delta tables in DBFS
  - One for each JSON part

#### Extraction & Transformation functions

In [0]:
input_file = local_path + "dictionary_fos.txt"

with open(input_file) as f:
    dict_fos = f.read()
    dict_fos = json.loads(dict_fos)

dict_fos_discipline = dict_fos[0]
dict_fos_subdiscipline = dict_fos[1]

In [0]:
# parsing functions for raw dict entries

def doi2issn(doi):
    if '/' in doi:
        issn = doi.split('/')[1].split('(')[0]
        if re.match("[0-9]{4}-[0-9]{3}[X0-9]", issn):
            return issn
        else:
            return 'NA'
    else:
        return 'NA'

def parse_issn(issn, paper):
    issn = paper['issn'].split(' ')[0]
    if re.match("[0-9]{4}-[0-9]{3}[X0-9]", issn):
        return issn
    else:
        return 'NA'

def words_in_string(word_list, a_string):
    return set(word_list).intersection(a_string.split())

# https://stackoverflow.com/questions/11150239/natural-sorting
def natural_sort(l): 
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 
    return sorted(l, key=alphanum_key)

title_blacklist = {
    '[review article]':-1,
    'In the news':-1,
    '(no title)':-1,
    'In this issue …':-1,
    '[title page]':-1,
    '[without title]':-1,
    'about the cover':-1,
    'about the issue':-1,
    'about the proceedings':-1,
    'about the authors':-1,
    'about this journal':-1,
    }

keyword_blacklist = {'03-03':-1,
                     '! !"! ! !###! ! !! ! "':-1}
    
def etl_processing(dict_entry, title_blacklist=title_blacklist, keyword_blacklist=keyword_blacklist):
    
    paper = dict_entry
            
    # Title
    if 'title' not in paper or paper['title'] == "" or paper['title'] == None or paper['title'] == 'None':
        return None
    
    title = paper['title'].strip('.: ').lower()
    if len(title.split(' ')) == 1:
        return None
        
    if 'foreword' in title or 'editor' in title or 'book' in title or title in title_blacklist:
        return None

    if 'n_citation' not in paper or paper['n_citation'] == 0:
        return None
    
    clean_paper = {}
    clean_paper['id'] = paper['_id']
    clean_paper['title'] = paper['title'].strip('.: ')
    clean_paper['n_citation'] = paper['n_citation']

    # authors & organization
    if 'authors' not in paper or paper['authors'] == []:
        return None
    
    authors_blacklist = ['staff', 'ieee', 'anonymous', 'NA', 'dr.', '']
    authors = [] 
    
    for author in paper['authors']:
        
        clean_author = {
            'name': 'NA',
            'org': 'NA'
        }
        
        if 'name' in author and author['name'] != '' and not words_in_string(authors_blacklist, author['name'].lower()):
            clean_author['name'] = author['name'].strip('.:,-&/ ')
        else:
            continue
            
        if 'org' in author and author['org'] != '' and author['org'] not in 'Corresponding author':
            clean_author['org'] = author['org'].strip('(Correspd.:,-&/)#N*<i> ')
            
        authors.append(clean_author) 
    
    if authors == []:
        return None
    else:
        clean_paper['authors'] = authors      
        
    # issn
    if 'issn' not in paper and 'doi' in paper:
        clean_paper['issn'] = doi2issn(paper['doi'])
    elif 'issn' not in paper and 'doi' not in paper:
        clean_paper['issn'] = 'NA'
    elif 'issn' in paper and 'doi' in paper:
        issn = parse_issn(paper['issn'], paper)
        if issn == 'NA':
            clean_paper['issn'] = doi2issn(paper['doi'])
        else:
            clean_paper['issn'] = issn
    else:
        clean_paper['issn'] = parse_issn(paper['issn'], paper)

    # doi
    if 'doi' not in paper:
        clean_paper['doi'] = 'NA'
    else:
        if '/' in paper['doi']:
            doi = paper['doi'].strip('https://doi.org/')
            if re.match("10.[0-9]{4}", doi):
                clean_paper['doi'] = doi
            else:
                clean_paper['doi'] = 'NA'
        else:
            clean_paper['doi'] = 'NA'
            
    # isbn
    if 'isbn' not in paper or paper['isbn'] == '' or paper['isbn'] == 'isbn':
        clean_paper['isbn'] = 'NA'
    else:
        isbn = paper['isbn']
#         if re.match("[0-9]{3}-[0-9]{1}-[0-9]{4}-[0-9]{4}-[0-9X]{1}", isbn):
#             clean_paper['isbn'] = isbn
        str_ = re.sub('-', '', isbn)
        if len(str_) == 10 or len(str_) == 13 or len(str_) == 9:
            clean_paper['isbn'] = isbn
        elif re.match("[0-9]{9}[0-9X]{1}", isbn):
            clean_paper['isbn'] = '{}-{}-{}-{}'.format(isbn[0],isbn[1:6],isbn[6:9],isbn[9])
        elif re.match("[0-9]{13}", isbn):
            clean_paper['isbn'] = '{}-{}-{}-{}-{}'.format(n[0:3],n[3],n[4:8],n[8:12],n[12])
        else:
            clean_paper['isbn'] = 'NA'

    # type of publication
    # pre-checks and captures abstract field
    abstract = 'NA'
    if 'abstract' in paper and paper['abstract'] != '' and paper['abstract'] != 'Without Abstract':
         abstract = paper['abstract'].strip(punctuation).strip()
    
    # venue
    venue_dict = {
        'name': 'NA',
        'type': 'NA',
        'raw': 'NA'
    }

    if 'venue' in paper:
        if 'raw' in paper['venue'] and paper['venue']['raw'] != None:
            venue_dict['raw'] =  paper['venue']['raw'].rstrip('.:()01234567890 ')
        if 'name_d' in paper['venue'] and paper['venue']['name_d'] != None:
            venue_dict['name'] = paper['venue']['name_d'].rstrip('.: ')
        if 'type' in paper['venue'] and paper['venue']['type'] != None:
            venue_dict['type'] = int(str(paper['venue']['type']).rstrip('.: '))
    
    clean_paper['venue'] = venue_dict
    
    VENUE = venue_dict['raw'] + ' ' + venue_dict['name']
    venue = VENUE.lower()
    
    # Journal case
    if 'journal' in venue or 'trans' in venue or 'ann' in venue or 'review' in venue or 'j.' in venue:
        doc_type = 'journal'
    elif 'this paper' in abstract.lower():
        doc_type = 'journal'
    # Book case
    elif clean_paper['isbn'] != 'NA':
        doc_type = 'book'
    # Conference case
    elif venue_dict['raw'] != 'NA' and venue_dict['raw'].isupper(): 
        doc_type = 'conference'
    elif 'conference' in venue or 'IEEE' in VENUE or 'congress' in venue:
        doc_type = 'conference'
    # Workshop case
    elif 'workshop' in venue:
        doc_type = 'workshop'
    # Seminar case
    elif 'seminar' in venue:
        doc_type = 'seminar'
    # Colloquium case
    elif 'colloquium' in venue:
        doc_type = 'colloquium'
    # Symposium case
    elif 'symposium' in venue:
        doc_type = 'symposium'
    else:
        doc_type = 'unknown'
    
    clean_paper['doc_type'] = doc_type
                
    # year
    clean_paper['year'] = 'NA'
    if 'year' in paper and paper['year'] != '0':
        clean_paper['year'] = paper['year']
        
    # volume        
    clean_paper['volume'] = 'NA'
    if 'volume' in paper and paper['volume'] != '':
        volume = paper['volume'].strip('.:-&/ ' + ascii_letters)
        if isinstance(volume, int):
            clean_paper['volume'] = paper['volume']

    # language
    clean_paper['lang'] = 'NA'
    if 'lang' in paper and paper['lang'] != '':
        clean_paper['lang'] = paper['lang']
    
    # field of study (fos)
    clean_paper['fos_general'] = 'NA'
    clean_paper['fos_specific'] = 'NA'
    if 'fos' in paper and paper['fos'] != []:
        for fos in paper['fos']:
            fos = fos.lower()
            if fos.lower() in dict_fos_subdiscipline:
                clean_paper['fos_specific'] = dict_fos_subdiscipline[fos]
                clean_paper['fos_general'] = dict_fos_discipline[clean_paper['fos_specific']]
                break
            elif fos in dict_fos_discipline:
                clean_paper['fos_general'] = dict_fos_discipline[fos]
                break           
                    
    # keywords
    clean_paper['keywords'] = ['NA']
    clean_keywords = []
    if 'keywords' in paper and paper['keywords'] != []:
        for keyword in paper['keywords']:
            keyword = keyword.strip('.- ').lower()

            if re.match('[0-9]{2}[a-z]+[0-9]{2}$', keyword):
                continue
            elif re.match('[0-9]{2} [a-z]+ [0-9]{2}$', keyword):
                continue
            elif re.match('[a-z]{1}[0-9]{2}$', keyword):
                continue
            elif re.match('[0-9]{2}\.[0-9]{2}\.[a-z]{2}$', keyword):
                continue
            elif re.match('[0-9]+\.[0-9]{2}\..*$', keyword):
                continue
            elif keyword in keyword_blacklist:
                continue
            elif isinstance(keyword, int):
                continue
            elif re.match('[0-9]+$', keyword):
                continue
            elif keyword == '':
                continue
            else:
                clean_keywords.append(keyword.lower())
                
        if clean_keywords != []:
            clean_paper['keywords'] = clean_keywords
        
    # abstract
    clean_paper['abstract'] = abstract
        
    return clean_paper

#### dblpv13 schema

In [0]:
Image(url= "https://raw.githubusercontent.com/JRodrigoF/bdm_g6/main/p1/data/dw_schema.png")

In [0]:
dblpv13_schema = (StructType()
                      .add("id", StringType(), False)
                      .add("title", StringType(), False)
                      .add("authors", ArrayType(
                                      StructType()
                                          .add("name", StringType(), True)
                                          .add("org", StringType(), True)
                                      ,False
                                      )
                          )
                      .add("doc_type", StringType(), True)
                      .add("n_citation", IntegerType(), True)
                      .add("issn", StringType(), True)
                      .add("isbn", StringType(), True)
                      .add("doi", StringType(), True)
                      .add("volume", StringType(), True)
                      .add("year", IntegerType(), True)
                      .add("venue", StructType()
                                      .add("name", StringType(), True)
                                      .add("type", StringType(), True)
                                      .add("raw", StringType(), True)
                          )
                      .add("lang", StringType(), True)
                      .add("fos_general", StringType(), True)
                      .add("fos_specific", StringType(), True)
                      .add("keywords", ArrayType(
                                           StringType(), True
                                       ), True
                          )
                      .add("abstract", StringType(), True)
                 )


#### JSON to Delta tables

In [0]:
# ETL processing on each JSON part and storage of the resulting new JSOn data in DBFS

json_files = natural_sort([f for f in os.listdir(local_path) if f.startswith('dblpv13_edited_part_')])

for file in json_files:
    
    input_file = local_path + file
    output_file = input_file.replace('edited_part_', 'edited_parsed_part_')
    table_name = output_file.replace('.json', '').replace(local_path, '')
    
    print("Loading JSON file {}".format(input_file))
    f = open(input_file)
    data = json.load(f)
    f.close()
    parsed_dict = []

    print("---- Parsing file {}".format(input_file))
    for dict_entry in data:
        clean_entry = etl_processing(dict_entry)
        if clean_entry != None:
            parsed_dict.append(clean_entry)
    
    print("---- Writing file {}".format(output_file))
    with open(output_file, 'w', encoding='utf-8') as out:
           json.dump(parsed_dict, out)
        
    print("---- Writing delta table {}".format(table_name))
    dblpv13_df = spark.read.schema(dblpv13_schema).json("file:{}".format(output_file))
    dblpv13_df.write.format("delta").saveAsTable(table_name, mode='overwrite')
    
    print("---- Deleting (!) JSON files")
    os.remove(input_file)
    os.remove(output_file)


Loading JSON file /mnt/dblpv13_edited_part_1.json
---- Parsing file /mnt/dblpv13_edited_part_1.json
---- Writing file /mnt/dblpv13_edited_parsed_part_1.json
---- Writing delta table dblpv13_edited_parsed_part_1
---- Deleting (!) JSON files
Loading JSON file /mnt/dblpv13_edited_part_2.json
---- Parsing file /mnt/dblpv13_edited_part_2.json
---- Writing file /mnt/dblpv13_edited_parsed_part_2.json
---- Writing delta table dblpv13_edited_parsed_part_2
---- Deleting (!) JSON files
Loading JSON file /mnt/dblpv13_edited_part_3.json
---- Parsing file /mnt/dblpv13_edited_part_3.json
---- Writing file /mnt/dblpv13_edited_parsed_part_3.json
---- Writing delta table dblpv13_edited_parsed_part_3
---- Deleting (!) JSON files
Loading JSON file /mnt/dblpv13_edited_part_4.json
---- Parsing file /mnt/dblpv13_edited_part_4.json
---- Writing file /mnt/dblpv13_edited_parsed_part_4.json
---- Writing delta table dblpv13_edited_parsed_part_4
---- Deleting (!) JSON files
Loading JSON file /mnt/dblpv13_edited_pa

In [0]:
%fs

ls /user/hive/warehouse/dblpv13_edited_parsed_part_1/

path,name,size,modificationTime
dbfs:/user/hive/warehouse/dblpv13_edited_parsed_part_1/_SUCCESS,_SUCCESS,0,1653637259000
dbfs:/user/hive/warehouse/dblpv13_edited_parsed_part_1/_committed_8058540948611346725,_committed_8058540948611346725,122,1653637258000
dbfs:/user/hive/warehouse/dblpv13_edited_parsed_part_1/_started_8058540948611346725,_started_8058540948611346725,0,1653637235000
dbfs:/user/hive/warehouse/dblpv13_edited_parsed_part_1/part-00000-tid-8058540948611346725-1a6da28f-d011-4bb9-a266-30ae3d369f2a-0-1-c000.snappy.parquet,part-00000-tid-8058540948611346725-1a6da28f-d011-4bb9-a266-30ae3d369f2a-0-1-c000.snappy.parquet,116165071,1653637255000


In [0]:
display(spark.sql("SELECT * FROM {}.`{}`".format(table_format, dblpv13_delta_parts[0])))

id,title,authors,doc_type,n_citation,issn,isbn,doi,volume,year,venue,lang,fos_general,fos_specific,keywords,abstract
53e99784b7602d9701f3e151,A solution to the problem of touching and broken characters,"List(List(Jairo Rocha, NA), List(Theo Pavlidis, NA))",conference,17.0,,,10.1109/ICDAR.1993.395663,,1993.0,"List(International Conference on Document Analysis and Recognition, 0, ICDAR-)",en,natural sciences,computer sciences,"List(handwriting recognition, prototypes, image segmentation, computer science, expert systems, knowledge base, pattern recognition, usability, optical character recognition, shape, feature extraction)",
53e99784b7602d9701f3e15d,Timing yield estimation using statistical static timing analysis,"List(List(Min Pan, NA), List(Chris C. N. Chu, NA), List(Hai Zhou, NA))",book,28.0,,0-7803-8834-8,10.1109/ISCAS.2005.1465124,,2005.0,"List(International Symposium on Circuits and Systems, 0, ISCAS)",en,,,"List(sequential circuits, statistical distributions, set-up time constraints, register-to-register paths, statistical static timing analysis, integrated circuit modelling, parameter estimation, statistical analysis, circuit model, path delays, deep sub-micron technology, timing, delay distributions, delays, circuit timing, shortest path variations, hold time constraints, integrated circuit yield, process variations, integrated circuit layout, high-performance circuit designs, clock skew, timing yield estimation, deterministic static timing analysis, monte carlo simulation, design method, static timing analysis, design methodology, process variation, shortest path, registers, circuit design, circuit analysis)","As process variations become a significant problem in deep sub-micron technology, a shift from deterministic static timing analysis to statistical static timing analysis for high-performance circuit designs could reduce the excessive conservatism that is built into current timing design methods. We address the timing yield problem for sequential circuits and propose a statistical approach to handle it. We consider the spatial and path reconvergence correlations between path delays, set-up time and hold time constraints, and clock skew due to process variations. We propose a method to get the timing yield based on the delay distributions of register-to-register paths in the circuit On average, the timing yield results obtained by our approach have average errors of less than 1.0% in comparison with Monte Carlo simulation. Experimental results show that shortest path variations and clock skew due to process variations have considerable impact on circuit timing, which could bias the timing yield results. In addition, the correlation between longest and shortest path delays is not significant"
53e99784b7602d9701f3e922,International Conference on Nano/Micro Engineered and Molecular Systems,"List(List(Jungil Park, NA), List(Sunyoung Ahn, NA), List(Youngmi Kim Pak, NA), List(James Jungho Pak, NA))",conference,1.0,,,10.1109/NEMS.2009.5068754,,2009.0,"List(NA, NA, NEMS)",en,,,List(NA),
53e99784b7602d9701f3f411,Using XML to Integrate Existing Software Systems into the Web,"List(List(Harry M. Sneed, NA))",book,28.0,,0-7695-1727-7,10.1109/CMPSAC.2002.1044548,,2002.0,"List(Computer Software and Applications Conference, 0, COMPSAC)",en,natural sciences,computer sciences,"List(internet, hypermedia markup languages, information resources, systems re-engineering, cobol, pl/i, world wide web, xml, batch programs, data conversion, e-commerce, extensible markup language, enterprise application integration, interface reengineering, legacy programs, online programs, software reengineering, subprograms, systems integration)","The eXtensible Markup Language 驴 XML 驴 is not only a language for communication between humans and the web, it is also a language for communication between programs. Rather than passing parameters, programs can pass documents from one to another, containing not only pure data, but control information as well. Even legacy programs written in ancient languages such as COBOL and PL/I can be adapted by means ofinterface reengineering to process and to generate XML documents"
53e99784b7602d9701f3f5fe,Research on resource allocation for multi-tier web applications in a virtualization environment,"List(List(Shuguo Yang, School of Mathematics and Physics, Qingdao University of Science and Technology, Qingdao, China 266061))",journal,2.0,1673-7350,,10.1007/s11704-011-0127-6,,2011.0,"List(NA, 0, Frontiers of Computer Science in China)",en,natural sciences,computer sciences,"List(resource allocation, cpu utilization, quality of service)","Resource allocation for multi-tier web applications in virtualization environments is one of the most important problems in autonomous computing. On one hand, the more resources that are provisioned to a multitier web application, the easier it is to meet service level objectives (SLO). On the other hand, the virtual machine which hosts the multi-tier web application needs to be consolidated as much as possible in order to maintain high resource utilization. This paper presents an adaptive resource controller which consists of a feedback utilization controller and an auto-regressive and moving average model (ARMA)-based model estimator. It can meet application-level quality of service (QoS) goals while achieving high resource utilization. To evaluate the proposed controllers, simulations are performed on a testbed simulating a virtual data center using Xen virtual machines. Experimental results indicate that the controllers can improve CPU utilization and make the best tradeoff between resource utilization and performance for multi-tier web applications"
53e99792b7602d9701f54b53,""" SCIATICA ""","List(List(A.S.Blundell Bankart, NA))",unknown,89.0,,,10.1016/S0140-6736(00)72002-2,,1941.0,"List(NA, 10, SIGGRAPH Computer Animation Festival)",en,,,List(NA),
53e99792b7602d9701f5af1a,The design of awareness and operation module for the symbiotic applications,"List(List(Shigeru Fujita, NA), List(Kenji Sugawara, NA), List(Claude Moulin, NA), List(Jean-Paul A. Barthès, NA))",conference,4.0,,,10.1109/COGINF.2010.5599834,,2010.0,"List(IEEE International Conference on Cognitive Informatics, 0, IEEE ICCI)",en,natural sciences,computer sciences,"List(cognition, multi-agent systems, ubiquitous computing, adips-dash, omas, syma, actuators, awareness and operation module, cognition functions, decision functions, intelligent multiagent system, multiparadigm-multiagent framework, perceptual interaction, social interaction, symbiotic base mechanism, symbiotic multiagent system, awareness, cognition layer model, multi-agent system, social-ware, symbiotic computing)",
53e99792b7602d9701f5af27,Short-Term Traffic Flow Forecasting Based on MARS,"List(List(Shengqi Ye, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Yingjia He, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Jianming Hu, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Zuo Zhang, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China))",journal,11.0,,,10.1109/FSKD.2008.678,,2008.0,"List(NA, 0, FSKD)",en,natural sciences,computer sciences,"List(considerable accuracy, promising traffic flow forecasting, proposed mars method, neural networks, short-term traffic flow forecasting, historical traffic flow data, traffic flow forecasting, multivariate adaptive regression splines, mars model, analytical function, mars method, analytic function, forecasting, predictive models, spline, mars, traffic flow, neural network, detectors, data models, regression analysis)","A promising traffic flow forecasting model based on Multivariate Adaptive Regression Splines (MARS) is developed in this paper. First, the historical traffic flow data is obtained from the loop detectors installed on the road network of Beijing. Then, part of the data is selected for training the MARS model while the rest is used to test the method. The results based on MARS method are compared with those of other methods such as the Neural Networks. The proposed MARS method is proved to have a considerable accuracy. Moreover, the model constructed with MARS can be described with analytical functions, which helps a lot in the further research on traffic flow forecasting"
53e99792b7602d9701f5af35,An approach to feature location in distributed systems,"List(List(Dennis Edwards, Department of Computer Science, University of West Florida, 11000 University Parkway, Pensacola, FL 32514, USA), List(Sharon Simmons, Department of Computer Science, University of West Florida, 11000 University Parkway, Pensacola, FL 32514, USA), List(Norman Wilde, nding author. Tel.: +1 850 474 2542; fax: +1 850 857 6056))",journal,62.0,0164-1212,,10.1016/j.jss.2004.12.018,,2006.0,"List(NA, 0, Journal of Systems and Software)",en,natural sciences,information sciences,"List(feature location, distributed systems, software reconnaissance)","This paper describes an approach to the feature location problem for distributed systems, that is, to the problem of locating which code components are important in providing a particular feature for an end user. A feature is located by observing system execution and noting time intervals in which it is active. Traces of execution in intervals with and without the feature are compared. Earlier experience has shown that this analysis is difficult because distributed systems often exhibit stochastic behavior and because time intervals are hard to identify with precision. To get around these difficulties, the paper proposes a definition of time interval based on the causality analysis introduced by Lamport and others. A strict causal interval may be defined, but it must often be extended to capture latent events and to represent the inherent imprecision in time measurement. This extension is modeled using a weighting function which may be customized to the specific circumstances of each study. The end result of the analysis is a component relevance index, denoted p""c, which can be used to measure the relevance of a software component to a particular feature. Software engineers may focus their analysis efforts on the top components as ranked according to p""c. Two case studies are presented. The first study demonstrates the feasibility of p""c by applying our method to a well-defined distributed system. The second study demonstrates the versatility of p""c by applying our method to message logs obtained from a large military system. Both studies indicate that the suggested approach could be an effective guide for a software engineer who is maintaining or enhancing a distributed system"
53e99792b7602d9701f5b06f,Nested Graph-Structured Representations for Cases,"List(List(Luís Macedo, NA), List(Amílcar Cardoso, NA))",journal,20.0,0302-9743,3-540-64990-5,10.1007/BFb0056317,,1998.0,"List(NA, 0, EWCBR)",en,humanities,arts,"List(nested graph-structured representations, adjacency matrix)","This paper describes an approach to representing cases as nested graph-structures, i.e., as hierarchically, spatially, temporally and causally interconnected nodes (case nodes), which may be themselves recursively described by other sets of interconnected nodes. Each case node represents a case piece (sub-case). An adjacency matrix may represent these nested graph-structured cases. Within our approach, new cases are constructed using an iterative context-guided retrieval of case nodes from multiple cases. In order to illustrate the expressiveness of this case representation approach, we discuss its application to the diagnosis and therapeutics of neurological diseases, to architectural design and to storytelling. Some issues that come out of this approach, like its contribution to the representation of cases of CBR and to integrate ordinary and creative reasoning, are discussed."


### Data Warehouse

- Reading and joining of all persisted Delta tables (120 Mb each) into a single one
- Schema definition for dimension and fact tables
  - Intended for faster reading of data from persisted Delta tables
  - \#todo add schema option to spark.sql.read
- Building and storage (Delta tables) of dimension tables
- Building and storage (Delta tables) of fact table

In [0]:
%sh

ls -lh /mnt/

total 8.0K
drwxr-xr-x 2 root root 4.0K Jun  6 19:21 driver-daemon
drwxr-xr-x 2 root root 4.0K Jun  6 19:21 readonly


#### Joining Delta Tables

In [0]:
%sh

ls /dbfs

In [0]:
# fetches all delta table file names/paths present in DBFS

d = dbutils.fs.ls("/user/hive/warehouse/")
dblpv13_delta_parts = []

for l in d:
    file = str(l).split()[0]
    if 'dblpv13_edited_parsed_part' in file:
        dblpv13_delta_parts.append((file.replace("FileInfo(path='dbfs:", "").replace("/',", "")))

dblpv13_delta_parts = (natural_sort(dblpv13_delta_parts))
print("There are {} Delta tables present at /user/hive/warehouse/\n".format(len(dblpv13_delta_parts)))
print(dblpv13_delta_parts)


There are 22 Delta tables present at /user/hive/warehouse/

['/user/hive/warehouse/dblpv13_edited_parsed_part_1', '/user/hive/warehouse/dblpv13_edited_parsed_part_2', '/user/hive/warehouse/dblpv13_edited_parsed_part_3', '/user/hive/warehouse/dblpv13_edited_parsed_part_4', '/user/hive/warehouse/dblpv13_edited_parsed_part_5', '/user/hive/warehouse/dblpv13_edited_parsed_part_6', '/user/hive/warehouse/dblpv13_edited_parsed_part_7', '/user/hive/warehouse/dblpv13_edited_parsed_part_8', '/user/hive/warehouse/dblpv13_edited_parsed_part_9', '/user/hive/warehouse/dblpv13_edited_parsed_part_10', '/user/hive/warehouse/dblpv13_edited_parsed_part_11', '/user/hive/warehouse/dblpv13_edited_parsed_part_12', '/user/hive/warehouse/dblpv13_edited_parsed_part_13', '/user/hive/warehouse/dblpv13_edited_parsed_part_14', '/user/hive/warehouse/dblpv13_edited_parsed_part_15', '/user/hive/warehouse/dblpv13_edited_parsed_part_16', '/user/hive/warehouse/dblpv13_edited_parsed_part_17', '/user/hive/warehouse/dblpv13_

In [0]:
# function to concatenate delta tables in the form of Spark dataframes

def delta_concat(df_tgt, src_table_file, table_format):
    tmp_tbl = spark.sql("SELECT * FROM {}.`{}`".format(table_format, src_table_file))
    df_tgt = df_tgt.union(tmp_tbl)
    del tmp_tbl
    return df_tgt


In [0]:
# seeding table

# table_format="parquet"
table_format="delta"

df = spark.sql("SELECT * FROM {}.`{}`".format(table_format, dblpv13_delta_parts[0]))


In [0]:
# table_format="parquet"
table_format="delta"

for src_table_file in dblpv13_delta_parts[1:]:
    df = delta_concat(df, src_table_file, table_format)


In [0]:
display(df)
print("Number of total rows in the combined new Delta table:")
display(f"{df.count():,}") 
display(df.printSchema())


id,title,authors,doc_type,n_citation,issn,isbn,doi,volume,year,venue,lang,fos_general,fos_specific,keywords,abstract
53e99784b7602d9701f3e151,A solution to the problem of touching and broken characters,"List(List(Jairo Rocha, NA), List(Theo Pavlidis, NA))",conference,17.0,,,10.1109/ICDAR.1993.395663,,1993.0,"List(International Conference on Document Analysis and Recognition, 0, ICDAR-)",en,natural sciences,computer sciences,"List(handwriting recognition, prototypes, image segmentation, computer science, expert systems, knowledge base, pattern recognition, usability, optical character recognition, shape, feature extraction)",
53e99784b7602d9701f3e15d,Timing yield estimation using statistical static timing analysis,"List(List(Min Pan, NA), List(Chris C. N. Chu, NA), List(Hai Zhou, NA))",book,28.0,,0-7803-8834-8,10.1109/ISCAS.2005.1465124,,2005.0,"List(International Symposium on Circuits and Systems, 0, ISCAS)",en,,,"List(sequential circuits, statistical distributions, set-up time constraints, register-to-register paths, statistical static timing analysis, integrated circuit modelling, parameter estimation, statistical analysis, circuit model, path delays, deep sub-micron technology, timing, delay distributions, delays, circuit timing, shortest path variations, hold time constraints, integrated circuit yield, process variations, integrated circuit layout, high-performance circuit designs, clock skew, timing yield estimation, deterministic static timing analysis, monte carlo simulation, design method, static timing analysis, design methodology, process variation, shortest path, registers, circuit design, circuit analysis)","As process variations become a significant problem in deep sub-micron technology, a shift from deterministic static timing analysis to statistical static timing analysis for high-performance circuit designs could reduce the excessive conservatism that is built into current timing design methods. We address the timing yield problem for sequential circuits and propose a statistical approach to handle it. We consider the spatial and path reconvergence correlations between path delays, set-up time and hold time constraints, and clock skew due to process variations. We propose a method to get the timing yield based on the delay distributions of register-to-register paths in the circuit On average, the timing yield results obtained by our approach have average errors of less than 1.0% in comparison with Monte Carlo simulation. Experimental results show that shortest path variations and clock skew due to process variations have considerable impact on circuit timing, which could bias the timing yield results. In addition, the correlation between longest and shortest path delays is not significant"
53e99784b7602d9701f3e922,International Conference on Nano/Micro Engineered and Molecular Systems,"List(List(Jungil Park, NA), List(Sunyoung Ahn, NA), List(Youngmi Kim Pak, NA), List(James Jungho Pak, NA))",conference,1.0,,,10.1109/NEMS.2009.5068754,,2009.0,"List(NA, NA, NEMS)",en,,,List(NA),
53e99784b7602d9701f3f411,Using XML to Integrate Existing Software Systems into the Web,"List(List(Harry M. Sneed, NA))",book,28.0,,0-7695-1727-7,10.1109/CMPSAC.2002.1044548,,2002.0,"List(Computer Software and Applications Conference, 0, COMPSAC)",en,natural sciences,computer sciences,"List(internet, hypermedia markup languages, information resources, systems re-engineering, cobol, pl/i, world wide web, xml, batch programs, data conversion, e-commerce, extensible markup language, enterprise application integration, interface reengineering, legacy programs, online programs, software reengineering, subprograms, systems integration)","The eXtensible Markup Language 驴 XML 驴 is not only a language for communication between humans and the web, it is also a language for communication between programs. Rather than passing parameters, programs can pass documents from one to another, containing not only pure data, but control information as well. Even legacy programs written in ancient languages such as COBOL and PL/I can be adapted by means ofinterface reengineering to process and to generate XML documents"
53e99784b7602d9701f3f5fe,Research on resource allocation for multi-tier web applications in a virtualization environment,"List(List(Shuguo Yang, School of Mathematics and Physics, Qingdao University of Science and Technology, Qingdao, China 266061))",journal,2.0,1673-7350,,10.1007/s11704-011-0127-6,,2011.0,"List(NA, 0, Frontiers of Computer Science in China)",en,natural sciences,computer sciences,"List(resource allocation, cpu utilization, quality of service)","Resource allocation for multi-tier web applications in virtualization environments is one of the most important problems in autonomous computing. On one hand, the more resources that are provisioned to a multitier web application, the easier it is to meet service level objectives (SLO). On the other hand, the virtual machine which hosts the multi-tier web application needs to be consolidated as much as possible in order to maintain high resource utilization. This paper presents an adaptive resource controller which consists of a feedback utilization controller and an auto-regressive and moving average model (ARMA)-based model estimator. It can meet application-level quality of service (QoS) goals while achieving high resource utilization. To evaluate the proposed controllers, simulations are performed on a testbed simulating a virtual data center using Xen virtual machines. Experimental results indicate that the controllers can improve CPU utilization and make the best tradeoff between resource utilization and performance for multi-tier web applications"
53e99792b7602d9701f54b53,""" SCIATICA ""","List(List(A.S.Blundell Bankart, NA))",unknown,89.0,,,10.1016/S0140-6736(00)72002-2,,1941.0,"List(NA, 10, SIGGRAPH Computer Animation Festival)",en,,,List(NA),
53e99792b7602d9701f5af1a,The design of awareness and operation module for the symbiotic applications,"List(List(Shigeru Fujita, NA), List(Kenji Sugawara, NA), List(Claude Moulin, NA), List(Jean-Paul A. Barthès, NA))",conference,4.0,,,10.1109/COGINF.2010.5599834,,2010.0,"List(IEEE International Conference on Cognitive Informatics, 0, IEEE ICCI)",en,natural sciences,computer sciences,"List(cognition, multi-agent systems, ubiquitous computing, adips-dash, omas, syma, actuators, awareness and operation module, cognition functions, decision functions, intelligent multiagent system, multiparadigm-multiagent framework, perceptual interaction, social interaction, symbiotic base mechanism, symbiotic multiagent system, awareness, cognition layer model, multi-agent system, social-ware, symbiotic computing)",
53e99792b7602d9701f5af27,Short-Term Traffic Flow Forecasting Based on MARS,"List(List(Shengqi Ye, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Yingjia He, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Jianming Hu, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China), List(Zuo Zhang, Tsinghua Univ, Natl Lab Informat Sci & Technol, Dept Automat, Beijing 100084, Peoples R China))",journal,11.0,,,10.1109/FSKD.2008.678,,2008.0,"List(NA, 0, FSKD)",en,natural sciences,computer sciences,"List(considerable accuracy, promising traffic flow forecasting, proposed mars method, neural networks, short-term traffic flow forecasting, historical traffic flow data, traffic flow forecasting, multivariate adaptive regression splines, mars model, analytical function, mars method, analytic function, forecasting, predictive models, spline, mars, traffic flow, neural network, detectors, data models, regression analysis)","A promising traffic flow forecasting model based on Multivariate Adaptive Regression Splines (MARS) is developed in this paper. First, the historical traffic flow data is obtained from the loop detectors installed on the road network of Beijing. Then, part of the data is selected for training the MARS model while the rest is used to test the method. The results based on MARS method are compared with those of other methods such as the Neural Networks. The proposed MARS method is proved to have a considerable accuracy. Moreover, the model constructed with MARS can be described with analytical functions, which helps a lot in the further research on traffic flow forecasting"
53e99792b7602d9701f5af35,An approach to feature location in distributed systems,"List(List(Dennis Edwards, Department of Computer Science, University of West Florida, 11000 University Parkway, Pensacola, FL 32514, USA), List(Sharon Simmons, Department of Computer Science, University of West Florida, 11000 University Parkway, Pensacola, FL 32514, USA), List(Norman Wilde, nding author. Tel.: +1 850 474 2542; fax: +1 850 857 6056))",journal,62.0,0164-1212,,10.1016/j.jss.2004.12.018,,2006.0,"List(NA, 0, Journal of Systems and Software)",en,natural sciences,information sciences,"List(feature location, distributed systems, software reconnaissance)","This paper describes an approach to the feature location problem for distributed systems, that is, to the problem of locating which code components are important in providing a particular feature for an end user. A feature is located by observing system execution and noting time intervals in which it is active. Traces of execution in intervals with and without the feature are compared. Earlier experience has shown that this analysis is difficult because distributed systems often exhibit stochastic behavior and because time intervals are hard to identify with precision. To get around these difficulties, the paper proposes a definition of time interval based on the causality analysis introduced by Lamport and others. A strict causal interval may be defined, but it must often be extended to capture latent events and to represent the inherent imprecision in time measurement. This extension is modeled using a weighting function which may be customized to the specific circumstances of each study. The end result of the analysis is a component relevance index, denoted p""c, which can be used to measure the relevance of a software component to a particular feature. Software engineers may focus their analysis efforts on the top components as ranked according to p""c. Two case studies are presented. The first study demonstrates the feasibility of p""c by applying our method to a well-defined distributed system. The second study demonstrates the versatility of p""c by applying our method to message logs obtained from a large military system. Both studies indicate that the suggested approach could be an effective guide for a software engineer who is maintaining or enhancing a distributed system"
53e99792b7602d9701f5b06f,Nested Graph-Structured Representations for Cases,"List(List(Luís Macedo, NA), List(Amílcar Cardoso, NA))",journal,20.0,0302-9743,3-540-64990-5,10.1007/BFb0056317,,1998.0,"List(NA, 0, EWCBR)",en,humanities,arts,"List(nested graph-structured representations, adjacency matrix)","This paper describes an approach to representing cases as nested graph-structures, i.e., as hierarchically, spatially, temporally and causally interconnected nodes (case nodes), which may be themselves recursively described by other sets of interconnected nodes. Each case node represents a case piece (sub-case). An adjacency matrix may represent these nested graph-structured cases. Within our approach, new cases are constructed using an iterative context-guided retrieval of case nodes from multiple cases. In order to illustrate the expressiveness of this case representation approach, we discuss its application to the diagnosis and therapeutics of neurological diseases, to architectural design and to storytelling. Some issues that come out of this approach, like its contribution to the representation of cases of CBR and to integrate ordinary and creative reasoning, are discussed."


Number of total rows in the combined new Delta table:
'3,412,291'root
 |-- id: string (nullable = true)
 |-- title: string (nullable = true)
 |-- authors: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- org: string (nullable = true)
 |-- doc_type: string (nullable = true)
 |-- n_citation: integer (nullable = true)
 |-- issn: string (nullable = true)
 |-- isbn: string (nullable = true)
 |-- doi: string (nullable = true)
 |-- volume: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- venue: struct (nullable = true)
 |    |-- name: string (nullable = true)
 |    |-- type: string (nullable = true)
 |    |-- raw: string (nullable = true)
 |-- lang: string (nullable = true)
 |-- fos_general: string (nullable = true)
 |-- fos_specific: string (nullable = true)
 |-- keywords: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- abstract: string (nullable = true)



#### DWH schema definition

In [0]:
from pyspark.sql.types import *

#### Dimension tables

# dim date
date_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("year", IntegerType(), False)
  ])

# dim document type
document_type_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("text", StringType(), False)
  ])

# dim publication
publication_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("title", StringType(), False),
    StructField("volume", StringType(), False),
    StructField("issn", StringType(), False),
    StructField("isbn", StringType(), False),
    StructField("doi", StringType(), False),
    StructField("n_citations", IntegerType(), False)
  ])

# dim venue
venue_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("name", StringType(), False)
  ])

# dim field of study (fos) general
fos_general_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("category", StringType(), False)
  ])

# dim field of study (fos) specific
fos_specific_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("category", StringType(), False)
  ])

# dim organization
organization_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("name", StringType(), False)
  ])

# dim author
author_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("firstname", StringType(), False),
    StructField("lastname", StringType(), False)
  ])

# dim language
language_sc = StructType([
    StructField("id", IntegerType(), False),
    StructField("text", StringType(), False)
  ])

#### Fact table

dblpFactTable_schema = StructType([
    StructField("date_id", IntegerType(), True), 
    StructField("doc_type_id", IntegerType(), True),
    StructField("publication_id", IntegerType(), True),
    StructField("venue_id", IntegerType(), True),
    StructField("fos_general_id", IntegerType(), True),
    StructField("fos_specific_id", IntegerType(), True),
    StructField("org_id", IntegerType(), True),
    StructField("author_id", IntegerType(), True),
    StructField("author_rank", IntegerType(), True)
  ])

#### Dimension tables

In [0]:
# dim date
(df
    .select(F.col("year"))
    .distinct()
    .sort("year")
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "year")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_date")
)

# dim doc type
(df
    .selectExpr("doc_type")
    .dropDuplicates(["doc_type"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "doc_type")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_doc_type")
)

# dim publication
(df
    .selectExpr("title", "volume", "issn", "isbn", "doi", "n_citation")
    .dropDuplicates(["title"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "title", "volume", "issn", "isbn", "doi", "n_citation")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_publication")
)

# dim venue
(df
    .selectExpr("venue.name", "venue.raw")
    .replace("NA", None)
    .withColumn("name",F.coalesce("name","raw"))
    .dropDuplicates(["name"])
    .na.fill("NA")
    .withColumn("id", F.monotonically_increasing_id()+1)
    .select("id", F.col("name").alias("venue_name"))
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_venue")
)

# dim fos general
(df
    .selectExpr("fos_general")
    .dropDuplicates(["fos_general"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "fos_general")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_fos_general")
)

# dim fos specific
(df
    .selectExpr("fos_specific")
    .dropDuplicates(["fos_specific"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "fos_specific")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_fos_specific")
)

# dim organization
(df
    .selectExpr("authors")
    .withColumn('authors', F.explode('authors'))
    .selectExpr("authors.org" )
    .dropDuplicates(["org"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "org")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_organization")
)    

# dim author
df_temp = (df
    .selectExpr("authors")
    .withColumn('authors', F.explode('authors'))
    .selectExpr("authors.name" )
    .dropDuplicates(["name"])        
    .withColumn("FirstName",F.split(F.col("name")," ").getItem(0))
    .withColumn("MiddleName",F.split(F.col("name")," ").getItem(1))
    .withColumn("MiddleName_backup",F.split(F.col("name")," ").getItem(1))
    .withColumn("LastName",F.split(F.col("name")," ").getItem(2))
    .withColumn("LastName_backup",F.split(F.col("name")," ").getItem(2))
    .withColumn("LastName",F.coalesce("LastName","MiddleName"))
    .drop("name")
    .na.fill("NA")
    )

(df_temp.withColumn("MiddleName", F.when(df_temp.LastName_backup == "NA", "NA")
           .otherwise(df_temp.MiddleName))
#            .replace("NA", None)
           .withColumn("id", F.monotonically_increasing_id()+1)
           .selectExpr("id", "FirstName", "MiddleName", "LastName" )
           .write
           .format("delta")
           .mode("overwrite")
           .saveAsTable("dim_author")
)

del df_temp

# dim language
(df
    .selectExpr("lang")
    .dropDuplicates(["lang"])
    .withColumn("id", F.monotonically_increasing_id()+1)
    .selectExpr("id", "lang")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dim_language")
)


In [0]:
# data frames from delta tables

df_date = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_date`")
df_doc_type = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_doc_type`")
df_publication = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_publication`")
df_venue = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_venue`")
df_fos_general = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_fos_general`")
df_fos_specific = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_fos_specific`")
df_organization = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_organization`")
df_author = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_author`")
df_language = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/dim_language`")

df_date.createOrReplaceTempView("dim_date")
df_doc_type.createOrReplaceTempView("dim_doc_type")
df_publication.createOrReplaceTempView("dim_publication")
df_venue.createOrReplaceTempView("dim_venue")
df_fos_general.createOrReplaceTempView("dim_fos_general")
df_fos_specific.createOrReplaceTempView("dim_fos_specific")
df_organization.createOrReplaceTempView("dim_organization")
df_author.createOrReplaceTempView("dim_author")
df_language.createOrReplaceTempView("dim_language")


#### Fact table

In [0]:
# fact table
df = (df
    .drop("id", "abstract", "keywords", "issn", "isbn", "volume", "doi", "n_citation")

    # authors flattening
    .select("*", F.posexplode("authors"))
    .select("*", F.col("pos").alias("author_rank"), "col.*")

    .withColumn("FirstName",F.split(F.col("name")," ").getItem(0))
    .withColumn("MiddleName",F.split(F.col("name")," ").getItem(1))
    .withColumn("MiddleName_backup",F.split(F.col("name")," ").getItem(1))
    .withColumn("LastName",F.split(F.col("name")," ").getItem(2))
    .withColumn("LastName_backup",F.split(F.col("name")," ").getItem(2))
    .withColumn("LastName",F.coalesce("LastName","MiddleName"))
    .drop("name", "pos", "col", "authors")
    .na.fill("NA")
    # .select("*", authors_udf("MiddleName", "LastName_backup"))
)

df = (df
    .withColumn("MiddleName", F.when(df.LastName_backup == "NA", "NA")
    .otherwise(df.MiddleName))
    .drop("MiddleName_backup", "LastName_backup")
        
    # venue flattening
    .select("*", "venue.*")
    .replace("NA", None)
    .withColumn("venue_name", F.coalesce("name","raw"))
    .drop("name", "raw", "venue", "type")
    .na.fill("NA")
    
    # date/year
    .join(df_date, "year")
    .select(F.col("id").alias("date_id"), "*")
    .drop("year", "id")

    # doc_type
    .join(df_doc_type, "doc_type")
    .select(F.col("id").alias("doc_type_id"), "*")
    .drop("doc_type", "id")

    # publication / title
    .join(df_publication, "title")
    .select(F.col("id").alias("publication_id"), "*")
    .drop("volume", "issn", "isbn", "doi", "n_citation", "title", "id")

    # venue
    .join(df_venue, "venue_name")
    .select(F.col("id").alias("venue_id"), "*")
    .drop("venue_name", "id")

    # fos general
    .join(df_fos_general, "fos_general")
    .select(F.col("id").alias("fos_general_id"), "*")
    .drop("fos_general", "id")

    # fos specific
    .join(df_fos_specific, "fos_specific")
    .select(F.col("id").alias("fos_specific_id"), "*")
    .drop("fos_specific", "id")
        
    # organization
    .join(df_organization, "org")
    .select(F.col("id").alias("org_id"), "*")
    .drop("org", "id")
    
    # language
    .join(df_language, "lang")
    .select(F.col("id").alias("language_id"), "*")
    .drop("lang", "id")
)

# author
df = (df
    .join(df_author, (df.FirstName == df_author.FirstName)
                     & (df.MiddleName == df_author.MiddleName)
                     & (df.LastName == df_author.LastName)
         )
    .select(F.col("id").alias("author_id"), "*")
    .drop("id", "FirstName", "MiddleName", "LastName")
    .withColumn("author_rank", df.author_rank+1)
    .select("date_id", "doc_type_id", "publication_id", "venue_id", "fos_general_id", "fos_specific_id", "org_id", "author_id", "language_id", "author_rank")
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("fact_dblp")
)

del df
                           

In [0]:
%fs

ls /user/hive/warehouse/fact_dblp/

path,name,size,modificationTime
dbfs:/user/hive/warehouse/fact_dblp/_delta_log/,_delta_log/,0,0
dbfs:/user/hive/warehouse/fact_dblp/part-00000-3c5a6ab6-3cbc-4da5-af7d-7dbcd9e29018-c000.snappy.parquet,part-00000-3c5a6ab6-3cbc-4da5-af7d-7dbcd9e29018-c000.snappy.parquet,15269503,1654626543000
dbfs:/user/hive/warehouse/fact_dblp/part-00001-8bb69190-6000-4b0f-b991-6beaf0f79a2a-c000.snappy.parquet,part-00001-8bb69190-6000-4b0f-b991-6beaf0f79a2a-c000.snappy.parquet,15424336,1654626545000
dbfs:/user/hive/warehouse/fact_dblp/part-00002-170515b7-3f33-44e1-a750-75cc4c0743e6-c000.snappy.parquet,part-00002-170515b7-3f33-44e1-a750-75cc4c0743e6-c000.snappy.parquet,15375910,1654626543000
dbfs:/user/hive/warehouse/fact_dblp/part-00003-89ffdf85-7fdf-4d4e-95ea-4c4953f122cb-c000.snappy.parquet,part-00003-89ffdf85-7fdf-4d4e-95ea-4c4953f122cb-c000.snappy.parquet,15423639,1654626545000
dbfs:/user/hive/warehouse/fact_dblp/part-00004-59f6cc1d-2187-4ed3-8c8d-b1f5cb947605-c000.snappy.parquet,part-00004-59f6cc1d-2187-4ed3-8c8d-b1f5cb947605-c000.snappy.parquet,15414746,1654626545000
dbfs:/user/hive/warehouse/fact_dblp/part-00005-b9801452-ae42-45fa-8f18-1a38dedd1844-c000.snappy.parquet,part-00005-b9801452-ae42-45fa-8f18-1a38dedd1844-c000.snappy.parquet,14715322,1654626540000
dbfs:/user/hive/warehouse/fact_dblp/part-00006-6c52ae5f-f993-433b-a0d1-47aef5345725-c000.snappy.parquet,part-00006-6c52ae5f-f993-433b-a0d1-47aef5345725-c000.snappy.parquet,15184920,1654626541000
dbfs:/user/hive/warehouse/fact_dblp/part-00007-b8d4c3be-a483-4825-a29c-729a9a1e4caf-c000.snappy.parquet,part-00007-b8d4c3be-a483-4825-a29c-729a9a1e4caf-c000.snappy.parquet,15346548,1654626545000
dbfs:/user/hive/warehouse/fact_dblp/part-00008-de87c0a6-f223-40b8-a45c-8414cd9272b8-c000.snappy.parquet,part-00008-de87c0a6-f223-40b8-a45c-8414cd9272b8-c000.snappy.parquet,15657366,1654626559000


In [0]:
# data frames from delta tables

df_bdlp = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/fact_dblp`")
df_bdlp.createOrReplaceTempView("fact_dblp")


In [0]:
display(df_bdlp)
display(df_bdlp.count())
display(df_bdlp.printSchema())


date_id,doc_type_id,publication_id,venue_id,fos_general_id,fos_specific_id,org_id,author_id,language_id,author_rank
100,1,8589990338,17179870565,6,6,60129547352,25770092289,1,1
105,1,17180277236,25769805978,1,6,8589940963,25769958724,1,3
107,1,17180048281,1722,1,6,25769901315,17180023505,1,2
106,2,60129688127,42949674281,6,8,17179985220,25769964495,1,3
87,1,60129636876,8589934972,6,19,8589940963,172341,1,1
99,1,42949958938,60129543404,6,8,8589940963,51539697127,1,2
103,2,209592,42949674458,6,8,17179979304,51539697127,1,2
93,8,60129668208,51539608208,1,6,8589940963,14839,1,4
113,1,8590191779,60129547085,1,6,34359754834,51539900834,1,2
82,1,17180205086,34359739442,1,6,8589940963,34359824296,1,2


10852684root
 |-- date_id: long (nullable = true)
 |-- doc_type_id: long (nullable = true)
 |-- publication_id: long (nullable = true)
 |-- venue_id: long (nullable = true)
 |-- fos_general_id: long (nullable = true)
 |-- fos_specific_id: long (nullable = true)
 |-- org_id: long (nullable = true)
 |-- author_id: long (nullable = true)
 |-- language_id: long (nullable = true)
 |-- author_rank: integer (nullable = true)



### DWH Queries

- top k authors based on their h-index that publish in the field "computer sciences"

In [0]:
### Query top-k authors, based on their H-index that publish in a specific fos

# The h-index is calculated by counting the number of publications for which an author has been cited by other authors at least that same number of times.
# For instance, an h-index of 17 means that the scientist has published at least 17 papers that have each been cited at least 17 times. 


In [0]:
# spark 'query' to the fcat table
(df_bdlp
    .select("publication_id", "fos_general_id", "fos_specific_id", "author_id", "author_rank")
    .join(df_publication, (df_bdlp.publication_id == df_publication.id))
    .drop("publication_id", "volume", "issn", "isbn", "doi", "id")
    .join(df_author, (df_bdlp.author_id == df_author.id))
    .drop("id", "author_id")
    .join(df_fos_general, (df_bdlp.fos_general_id == df_fos_general.id))
    .drop("fos_general_id", "id")
    .join(df_fos_specific, (df_bdlp.fos_specific_id == df_fos_specific.id))
    .drop("fos_specific_id", "id")
    .filter(F.col("fos_specific") == "computer sciences")
    .drop("fos_general", "author_rank")
    .replace("NA", None)
    .withColumn("FullName", F.concat_ws(',', F.col("FirstName"),F.col("MiddleName"),F.col("LastName")))
    .drop("FirstName", "MiddleName", "LastName")
    .select(F.col("FullName").alias("Author"), F.col("n_citation").alias("PaperCitation"))
    .filter(F.col("Author") != "")
    .write
    .saveAsTable("df_hindex")
)



In [0]:
df_hindex = spark.sql("SELECT * FROM delta.`/user/hive/warehouse/df_hindex`")
df_hindex.createOrReplaceTempView("df_hindex")

display(df_hindex)
display(df_hindex.count())

Author,PaperCitation
"Hod,Lipson",42
"Hod,Lipson",186
"Hod,Lipson",4
"Hod,Lipson",83
"Hod,Lipson",40
"Hod,Lipson",305
"Hod,Lipson",5497
"Hod,Lipson",95
"Hod,Lipson",8
"Hod,Lipson",19


2978124

In [0]:
hindex_df = (spark.sql("""
SELECT Author, MAX(Ranking) AS Hindex
FROM (SELECT Author, PaperCitation, ROW_NUMBER() OVER (PARTITION BY Author ORDER BY PaperCitation DESC) AS Ranking
    FROM df_hindex)
WHERE Ranking <= PaperCitation
GROUP BY Author
ORDER BY MAX(Ranking) DESC
"""))

In [0]:
display(hindex_df)

Author,Hindex
"Anil,K.,Jain",154
"Wil,M.,P.",134
"Andrew,Zisserman",126
"Yoshua,Bengio",124
"A.,H.,M.",124
"Jitendra,Malik",112
"Xiaoou,Tang",112
"Thomas,S.,Huang",111
"Dacheng,Tao",111
"Xiaogang,Wang",111


In [0]:
# pandas-based solution for the h-index query
# https://stackoverflow.com/questions/29671726/efficient-way-to-calculate-h-index-impact-productivity-of-author-publication-i

# pd_df_hindex = df_hindex.toPandas()
# pd_df_hindex = pd_df_hindex.sort_values(by=["Author","PaperCitation"], ascending=[1,0])
# groups = pd_df_hindex.groupby("Author")

# ind2 = np.array([np.arange(len(g))+1 for g in groups.groups.values()],dtype=object)
# pd_df_hindex['newindex'] = np.hstack(ind2)
# pd_df_hindex['condition'] = pd_df_hindex['PaperCitation'] >= pd_df_hindex['newindex']
# hindex = pd_df_hindex.groupby('Author').sum()['condition']

# df_hindex = (pd.DataFrame({"Author":list(hindex.index), "h-index":hindex.values}))
# df_hindex = df_hindex.sort_values(by=["h-index"], ascending=[0])
# display(df_hindex)


In [0]:
display(df_hindex)

Author,h-index
"Anil,K.,Jain",154
"Wil,M.,P.",134
"Andrew,Zisserman",126
"A.,H.,M.",124
"Yoshua,Bengio",124
"Jitendra,Malik",112
"Xiaoou,Tang",112
"Thomas,S.,Huang",111
"Xiaogang,Wang",111
"Dacheng,Tao",111


### API Queries

#### Gender API

In [0]:
pd_hindex_df = hindex_df.limit(500).toPandas()

In [0]:
top_500_authors = (pd_hindex_df
        .Author
        .tolist()
)

for i in range(0, len(top_500_authors)):
    top_500_authors[i] = top_500_authors[i].replace(",", " ")


In [0]:
my_genders = []

for i in range(20, len(top_500_authors)):
    
    author = top_500_authors[i].split()
    if len(author) == 3:
        firsrname = unidecode(author[0]).replace('.', '')
        lastname = unidecode(author[2]).replace('.', '')
    else:
        firsrname = unidecode(author[0]).replace('.', '')
        lastname = unidecode(author[1]).replace('.', '')
        
    print(i, firsrname, lastname)
    
    myKey = "sUae6FCgrVhPNtwK3W6FTh3nMBbMmtLRwACw"
    # url = "https://gender-api.com/get?key=" + myKey + "&name={}".format('Anil K Jain')
    url = "https://gender-api.com/get?split={}%20{}&key={}".format(firsrname, lastname, myKey)
    response = urlopen(url)
    decoded = response.read().decode('utf-8')
    data = json.loads(decoded)
    
    my_genders.append(data["gender"])


20 Maria Carmen
21 Xuelong Li
22 Christopher Manning
23 Takeo Kanade
24 Mubarak Shah
25 Rajkumar Buyya
26 Luc Gool
27 Wolfram Burgard
28 Andrew Ng
29 Stan Li
30 Pascal Fua
31 David Zhang
32 Fernando la
33 Ion Stoica
34 Larry Davis
35 Shree Nayar
36 Marc Pollefeys
37 Rama Chellappa
38 Yi Yang
39 Dieter Fox
40 Martial Hebert
41 Richard Szeliski
42 Ian Reid
43 Michael Black
44 Jian Sun
45 Tieniu Tan
46 Geoffrey Hinton
47 Hermann Ney
48 Philip Yu
49 Shaogang Gong
50 Pieter Abbeel
51 Scott Shenker
52 B K
53 Sergey Levine
54 William Freeman
55 Robert Schapire
56 Zhi-Hua Zhou
57 Wen Gao
58 Antonio Torralba
59 Hans-Peter Seidel
60 Daniel Cremers
61 Raquel Urtasun
62 Luc Van
63 Qiang Yang
64 Qi Tian
65 A M
66 Roland Siegwart
67 Roberto Cipolla
68 Jiaya Jia
69 Kristen Grauman
70 Daphne Koller
71 Bernhard Scholkopf
72 Michael Jordan
73 Jack Dongarra
74 Alan Yuille
75 Laurens der
76 Li Fei-Fei
77 Peter Stone
78 Chunhua Shen
79 Shih-Fu Chang
80 A Tekalp
81 Dan Roth
82 Pietro Perona
83 Fredo Durand


In [0]:
print("Total: ", len(genders))

print(genders.count('female'))
print(genders.count('male'))
print(genders.count('unknown'))

Total:  455
46
402
7


### Purge delta tables

In [0]:
# JSON parts

for delta_table in dblpv13_delta_parts:
    dbutils.fs.rm("dbfs:{}".format(delta_table), True)
    print(delta_table, "deleted")


/user/hive/warehouse/dblpv13_edited_parsed_part_1 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_2 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_3 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_4 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_5 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_6 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_7 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_8 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_9 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_10 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_11 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_12 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_13 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_14 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_15 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_16 deleted
/user/hive/warehouse/dblpv13_edited_parsed_part_17 deleted
/user/

In [0]:
# dimensions

dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_date", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_doc_type", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_publication", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_venue", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_fos_general", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_fos_specific", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_organization", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_author", True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/dim_language", True)

Out[12]: True

In [0]:
# fact table

dbutils.fs.rm("dbfs:/user/hive/warehouse/fact_dblp", True)


Out[13]: True

In [0]:
# extra

dbutils.fs.rm("dbfs:/user/hive/warehouse/df_hindex", True)

Out[30]: True

In [0]:
%sh

ls -lsh /mnt

total 32K
 24K -rw-r--r-- 1 root root  21K May 28 08:58 dictionary_fos.txt
4.0K drwxr-xr-x 2 root root 4.0K May 28 07:37 driver-daemon
4.0K drwxr-xr-x 2 root root 4.0K May 28 07:37 readonly


In [0]:
%fs

ls /user/hive/warehouse/

path,name,size,modificationTime
dbfs:/user/hive/warehouse/_committed_140098711880695482,_committed_140098711880695482,222,1653503414000
dbfs:/user/hive/warehouse/_committed_1823990921207546219,_committed_1823990921207546219,223,1653503026000
dbfs:/user/hive/warehouse/_committed_2618448479493195264,_committed_2618448479493195264,222,1653500590000
dbfs:/user/hive/warehouse/_committed_276202957567659857,_committed_276202957567659857,222,1653500378000
dbfs:/user/hive/warehouse/_committed_4560362115751107150,_committed_4560362115751107150,223,1653503230000
dbfs:/user/hive/warehouse/_committed_4781748796520471704,_committed_4781748796520471704,222,1653502480000
dbfs:/user/hive/warehouse/_committed_4945852690702862520,_committed_4945852690702862520,223,1653500814000
dbfs:/user/hive/warehouse/_committed_5454470160437766372,_committed_5454470160437766372,223,1653501036000
dbfs:/user/hive/warehouse/_committed_641909369749824286,_committed_641909369749824286,222,1653502199000
dbfs:/user/hive/warehouse/_committed_7206901163515394298,_committed_7206901163515394298,223,1653502861000
