In [5]:
# !conda install -y -c conda-forge pyspark
# !conda install -y -c conda-forge black
# !conda install -y -c conda-forge pip
# !pip install delta-spark==2.1.1
# !sudo wget --user jovyan --password jovyan https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.1.0/delta-core_2.12-2.1.0.jar -P $SPARK_HOME/jars/


In [6]:
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession, DataFrame
from pyspark.conf import SparkConf
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from pyspark.sql.types import *
from functools import reduce
import json
import glob
import tempfile
from delta import *

builder = (
    SparkSession.builder.master("local")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# spark = SparkSession.builder \
#     .master("local") \
#     .getOrCreate()

In [7]:
all_files = all_files = glob.glob("../data/AssignmentData/*.csv")
all_files

['../data/AssignmentData/SMESES000065103320166_2016_07_29_Loan_Data_8396a20b-86e1-4cac-8d51-dbecb46130c6.csv',
 '../data/AssignmentData/SMESES000065103320166_2016_10_31_Loan_Data_5370b5fe-b398-4b40-ace5-2691fc3e1504.csv',
 '../data/AssignmentData/SMESES000065103320166_2017_01_31_Loan_Data_6997f5f3-c2e5-41a4-a038-a5497066fe0e.csv',
 '../data/AssignmentData/SMESES000065103320166_2017_04_30_Loan_Data_52e3e2b7-f8b6-49fd-b482-72a0445cfa9a.csv',
 '../data/AssignmentData/SMESES000065103320166_2017_07_31_Loan_Data_d9629927-17f7-48c0-b1f6-ffbd213308d6.csv',
 '../data/AssignmentData/SMESES000065103320166_2017_10_31_Loan_Data_fb648083-daad-4968-beaa-dfc851ec2b4a.csv',
 '../data/AssignmentData/SMESES000065103320166_2018_01_31_Loan_Data_464b3e03-7988-4c86-a729-a0d31f3e7622.csv',
 '../data/AssignmentData/SMESES000065103320166_2018_04_30_Loan_Data_3a6e2077-7e61-4b77-9dd6-6b4ea1794d05.csv',
 '../data/AssignmentData/SMESES000065103320166_2018_07_31_Loan_Data_af106c4d-a055-49a3-8f62-fede0acb7ce9.csv',
 

In [8]:
asset_columns = {'AS1':DateType(),
'AS2':StringType(),
'AS3':StringType(),
'AS4':StringType(),
'AS5':StringType(),
'AS6':StringType(),
'AS7':StringType(),
'AS8':StringType(),
'AS15':StringType(),
'AS16':StringType(),
'AS17':StringType(),
'AS18':StringType(),
'AS19':DateType(),
'AS20':DateType(),
'AS21':StringType(),
'AS22':StringType(),
'AS23':BooleanType(),
'AS24':StringType(),
'AS25':StringType(),
'AS26':StringType(),
'AS27':DoubleType(),
'AS28':DoubleType(),
'AS29':BooleanType(),
'AS30':DoubleType(),
'AS31':DateType(),
'AS32':StringType(),
'AS33':StringType(),
'AS34':StringType(),
'AS35':StringType(),
'AS36':StringType(),
'AS37':DoubleType(),
'AS38':DoubleType(),
'AS39':DoubleType(),
'AS40':DoubleType(),
'AS41':DoubleType(),
'AS42':StringType(),
'AS43':StringType(),
'AS44':DoubleType(),
'AS45':StringType(),
'AS50':DateType(),
'AS51':DateType(),
'AS52':StringType(),
'AS53':BooleanType(),
'AS54':DoubleType(),
'AS55':DoubleType(),
'AS56':DoubleType(),
'AS57':StringType(),
'AS58':StringType(),
'AS59':StringType(),
'AS60':DoubleType(),
'AS61':DoubleType(),
'AS62':StringType(),
'AS63':DoubleType(),
'AS64':DoubleType(),
'AS65':StringType(),
'AS66':DoubleType(),
'AS67':DateType(),
'AS68':StringType(),
'AS69':DoubleType(),
'AS70':DateType(),
'AS71':DateType(),
'AS80':DoubleType(),
'AS81':DoubleType(),
'AS82':DoubleType(),
'AS83':StringType(),
'AS84':StringType(),
'AS85':DoubleType(),
'AS86':DoubleType(),
'AS87':DateType(),
'AS88':DoubleType(),
'AS89':StringType(),
'AS90':DoubleType(),
'AS91':DateType(),
'AS92':StringType(),
'AS93':DoubleType(),
'AS94':StringType(),
'AS100':DoubleType(),
'AS101':DoubleType(),
'AS102':DoubleType(),
'AS103':DoubleType(),
'AS104':DoubleType(),
'AS105':DoubleType(),
'AS106':DoubleType(),
'AS107':DoubleType(),
'AS108':DoubleType(),
'AS109':DoubleType(),
'AS110':DoubleType(),
'AS111':StringType(),
'AS112':DateType(),
'AS115':DoubleType(),
'AS116':DoubleType(),
'AS117':DoubleType(),
'AS118':DoubleType(),
'AS119':DoubleType(),
'AS120':DoubleType(),
'AS121':BooleanType(),
'AS122':BooleanType(),
'AS123':StringType(),
'AS124':DateType(),
'AS125':DoubleType(),
'AS126':DoubleType(),
'AS127':DateType(),
'AS128':DoubleType(),
'AS129':StringType(),
'AS130':DateType(),
'AS131':BooleanType(),
'AS132':DoubleType(),
'AS133':DateType(),
'AS134':DateType(),
'AS135':DoubleType(),
'AS136':DoubleType(),
'AS137':DateType(),
'AS138':DoubleType()}

In [9]:
# Single file will not have all the columns specified in the DTS. Create an ad-hoc schema on the fly.
def get_file_schema(df,master_cols):
    schema = []
    for col_name in df.columns:
        if col_name.endswith("_reason"):
            schema.append(StructField(col_name, StringType(), True))
        else:
            root_col_name = col_name.split("_")[0]
            col_type = master_cols.get(root_col_name, False)
            if col_type:
                schema.append(StructField(col_name, col_type, True))
    return StructType(schema)

In [10]:
# # Find List columns and change numerical value to literal
# mapper_dict={
#     "Obligor Legal Form / Business Type":{1:"Public Company",2:"Limited Company",3:"Partnership",4:"Individul",5:"Other"},
#     "Customer Segment":{1:"Medium",2:"Small",3:"Micro",4:"Other"},
#     "Borrower Basel III Segment":{1:"Corporate",2:"SME treated as Corporate",3:"Retail",4:"Other"},
#     "Asset Type":{1: 'Loan',
#                 2: 'Guarantee',
#                 3: 'Promissory Notes',
#                 4: 'Participation Rights',
#                 5: 'Overdraft',
#                 6: 'Letter of Credit',
#                 7: 'Working Capital Facility',
#                 8: 'Other'},
#     "Seniority":{1: 'Senior Secured',
#                 2: 'Senior Unsecured',
#                 3: 'Junior',
#                 4: 'Junior Unsecured',
#                 5: 'Other'},
#     "Purpose":{1: 'Purchase',
#             2: 'Re-mortgage',
#             3: 'Renovation',
#             4: 'Equity release',
#             5: 'Construction Real Estate',
#             6: 'Construction Other',
#             7: 'Debt consolidation',
#             8: 'Re-mortgage with Equity Release',
#             9: 'Re-mortgage on Different Terms',
#             10: 'Combination Mortgage',
#             11: 'Investment Mortgage',
#             12: 'Working Capital',
#             13: 'Other'},
#     "Principal Payment Frequency":{1: 'Monthly',
#                                 2: 'Quarterly',
#                                 3: 'Semi annually',
#                                 4: 'Annual',
#                                 5: 'Bullet',
#                                 6: 'Other'},
#     "Interest Payment Frequency":{1: 'Linear',
#                             2: 'French',
#                             3: 'Fix Amortisation Schedule',
#                             4: 'Bullet',
#                             5: 'Partial Bullet',
#                             6: 'Revolving',
#                             7: 'Other'},
#     "Type of Loan":{1: 'Term', 2: 'Revolving Credit Line', 3: 'Other'},
#     "Payment type":{1: 'Direct Debit', 2: 'Standing Order', 3: 'Cheque', 4: 'Cash', 5: 'Other'},
#     "Interest Rate Type":{1: 'Floating rate loan for life',
#                         2: "Floating rate loan linked to Libor, Euribor, BoE reverting to the Bank's SVR, ECB reverting to Bank’s SVR",
#                         3: 'Fixed rate loan for life',
#                         4: 'Fixed with future periodic resets',
#                         5: 'Fixed rate loan with compulsory future switch to floating',
#                         6: 'Capped',
#                         7: 'Discount',
#                         8: 'Switch Optionality',
#                         9: 'Borrower Swapped',
#                         10: 'Other'},
#     "Current Interest Rate Index":{1: '1 month LIBOR',
#                                 2: '1 month EURIBOR',
#                                 3: '3 month LIBOR',
#                                 4: '3 month EURIBOR',
#                                 5: '6 month LIBOR',
#                                 6: '6 month EURIBOR',
#                                 7: '12 month LIBOR',
#                                 8: '12 month EURIBOR',
#                                 9: 'BoE Base Rate',
#                                 10: 'ECB Base Rate',
#                                 11: 'Standard Variable Rate',
#                                 12: 'Other'},
#     "Interest Reset Period":{1: 'Annual',
#                             2: 'Semi-annual',
#                             3: 'Quarterly',
#                             4: 'Monthly',
#                             5: 'Not apply',
#                             6: 'Other'},
#     "Reason for Default (Basel II definition)":{1: 'Bankruptcy / Insolvency',
#                                             2: 'Failure to Pay',
#                                             3: 'Breach of Terms',
#                                             4: 'Other'},
#     "Recovery Source":{1: 'Bankruptcy / Insolvency',
#                         2: 'Failure to Pay',
#                         3: 'Breach of Terms',
#                         4: 'Other'},
#     "Recovery Source":{1: 'Liquidation of Collateral',
#                         2: 'Enforcement of Guarantees',
#                         3: 'Additional Lending',
#                         4: 'Cash Recoveries',
#                         5: 'Mixed',
#                         6: 'Other'},
# }

In [11]:
import csv 
list_dfs = []
for csv_f in all_files:
    col_names = []
    content = []
    with open(csv_f,'r') as f:
        for i, line in enumerate(csv.reader(f)):
            if i == 0:
                col_names = line
            elif i == 1:
                continue
            else:
                content.append(line)
        df = spark.createDataFrame(content, col_names)
        list_dfs.append(df)
assets_df = reduce(DataFrame.union, list_dfs)

In [12]:
def replace_no_data(df):
    # For ND  values
    for col_name in df.columns:
        df = df.withColumn(col_name, F.when(F.col(col_name).startswith("ND"), None)
                                    .otherwise(F.col(col_name)))
    return df

test_1 = replace_no_data(assets_df)

test_1.show()

+----------+------+----+----+-----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-----+----+----+----+----+-----+----+----+----+-------+-------+----+----+----------+---------+---------+----+----+----+----+----------+----+-------+-------+----+----+----------+----+----+-------+----+----------+-----------+----------+----+----+----------+----+----+----+----+----+----+----+----------+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
|       AS1|   AS3|AS15|AS16| AS17|AS18|AS19|AS20|AS21|AS22|AS23|AS24|AS25|AS26|AS27|AS28|AS29|AS30|AS31|AS32|AS33|AS34|AS35|AS36| AS37|AS38|AS39|AS40|AS41| AS42|AS43|AS44|AS45|   AS50|   AS51|AS52|AS53|      AS54|     AS55|     AS56|AS57|AS58|AS59|AS60|      AS61|AS62|   AS63|   AS64|AS65|AS66|      AS67|AS68|AS69|   AS70|AS71|      AS80|

In [13]:
def replace_bool_data(df):
    # For  boolean values
    for col_name in df.columns:
        df = df.withColumn(col_name, F.when(F.col(col_name) == "Y", "True")
                                    .when(F.col(col_name) == "N", "False")
                                    .otherwise(F.col(col_name)))
    return df
test_2 = replace_bool_data(test_1)
test_2.show()

+----------+------+----+----+-----+----+----+----+----+----+-----+----+----+----+----+----+-----+----+----+----+----+----+----+----+-----+----+----+----+----+-----+----+----+----+-------+-------+----+-----+----------+---------+---------+----+----+----+----+----------+----+-------+-------+----+----+----------+----+----+-------+----+----------+-----------+----------+----+----+----------+----+----+----+----+----+----+----+----------+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
|       AS1|   AS3|AS15|AS16| AS17|AS18|AS19|AS20|AS21|AS22| AS23|AS24|AS25|AS26|AS27|AS28| AS29|AS30|AS31|AS32|AS33|AS34|AS35|AS36| AS37|AS38|AS39|AS40|AS41| AS42|AS43|AS44|AS45|   AS50|   AS51|AS52| AS53|      AS54|     AS55|     AS56|AS57|AS58|AS59|AS60|      AS61|AS62|   AS63|   AS64|AS65|AS66|      AS67|AS68|AS69|   AS70|AS71|     

In [14]:
def cast_to_datatype(df, columns):
    for col_name, data_type in columns.items():
        if data_type == BooleanType():
            df=df.withColumn("tmp_col_name", F.col(col_name).contains("True")).drop(col_name).withColumnRenamed("tmp_col_name", col_name)
        if data_type == DateType():
           df=df.withColumn("tmp_col_name", F.to_date(F.col(col_name))).drop(col_name).withColumnRenamed("tmp_col_name", col_name)
        if data_type == DoubleType():
           df=df.withColumn("tmp_col_name", F.round(F.col(col_name).cast(DoubleType()),2)).drop(col_name).withColumnRenamed("tmp_col_name", col_name) 
    return df

test_3 = cast_to_datatype(test_2, asset_columns)
test_3.printSchema()

root
 |-- AS3: string (nullable = true)
 |-- AS15: string (nullable = true)
 |-- AS16: string (nullable = true)
 |-- AS17: string (nullable = true)
 |-- AS18: string (nullable = true)
 |-- AS21: string (nullable = true)
 |-- AS22: string (nullable = true)
 |-- AS24: string (nullable = true)
 |-- AS25: string (nullable = true)
 |-- AS26: string (nullable = true)
 |-- AS32: string (nullable = true)
 |-- AS33: string (nullable = true)
 |-- AS34: string (nullable = true)
 |-- AS35: string (nullable = true)
 |-- AS36: string (nullable = true)
 |-- AS42: string (nullable = true)
 |-- AS43: string (nullable = true)
 |-- AS45: string (nullable = true)
 |-- AS52: string (nullable = true)
 |-- AS57: string (nullable = true)
 |-- AS58: string (nullable = true)
 |-- AS59: string (nullable = true)
 |-- AS62: string (nullable = true)
 |-- AS65: string (nullable = true)
 |-- AS68: string (nullable = true)
 |-- AS83: string (nullable = true)
 |-- AS84: string (nullable = true)
 |-- AS89: string (nulla

In [15]:
test_3.show(n=5,truncate=False)

+------+----+----+-----+----+----+----+----+----+----+----+----+----+----+----+-----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-----+-----+-----+----------+----+----+-----+----+----+-----+----+----+----+----+----+----+----+----+----------+----------+-----+---------+---------+---------+----+----+-------+-------+----+----------+----+----------+----+----+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
|AS3   |AS15|AS16|AS17 |AS18|AS21|AS22|AS24|AS25|AS26|AS32|AS33|AS34|AS35|AS36|AS42 |AS43|AS45|AS52|AS57|AS58|AS59|AS62|AS65|AS68|AS83|AS84|AS89|AS92|AS94|AS111|AS123|AS129|AS1       |AS19|AS20|AS23 |AS27|AS28|AS29 |AS30|AS31|AS37|AS38|AS39|AS40|AS41|AS44|AS50      |AS51      |AS53 |AS54     |AS55     |AS56     |AS60|AS61|AS63   |AS64   |AS66|AS67      |AS69|AS70   

In [17]:
test_3.write.format("delta").save("../data/output/bronze/asset_bronze")

Py4JJavaError: An error occurred while calling o2673.save.
: com.google.common.util.concurrent.ExecutionError: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.sql.delta.storage.DelegatingLogStore$
	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2261)
	at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
	at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4789)
	at org.apache.spark.sql.delta.DeltaLog$.getDeltaLogFromCache$1(DeltaLog.scala:604)
	at org.apache.spark.sql.delta.DeltaLog$.apply(DeltaLog.scala:611)
	at org.apache.spark.sql.delta.DeltaLog$.forTable(DeltaLog.scala:492)
	at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:151)
	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:116)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:357)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:833)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.sql.delta.storage.DelegatingLogStore$
	at org.apache.spark.sql.delta.storage.DelegatingLogStore.$anonfun$schemeBasedLogStore$1(DelegatingLogStore.scala:67)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.delta.storage.DelegatingLogStore.schemeBasedLogStore(DelegatingLogStore.scala:67)
	at org.apache.spark.sql.delta.storage.DelegatingLogStore.getDelegate(DelegatingLogStore.scala:85)
	at org.apache.spark.sql.delta.storage.DelegatingLogStore.read(DelegatingLogStore.scala:96)
	at org.apache.spark.sql.delta.Checkpoints.$anonfun$loadMetadataFromFile$2(Checkpoints.scala:373)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:139)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:137)
	at org.apache.spark.sql.delta.DeltaLog.recordFrameProfile(DeltaLog.scala:63)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:132)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
	at org.apache.spark.sql.delta.DeltaLog.recordOperation(DeltaLog.scala:63)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:131)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:121)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:109)
	at org.apache.spark.sql.delta.DeltaLog.recordDeltaOperation(DeltaLog.scala:63)
	at org.apache.spark.sql.delta.Checkpoints.$anonfun$loadMetadataFromFile$1(Checkpoints.scala:372)
	at org.apache.spark.sql.delta.metering.DeltaLogging.withDmqTag(DeltaLogging.scala:143)
	at org.apache.spark.sql.delta.metering.DeltaLogging.withDmqTag$(DeltaLogging.scala:142)
	at org.apache.spark.sql.delta.DeltaLog.withDmqTag(DeltaLog.scala:63)
	at org.apache.spark.sql.delta.Checkpoints.loadMetadataFromFile(Checkpoints.scala:371)
	at org.apache.spark.sql.delta.Checkpoints.lastCheckpoint(Checkpoints.scala:366)
	at org.apache.spark.sql.delta.Checkpoints.lastCheckpoint$(Checkpoints.scala:365)
	at org.apache.spark.sql.delta.DeltaLog.lastCheckpoint(DeltaLog.scala:63)
	at org.apache.spark.sql.delta.SnapshotManagement.$init$(SnapshotManagement.scala:56)
	at org.apache.spark.sql.delta.DeltaLog.<init>(DeltaLog.scala:68)
	at org.apache.spark.sql.delta.DeltaLog$.$anonfun$apply$3(DeltaLog.scala:593)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)
	at org.apache.spark.sql.delta.DeltaLog$.$anonfun$apply$2(DeltaLog.scala:589)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:139)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:137)
	at org.apache.spark.sql.delta.DeltaLog$.recordFrameProfile(DeltaLog.scala:456)
	at org.apache.spark.sql.delta.metering.DeltaLogging.$anonfun$recordDeltaOperationInternal$1(DeltaLogging.scala:132)
	at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
	at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
	at org.apache.spark.sql.delta.DeltaLog$.recordOperation(DeltaLog.scala:456)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperationInternal(DeltaLogging.scala:131)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:121)
	at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:109)
	at org.apache.spark.sql.delta.DeltaLog$.recordDeltaOperation(DeltaLog.scala:456)
	at org.apache.spark.sql.delta.DeltaLog$.createDeltaLog$1(DeltaLog.scala:588)
	at org.apache.spark.sql.delta.DeltaLog$.$anonfun$apply$4(DeltaLog.scala:604)
	at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4792)
	at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
	at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)
	... 47 more
