# Delta-rs library Benchmarks

## Setup

### Downloading the data

Run this step only once per project, for each dataset file you want to benchmark.

In [1]:
## Choose an option:
# supplier_tpch_sf1.parquet
# supplier_tpch_sf10.parquet
# supplier_tpch_sf100.parquet
# lineitem_tpch_sf1.parquet
# lineitem_tpch_sf10.parquet
## Copy the option in the *** field

## Set the local path
LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/***"

## Set the remote URL
REMOTE_URL = "https://repo.hops.works/dev/gio-hopsworks/***"

!curl REMOTE_URL -o LOCAL_PATH

SyntaxError: invalid syntax (<ipython-input-1-ef7995ca8a24>, line 1)

### Installing and importing the libraries

In [3]:
# Verify that the custom delta-rs library is installed
%pip install deltalake
# Verify that all other libraries are installed
%pip install pyarrow
%pip install pandas
%pip install timeit

## For debugging
#%pip install ipython

[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
from deltalake import DeltaTable, write_deltalake
import pyarrow as pa
import pandas as pd
import timeit

## For debugging
# from IPython.display import display

## Benchmarking

### Supplier table

#### SF 1 - 10000 rows

In [13]:
## Initialise results list of rows
results = []

## Iterate benchmark for - Choose a number
number_of_iterations = 5

for i in range(number_of_iterations):
    ###################### (1) WRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
from deltalake import write_deltalake'''

    TEST_CODE='''
HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier" 
LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf1.parquet"
pa_table = pa.parquet.read_table(LOCAL_PATH)
write_deltalake(HDFS_DATA_PATH, pa_table)'''

    # benchmark the task
    write_result = timeit.timeit(setup  = SETUP_CODE,
                           stmt   = TEST_CODE,
                           number = 1          )

    # report the result
    # printing exec. time
    print('Write delta lake table, the time taken is {0} seconds'.format(write_result))

    ###################### (2) READ BENCHMARK #####################

    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
DeltaTable(HDFS_DATA_PATH)'''

    # benchmark the task
    read_result = timeit.timeit(setup  = SETUP_CODE,
                                stmt   = TEST_CODE,
                                number = 1          )

    # report the result
    # printing exec. time
    print('Read delta lake table, the time taken is {0} seconds'.format(read_result))

    ###################### (3) APPEND BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
from deltalake import write_deltalake'''

    TEST_CODE='''
HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf1.parquet"
pa_table = pa.parquet.read_table(LOCAL_PATH)
df = pa_table.to_pandas()
half_number_rows = df.shape[0]//2
df_first_half = df.head(half_number_rows)
write_deltalake(HDFS_DATA_PATH, df_first_half, mode="append")'''

    # benchmark the task
    append_result = timeit.timeit(setup  = SETUP_CODE,
                                  stmt   = TEST_CODE,
                                  number = 1          )

    # report the result
    # printing exec. time
    print('Append on delta lake table, the time taken is {0} seconds'.format(append_result))

    ###################### (4) OVERWRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
from deltalake import write_deltalake'''

    TEST_CODE='''
HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf1.parquet"
pa_table = pa.parquet.read_table(LOCAL_PATH)
df = pa_table.to_pandas()
write_deltalake(HDFS_DATA_PATH, df, mode="overwrite")'''

    # benchmark the task
    overwrite_result = timeit.timeit(setup  = SETUP_CODE,
                                     stmt   = TEST_CODE,
                                     number = 1          )

    # report the result
    # printing exec. time
    print('Overwrite delta lake table, the time taken is {0} seconds'.format(overwrite_result))
    
    ###################### (5) READ OLD TABLE #####################
    
    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
# This code retrieves the first table version, i.e. Table A
dt_old = DeltaTable(HDFS_DATA_PATH, version=0)'''

    # benchmark the task
    read_old_result = timeit.timeit(setup  = SETUP_CODE,
                                    stmt   = TEST_CODE,
                                    number = 1          )

    # report the result
    # printing exec. time
    print('Read old delta lake table, the time taken is {0} seconds'.format(read_old_result))
    
    ## Save row of results in results
    results_row = [write_result, read_result, append_result, overwrite_result, read_old_result]
    results.append(results_row)
    
    ## Erase data from created folder
    !rm -r /home/yarnapp/hopsfs/Experiments/supplier

## Create and then save a dataframe with the results in .csv
df = pd.DataFrame(results, columns =  ["Write", "Read", "Append", "Overwrite", "ReadOld"])
df.to_csv("/home/yarnapp/hopsfs/Resources/benchmark_results_supplier_tpch_sf1.csv")

Write delta lake table, the time taken is 1.782279137056321 seconds
Read delta lake table, the time taken is 0.1018887641839683 seconds
Append on delta lake table, the time taken is 1.565036104992032 seconds
Overwrite delta lake table, the time taken is 1.3607688727788627 seconds
Read old delta lake table, the time taken is 0.06789629790000618 seconds
Write delta lake table, the time taken is 1.8549835828598589 seconds
Read delta lake table, the time taken is 0.13238394213840365 seconds
Append on delta lake table, the time taken is 1.8693466200493276 seconds
Overwrite delta lake table, the time taken is 1.8909420589916408 seconds
Read old delta lake table, the time taken is 0.09576750383712351 seconds
Write delta lake table, the time taken is 2.06822645990178 seconds
Read delta lake table, the time taken is 0.22875092714093626 seconds
Append on delta lake table, the time taken is 1.8702663420699537 seconds
Overwrite delta lake table, the time taken is 1.82991316402331 seconds
Read old 

#### SF 10 - 100000 rows

In [None]:
## Initialise results list of rows
results = []

## Iterate benchmark for - Choose a number
number_of_iterations = 5

for i in range(number_of_iterations):
    ###################### (1) WRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier" 
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    write_deltalake(HDFS_DATA_PATH, pa_table)'''

    # benchmark the task
    write_result = timeit.timeit(setup  = SETUP_CODE,
                           stmt   = TEST_CODE,
                           number = 1          )

    # report the result
    # printing exec. time
    print('Write delta lake table, the time taken is {0} seconds'.format(write_result))

    ###################### (2) READ BENCHMARK #####################

    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    DeltaTable(HDFS_DATA_PATH)'''

    # benchmark the task
    read_result = timeit.timeit(setup  = SETUP_CODE,
                                stmt   = TEST_CODE,
                                number = 1          )

    # report the result
    # printing exec. time
    print('Read delta lake table, the time taken is {0} seconds'.format(read_result))

    ###################### (3) APPEND BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    half_number_rows = df.shape[0]//2
    df_first_half = df.head(half_number_rows)
    write_deltalake(HDFS_DATA_PATH, df_first_half, mode="append")'''

    # benchmark the task
    append_result = timeit.timeit(setup  = SETUP_CODE,
                                  stmt   = TEST_CODE,
                                  number = 1          )

    # report the result
    # printing exec. time
    print('Append on delta lake table, the time taken is {0} seconds'.format(append_result))

    ###################### (4) OVERWRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    write_deltalake(HDFS_DATA_PATH, df, mode="overwrite")'''

    # benchmark the task
    overwrite_result = timeit.timeit(setup  = SETUP_CODE,
                                     stmt   = TEST_CODE,
                                     number = 1          )

    # report the result
    # printing exec. time
    print('Overwrite delta lake table, the time taken is {0} seconds'.format(overwrite_result))
    
    ###################### (5) READ OLD TABLE #####################
    
    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    # This code retrieves the first table version, i.e. Table A
    dt_old = DeltaTable(HDFS_DATA_PATH, version=0)'''

    # benchmark the task
    read_old_result = timeit.timeit(setup  = SETUP_CODE,
                                    stmt   = TEST_CODE,
                                    number = 1          )

    # report the result
    # printing exec. time
    print('Read old delta lake table, the time taken is {0} seconds'.format(read_old_result))
    
    ## Save row of results in results
    results_row = [write_result, read_result, append_result, overwrite_result, read_old_result]
    results.append(results_row)
    
    ## Erase data from created folder
    !rm -r "/home/yarnapp/hopsfs/Experiments/supplier"

## Create and then save a dataframe with the results in .csv
df = pd.DataFrame(results, columns =  ["Write", "Read", "Append", "Overwrite", "ReadOld"])
df.to_csv("/home/yarnapp/hopsfs/Resources/benchmark_results_supplier_tpch_sf10.csv")

#### SF 100 - 1000000 rows

In [None]:
## Initialise results list of rows
results = []

## Iterate benchmark for - Choose a number
number_of_iterations = 5

for i in range(number_of_iterations):
    ###################### (1) WRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier" 
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf100.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    write_deltalake(HDFS_DATA_PATH, pa_table)'''

    # benchmark the task
    write_result = timeit.timeit(setup  = SETUP_CODE,
                           stmt   = TEST_CODE,
                           number = 1          )

    # report the result
    # printing exec. time
    print('Write delta lake table, the time taken is {0} seconds'.format(write_result))

    ###################### (2) READ BENCHMARK #####################

    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    DeltaTable(HDFS_DATA_PATH)'''

    # benchmark the task
    read_result = timeit.timeit(setup  = SETUP_CODE,
                                stmt   = TEST_CODE,
                                number = 1          )

    # report the result
    # printing exec. time
    print('Read delta lake table, the time taken is {0} seconds'.format(read_result))

    ###################### (3) APPEND BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf100.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    half_number_rows = df.shape[0]//2
    df_first_half = df.head(half_number_rows)
    write_deltalake(HDFS_DATA_PATH, df_first_half, mode="append")'''

    # benchmark the task
    append_result = timeit.timeit(setup  = SETUP_CODE,
                                  stmt   = TEST_CODE,
                                  number = 1          )

    # report the result
    # printing exec. time
    print('Append on delta lake table, the time taken is {0} seconds'.format(append_result))

    ###################### (4) OVERWRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/supplier_tpch_sf100.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    write_deltalake(HDFS_DATA_PATH, df, mode="overwrite")'''

    # benchmark the task
    overwrite_result = timeit.timeit(setup  = SETUP_CODE,
                                     stmt   = TEST_CODE,
                                     number = 1          )

    # report the result
    # printing exec. time
    print('Overwrite delta lake table, the time taken is {0} seconds'.format(overwrite_result))
    
    ###################### (5) READ OLD TABLE #####################
    
    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/supplier"
    # This code retrieves the first table version, i.e. Table A
    dt_old = DeltaTable(HDFS_DATA_PATH, version=0)'''

    # benchmark the task
    read_old_result = timeit.timeit(setup  = SETUP_CODE,
                                    stmt   = TEST_CODE,
                                    number = 1          )

    # report the result
    # printing exec. time
    print('Read old delta lake table, the time taken is {0} seconds'.format(read_old_result))
    
    ## Save row of results in results
    results_row = [write_result, read_result, append_result, overwrite_result, read_old_result]
    results.append(results_row)
    
    ## Erase data from created folder
    !rm -r "/home/yarnapp/hopsfs/Experiments/supplier"

## Create and then save a dataframe with the results in .csv
df = pd.DataFrame(results, columns =  ["Write", "Read", "Append", "Overwrite", "ReadOld"])
df.to_csv("/home/yarnapp/hopsfs/Resources/benchmark_results_supplier_tpch_sf100.csv")

### Lineitem table

#### SF 1 - 6000000 rows

In [None]:
## Initialise results list of rows
results = []

## Iterate benchmark for - Choose a number
number_of_iterations = 5

for i in range(number_of_iterations):
    ###################### (1) WRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem" 
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf1.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    write_deltalake(HDFS_DATA_PATH, pa_table)'''

    # benchmark the task
    write_result = timeit.timeit(setup  = SETUP_CODE,
                           stmt   = TEST_CODE,
                           number = 1          )

    # report the result
    # printing exec. time
    print('Write delta lake table, the time taken is {0} seconds'.format(write_result))

    ###################### (2) READ BENCHMARK #####################

    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    DeltaTable(HDFS_DATA_PATH)'''

    # benchmark the task
    read_result = timeit.timeit(setup  = SETUP_CODE,
                                stmt   = TEST_CODE,
                                number = 1          )

    # report the result
    # printing exec. time
    print('Read delta lake table, the time taken is {0} seconds'.format(read_result))

    ###################### (3) APPEND BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf1.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    half_number_rows = df.shape[0]//2
    df_first_half = df.head(half_number_rows)
    write_deltalake(HDFS_DATA_PATH, df_first_half, mode="append")'''

    # benchmark the task
    append_result = timeit.timeit(setup  = SETUP_CODE,
                                  stmt   = TEST_CODE,
                                  number = 1          )

    # report the result
    # printing exec. time
    print('Append on delta lake table, the time taken is {0} seconds'.format(append_result))

    ###################### (4) OVERWRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf1.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    write_deltalake(HDFS_DATA_PATH, df, mode="overwrite")'''

    # benchmark the task
    overwrite_result = timeit.timeit(setup  = SETUP_CODE,
                                     stmt   = TEST_CODE,
                                     number = 1          )

    # report the result
    # printing exec. time
    print('Overwrite delta lake table, the time taken is {0} seconds'.format(overwrite_result))
    
    ###################### (5) READ OLD TABLE #####################
    
    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    # This code retrieves the first table version, i.e. Table A
    dt_old = DeltaTable(HDFS_DATA_PATH, version=0)'''

    # benchmark the task
    read_old_result = timeit.timeit(setup  = SETUP_CODE,
                                    stmt   = TEST_CODE,
                                    number = 1          )

    # report the result
    # printing exec. time
    print('Read old delta lake table, the time taken is {0} seconds'.format(read_old_result))
    
    ## Save row of results in results
    results_row = [write_result, read_result, append_result, overwrite_result, read_old_result]
    results.append(results_row)
    
    ## Erase data from created folder
    !rm -r "/home/yarnapp/hopsfs/Experiments/lineitem"

## Create and then save a dataframe with the results in .csv
df = pd.DataFrame(results, columns =  ["Write", "Read", "Append", "Overwrite", "ReadOld"])
df.to_csv("/home/yarnapp/hopsfs/Resources/benchmark_results_lineitem_tpch_sf1.csv")

#### SF 10 - 60000000 rows

In [None]:
## Initialise results list of rows
results = []

## Iterate benchmark for - Choose a number
number_of_iterations = 5

for i in range(number_of_iterations):
    ###################### (1) WRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem" 
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    write_deltalake(HDFS_DATA_PATH, pa_table)'''

    # benchmark the task
    write_result = timeit.timeit(setup  = SETUP_CODE,
                           stmt   = TEST_CODE,
                           number = 1          )

    # report the result
    # printing exec. time
    print('Write delta lake table, the time taken is {0} seconds'.format(write_result))

    ###################### (2) READ BENCHMARK #####################

    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    DeltaTable(HDFS_DATA_PATH)'''

    # benchmark the task
    read_result = timeit.timeit(setup  = SETUP_CODE,
                                stmt   = TEST_CODE,
                                number = 1          )

    # report the result
    # printing exec. time
    print('Read delta lake table, the time taken is {0} seconds'.format(read_result))

    ###################### (3) APPEND BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    half_number_rows = df.shape[0]//2
    df_first_half = df.head(half_number_rows)
    write_deltalake(HDFS_DATA_PATH, df_first_half, mode="append")'''

    # benchmark the task
    append_result = timeit.timeit(setup  = SETUP_CODE,
                                  stmt   = TEST_CODE,
                                  number = 1          )

    # report the result
    # printing exec. time
    print('Append on delta lake table, the time taken is {0} seconds'.format(append_result))

    ###################### (4) OVERWRITE BENCHMARK #####################

    SETUP_CODE='''import pyarrow as pa
    from deltalake import write_deltalake'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    LOCAL_PATH = "/home/yarnapp/hopsfs/Resources/lineitem_tpch_sf10.parquet"
    pa_table = pa.parquet.read_table(LOCAL_PATH)
    df = pa_table.to_pandas()
    write_deltalake(HDFS_DATA_PATH, df, mode="overwrite")'''

    # benchmark the task
    overwrite_result = timeit.timeit(setup  = SETUP_CODE,
                                     stmt   = TEST_CODE,
                                     number = 1          )

    # report the result
    # printing exec. time
    print('Overwrite delta lake table, the time taken is {0} seconds'.format(overwrite_result))
    
    ###################### (5) READ OLD TABLE #####################
    
    SETUP_CODE='''from deltalake import DeltaTable'''

    TEST_CODE='''
    HDFS_DATA_PATH = "hdfs://rpc.namenode.service.consul:8020/Projects/test/Experiments/lineitem"
    # This code retrieves the first table version, i.e. Table A
    dt_old = DeltaTable(HDFS_DATA_PATH, version=0)'''

    # benchmark the task
    read_old_result = timeit.timeit(setup  = SETUP_CODE,
                                    stmt   = TEST_CODE,
                                    number = 1          )

    # report the result
    # printing exec. time
    print('Read old delta lake table, the time taken is {0} seconds'.format(read_old_result))
    
    ## Save row of results in results
    results_row = [write_result, read_result, append_result, overwrite_result, read_old_result]
    results.append(results_row)
    
    ## Erase data from created folder
    !rm -r "/home/yarnapp/hopsfs/Experiments/lineitem"

## Create and then save a dataframe with the results in .csv
df = pd.DataFrame(results, columns =  ["Write", "Read", "Append", "Overwrite", "ReadOld"])
df.to_csv("/home/yarnapp/hopsfs/Resources/benchmark_results_lineitem_tpch_sf10.csv")