# GeoLayoutLM to evaluation format

## Setup


In [1]:
import os
import re
import shutil

In [2]:
# Function to copy files from "result..." directories to the destination directory
def copy_result_files(base_directory, destination_directory):
    # Walk through all subdirectories and files in the base directory
    for root, dirs, files in os.walk(base_directory):
        # Check if the current directory name contains "result"
        if 'result' in os.path.basename(root):
            for file in files:
                # Define the full file path
                file_path = os.path.join(root, file)
                # Define the destination file path
                destination_file_path = os.path.join(destination_directory, file)
                # Copy the file to the destination directory
                shutil.copy(file_path, destination_file_path)
                print(f"Copied: {file_path} to {destination_file_path}")


In [7]:
# Function to rename files and save them in a new directory
def rename_files(directory_path, new_directory_path):
    for filename in os.listdir(directory_path):
        if filename.endswith('.json'):
            print(filename)
            # Match the pattern and capture groups
            filename_new = filename.replace("_json_result", "")
            match = re.match(r'(.*_)(pdf_)(\d+)(\.json)', filename_new)
            if match:
                print(match)
                base_name = match.group(1)
                number = int(match.group(3)) - 1
                new_filename = f"{base_name}{number}.json"

                # Get full paths
                old_file = os.path.join(directory_path, filename)
                new_file = os.path.join(new_directory_path, new_filename)

                # Copy the file to the new location with the new name
                shutil.copy(old_file, new_file)
                print(f"Copied and renamed: {filename} to {new_filename}")

## Load dataset

Upload result from cloud provider algorithm in zip format

In [3]:
!unzip result_amazon_with_tables.zip

Archive:  result_amazon_with_tables.zip
   creating: result_amazon_with_tables/
  inflating: result_amazon_with_tables/.DS_Store  
  inflating: result_amazon_with_tables/Caris-Molecular-Intelligence_MI-Profile_Breast_NOS_WEBchanged_pdf_1_json_result.json  
  inflating: result_amazon_with_tables/Caris-Molecular-Intelligence_MI-Profile_Breast_NOS_WEBchanged_pdf_1_table_result.csv  
  inflating: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_10_json_result.json  
  inflating: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_10_table_result.csv  
  inflating: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_11_json_result.json  
  inflating: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_11_table_result.csv  
  inflating: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_12_json_result.json  
  inflating: result_amazon_with_tables/CarisReport_2

## Execution

In [4]:
base_directory = r'result_amazon_with_tables' # Path to cloud provider results
destination_directory = r'result_amazon_with_tables_result' # Define the destination directory where the files will be copied
new_directory_path = destination_directory + '_rename' # Define the path to the new directory for renamed files

os.makedirs(destination_directory, exist_ok=True)
os.makedirs(new_directory_path, exist_ok=True)

In [5]:
# Call the function to copy the files
copy_result_files(base_directory, destination_directory)
print("1st Step completed.")

Copied: result_amazon_with_tables/Positive-Report_pdf_8_json_result.json to result_amazon_with_tables_result/Positive-Report_pdf_8_json_result.json
Copied: result_amazon_with_tables/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_4_json_result.json to result_amazon_with_tables_result/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_4_json_result.json
Copied: result_amazon_with_tables/.DS_Store to result_amazon_with_tables_result/.DS_Store
Copied: result_amazon_with_tables/Tempus-Onco_Clinical-Report-Sample_pdf_4_table_result.csv to result_amazon_with_tables_result/Tempus-Onco_Clinical-Report-Sample_pdf_4_table_result.csv
Copied: result_amazon_with_tables/Tempus-Onco_Clinical-Report-Sample_pdf_9_json_result.json to result_amazon_with_tables_result/Tempus-Onco_Clinical-Report-Sample_pdf_9_json_result.json
Copied: result_amazon_with_tables/Positive-Report_pdf_10_json_result.json to result_amazon_with_tables_result/Positive-Report_pdf_10_json_result.json
Copied: result_amazon_with_t

In [8]:
# Call the function to rename files and save them in the new directory
rename_files(destination_directory, new_directory_path)
print("2nd step completed: Copying and renaming.")

Positive-Report_pdf_8_json_result.json
<re.Match object; span=(0, 26), match='Positive-Report_pdf_8.json'>
Copied and renamed: Positive-Report_pdf_8_json_result.json to Positive-Report_7.json
CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_4_json_result.json
<re.Match object; span=(0, 58), match='CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_p>
Copied and renamed: CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_pdf_4_json_result.json to CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_3.json
Tempus-Onco_Clinical-Report-Sample_pdf_9_json_result.json
<re.Match object; span=(0, 45), match='Tempus-Onco_Clinical-Report-Sample_pdf_9.json'>
Copied and renamed: Tempus-Onco_Clinical-Report-Sample_pdf_9_json_result.json to Tempus-Onco_Clinical-Report-Sample_8.json
Positive-Report_pdf_10_json_result.json
<re.Match object; span=(0, 27), match='Positive-Report_pdf_10.json'>
Copied and renamed: Positive-Report_pdf_10_json_result.json to Positive-Report_9.json
CarisReport_2023_NSCLC_KRAS_G12C_PD

## Download result

In [9]:
!zip -r result_amazon_with_tables_result_rename.zip result_amazon_with_tables_result_rename

  adding: result_amazon_with_tables_result_rename/ (stored 0%)
  adding: result_amazon_with_tables_result_rename/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_2.json (deflated 62%)
  adding: result_amazon_with_tables_result_rename/oncoextra-tnbc-ntrk-wm-sample-report_3.json (deflated 59%)
  adding: result_amazon_with_tables_result_rename/F1CDx Sample Report (Lung) (copy)_2.json (deflated 40%)
  adding: result_amazon_with_tables_result_rename/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_18.json (deflated 48%)
  adding: result_amazon_with_tables_result_rename/oncoextra-tnbc-ntrk-wm-sample-report_8.json (deflated 59%)
  adding: result_amazon_with_tables_result_rename/CarisReport_2023_NSCLC_KRAS_G12C_PD-L1-unlocked_8.json (deflated 36%)
  adding: result_amazon_with_tables_result_rename/oncoextra-tnbc-ntrk-wm-sample-report_2.json (deflated 56%)
  adding: result_amazon_with_tables_result_rename/F1CDx Sample Report (Lung) (copy)_18.json (deflated 48%)
  adding: result_amazon_with_tables_