In [0]:
from azure.storage.fileshare import ShareServiceClient
import os
import pydicom
from io import BytesIO
from pydicom.fileset import FileSet
import pandas as pd
import tqdm

In [0]:
# Storage-account name and key are read from the "adc_store" secret scope.
acc_name = dbutils.secrets.get(scope="adc_store", key="pacs_intfileshare_accname")
acc_key = dbutils.secrets.get(scope="adc_store", key="pacs_intfileshare_acckey")

# Azure Storage connection string, assembled from its ';'-separated parts.
connection_string = ";".join(
    [
        "DefaultEndpointsProtocol=https",
        f"AccountName={acc_name}",
        f"AccountKey={acc_key}",
        "EndpointSuffix=core.windows.net",
    ]
)

# Name of the file share to read from.
share_name = "intfileshare"

# Client scoped to that share, obtained through the account-level service client.
service_client = ShareServiceClient.from_connection_string(connection_string)
share_client = service_client.get_share_client(share_name)

# Top-level folder on the share and the project directory inside it.
source_folder = "sectra"
proj_dir = "Evan Test_06acf4b09dab4043b8c1109b4e86c617"

# Register one DICOMDIR row per entry of the project directory in the
# pacs_file_copy tracking table.
print("Add DICOMDIR files to table:")

# Immediate children (files and directories) of the project directory on the share.
items = list(share_client.list_directories_and_files(f"{source_folder}/{proj_dir}"))

dicomdir_rows = []
for item in items:
    # An entry named DICOMDIR sits directly in the project directory and is recorded
    # with sub-directory '.'; any other entry name is recorded as the sub-directory.
    # NOTE(review): entries are not checked for being directories, nor for actually
    # containing a DICOMDIR file -- confirm this is intended.
    subdirs = "." if item["name"] == "DICOMDIR" else item["name"]
    print(item["name"])
    dicomdir_rows.append((source_folder, proj_dir, subdirs))

# The values travel as DataFrame data instead of being spliced into the SQL text,
# so a name containing a quote can neither break nor alter the statement. All rows
# are written by a single INSERT (one shared added_at) rather than one statement
# per entry. Nothing is executed when the directory listing is empty.
if dicomdir_rows:
    spark.createDataFrame(
        dicomdir_rows,
        schema="src_root string, src_proj_dir string, src_subdirs string",
    ).createOrReplaceTempView("temp_new_dicomdirs")

    spark.sql("""
        INSERT INTO 1_inland.sectra.pacs_file_copy (
            src_root, src_proj_dir, src_subdirs, src_filename,
            dst_filepath, active_ind, status, added_at
        )
        SELECT
            src_root, src_proj_dir, src_subdirs, 'DICOMDIR',
            'test_dst_path', 0, 'new', CURRENT_TIMESTAMP()
        FROM temp_new_dicomdirs
    """)

In [0]:
# Client for the DICOMDIR file located directly inside the project directory.
dicomdir_share_path = "/".join((source_folder, proj_dir, "DICOMDIR"))
file_client = share_client.get_file_client(dicomdir_share_path)

In [0]:
# Download the whole DICOMDIR file into memory.
dicomdir_download = file_client.download_file()
file_bytes = dicomdir_download.readall()

In [0]:
# Write the downloaded bytes to a DICOMDIR file under the volume path.
with open("/Volumes/1_inland/sectra/vone/DICOMDIR", mode="wb") as dicomdir_out:
    dicomdir_out.write(file_bytes)

In [0]:
# Parse the DICOMDIR copy stored on the volume.
dicomdir_local_path = "/Volumes/1_inland/sectra/vone/DICOMDIR"
dcm_file = pydicom.dcmread(dicomdir_local_path)

In [0]:
# Inspect the ReferencedFileID components of the directory record at index 3.
sample_record = dcm_file.DirectoryRecordSequence[3]
list(sample_record["ReferencedFileID"].value)

In [0]:

from pyspark.sql import Row
from pyspark.sql import types as T

# Two nullable string columns: the sub-directory path and the file name.
schema = (
    T.StructType()
    .add("src_subdirs", T.StringType(), True)
    .add("src_file", T.StringType(), True)
)

# Empty DataFrame carrying that schema.
df = spark.createDataFrame(data=[], schema=schema)

In [0]:
# Collect one (sub-directory, file name) pair per directory record that references
# a file, then build the Spark DataFrame in a single call.
file_rows = []
for record in tqdm.tqdm(dcm_file.DirectoryRecordSequence):
    try:
        ref_file_id = record["ReferencedFileID"].value
    except KeyError:
        # This record has no ReferencedFileID element; skip it.
        continue

    # Components 1 and 2 form the sub-directory path and component 3 is the file
    # name; component 0 is not used.
    # NOTE(review): assumes every ReferencedFileID has at least four components --
    # a shorter value raises IndexError here; confirm against the export layout.
    file_rows.append((f"{ref_file_id[1]}/{ref_file_id[2]}", ref_file_id[3]))

# Creating a one-row DataFrame per record and union-ing it in the loop grows the
# query plan with every record and appends duplicates when the cell is re-run.
# Building the DataFrame once from the collected rows avoids both problems.
df = spark.createDataFrame(data=file_rows, schema=schema)

#del dcm_file


In [0]:
from pyspark.sql import functions as F

# Destination path = volume root / project directory (spaces replaced by
# underscores) / sub-directories / file name.
dst_root = f"/Volumes/1_inland/sectra/vone/{proj_dir.replace(' ', '_')}/"
dst_filepath_col = F.concat(
    F.lit(dst_root),
    F.col("src_subdirs"),
    F.lit("/"),
    F.col("src_file"),
)
df = df.withColumn("dst_filepath", dst_filepath_col)

In [0]:
# Expose the DataFrame to SQL under a temporary view name.
temp_view_name = "temp_new_files"
df.createOrReplaceTempView(temp_view_name)

In [0]:
# Append every referenced file to the pacs_file_copy tracking table as an active
# row with status 'new'.
#
# The source root and project directory are attached as literal columns and read
# from the view, instead of being interpolated into the SQL text, so a quote in
# either value can neither break nor alter the statement.
df.withColumn("src_root", F.lit(source_folder)).withColumn(
    "src_proj_dir", F.lit(proj_dir)
).createOrReplaceTempView("temp_new_files_with_source")

spark.sql("""
INSERT INTO 1_inland.sectra.pacs_file_copy
(src_root, src_proj_dir, src_subdirs, src_filename,
dst_filepath, active_ind, status, added_at)
SELECT
src_root, src_proj_dir, src_subdirs, src_file,
dst_filepath, 1, 'new', current_timestamp()
FROM temp_new_files_with_source
""")