In [1]:
import os
import sys
import pathlib
import platform
import urllib.request
import shutil

from typing import (
    Dict,
    List
)

In [2]:
__file__ = 'db_tests.ipynb'

In [3]:
mod_path: str = os.path.join(str(pathlib.Path(os.path.abspath(__file__)).parents[2]))
sys.path.append(mod_path)

In [4]:
from convert_source.cs_utils.fileio import Command

In [5]:
from convert_source.cs_utils.database import (
    construct_db_dict,
    create_db,
    insert_row_db,
    get_len_rows,
    get_file_id,
    update_table_row,
    export_dataframe,
    export_scans_dataframe,
    _get_dir_relative_path,
    _export_tmp_bids_df,
    export_bids_scans_dataframe,
    query_db,
    _zeropad
)

In [6]:
# Test variables
scripts_dir: str = os.path.abspath(os.path.join(os.path.dirname(__file__),'helper.scripts'))
test_config1: str = os.path.join(scripts_dir,'test.1.config.yml')
test_config2: str = os.path.join(scripts_dir,'test.2.config.yml')

data_dir: str = os.path.abspath(os.path.join(os.path.dirname(__file__),'test.study_dir'))
dcm_test_data: str = os.path.join(data_dir,'TEST001-UNIT001','data.dicom','ST000000')

test_infile: str = os.path.join(scripts_dir,'test.orig_subject_id.txt')
test_mapfile: str = os.path.join(scripts_dir,'test.map_subject_id.txt')

out_dir: str = os.path.join(os.getcwd(),'test.database.study')
misc_dir: str = os.path.join(out_dir,'.misc')
test_db: str = os.path.join(misc_dir,'test.study.db')

In [7]:
def test_download_prog():
    class PlatformInferError(Exception):
        pass

    if platform.system().lower() == 'darwin':
        url: str = "https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20201102/dcm2niix_mac.zip"
        file_name: str = "dcm2niix_mac.zip"
    elif platform.system().lower() == 'windows':
        url: str = "https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20201102/dcm2niix_win.zip"
        file_name: str = "dcm2niix_win.zip"
    elif platform.system().lower() == 'linux':
        url: str = "https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20201102/dcm2niix_lnx.zip"
        file_name: str = "dcm2niix_lin.zip"
    else:
        raise PlatformInferError("Unable to infer this platform's operating system.")

    with urllib.request.urlopen(url) as response, open(file_name, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
    
    assert os.path.exists(file_name) == True
    
    if platform.system().lower() == 'windows':
        extract: Command = Command("tar")
        extract.cmd_list.append("-zxvf")
        extract.cmd_list.append(file_name)
        extract.run()
    else:
        extract: Command = Command("unzip")
        extract.cmd_list.append(file_name)
        extract.run()

    os.remove(file_name)
    assert os.path.exists(file_name) == False

In [8]:
def test_extract_data():
    dcm_data: str = os.path.join(data_dir,'TEST001-UNIT001','data.dicom','data.1.tar.gz')
    extract: Command = Command("tar")
    extract.cmd_list.append("-zxvf")
    extract.cmd_list.append(dcm_data)
    extract.cmd_list.append("-C")
    extract.cmd_list.append(
        os.path.dirname(dcm_data)
    )
    extract.run()
    assert os.path.exists(dcm_test_data) == True

In [9]:
# Only need to run once
test_download_prog()
test_extract_data()

ERROR: x ./ST000000/
x ./ST000000/.misc/
x ./ST000000/SE000000/
x ./ST000000/SE000001/
x ./ST000000/SE000002/
x ./ST000000/SE000002/MR000000.dcm
x ./ST000000/SE000002/MR000002.dcm
x ./ST000000/SE000002/MR000004.dcm
x ./ST000000/SE000002/MR000005.dcm
x ./ST000000/SE000002/MR000006.dcm
x ./ST000000/SE000002/MR000010.dcm
x ./ST000000/SE000002/MR000011.dcm
x ./ST000000/SE000002/MR000013.dcm
x ./ST000000/SE000002/MR000015.dcm
x ./ST000000/SE000001/MR000001.dcm
x ./ST000000/SE000001/MR000005.dcm
x ./ST000000/SE000001/MR000009.dcm
x ./ST000000/SE000001/MR000012.dcm
x ./ST000000/SE000001/MR000016.dcm
x ./ST000000/SE000001/MR000017.dcm
x ./ST000000/SE000001/MR000020.dcm
x ./ST000000/SE000001/MR000024.dcm
x ./ST000000/SE000000/MR000002.dcm
x ./ST000000/.misc/ctkDICOM.sql
x ./ST000000/.misc/ctkDICOMTagCache.sql
x ./ST000000/.misc/dicom/
x ./ST000000/.misc/thumbs/
x ./ST000000/.misc/thumbs/1.2.840.113619.2.243.6074146108103184.41976.4343.2084088/
x ./ST000000/.misc/thumbs/1.2.840.113619.2.243.6074

In [9]:
def test_create_db():
    if os.path.exists(misc_dir):
        pass
    else:
        os.makedirs(misc_dir)
    
    assert os.path.exists(create_db(database=test_db)) == True
    shutil.rmtree(out_dir)
    assert os.path.exists(out_dir) == False

In [19]:
test_create_db()

In [10]:
test_file_1: str = os.path.join(
    data_dir,
    'TEST001-UNIT001',
    'data.dicom',
    'ST000000',
    'SE000001',
    'MR000009.dcm'
)

test_file_2: str = os.path.join(
    data_dir,
    'TEST001-UNIT001',
    'data.parrec',
    'AXIAL.PAR'

)

test_file_3: str = os.path.join(
    data_dir,
    'TEST001-UNIT001',
    'data.nifti',
    'FLAIR.nii.gz'
)

In [11]:
# TODO: 
#   Create file path/name truncation function for DICOMs
#       for database query consistency.

In [13]:
if os.path.exists(misc_dir):
    pass
else:
    os.makedirs(misc_dir)

create_db(database=test_db)

'/Users/adebayobraimah/Desktop/projects/convert_source/convert_source/tests/test.database.study/.misc/test.study.db'

In [11]:
construct_db_dict(
    study_dir=data_dir,
    sub_id='001',
    ses_id='001',
    database=test_db,
    file_name=test_file_2
)

{'file_id': '0000004',
 'rel_path': './test.study_dir/TEST001-UNIT001/data.parrec/AXIAL.PAR',
 'file_date': '2021-05-26T13:20:03',
 'acq_date': 'N/A',
 'sub_id': '001',
 'ses_id': '001',
 'bids_name': ''}

In [12]:
def test_construct_db_dict_and_insert_row_db():
    if os.path.exists(misc_dir):
        pass
    else:
        os.makedirs(misc_dir)

    create_db(database=test_db)

    test_dict_1: Dict[str,str] = construct_db_dict(
                                                    study_dir=data_dir,
                                                    sub_id='001',
                                                    ses_id='001',
                                                    database=test_db,
                                                    file_name=test_file_1
                                                )
    insert_row_db(database=test_db, info=test_dict_1)
    test_dict_2: Dict[str,str] = construct_db_dict(
                                                    study_dir=data_dir,
                                                    sub_id='101',
                                                    ses_id='1',
                                                    database=test_db,
                                                    file_name=test_file_2
                                                )
    insert_row_db(database=test_db, info=test_dict_2)
    test_dict_3: Dict[str,str] = construct_db_dict(
                                                    study_dir=data_dir,
                                                    sub_id='CX009902',
                                                    ses_id='BMNC000XDF',
                                                    database=test_db,
                                                    file_name=test_file_3
                                                )
    insert_row_db(database=test_db, info=test_dict_3)

    # Test statements
    assert get_len_rows(database=test_db) == 3

    assert test_dict_1.get('file_id','') == '0000001'
    assert test_dict_1.get('sub_id','') == '001'
    assert test_dict_1.get('ses_id','') == '001'
    assert test_dict_1.get('acq_date','') == 'N/A'
    assert test_dict_1.get('bids_name','') == ''

    assert test_dict_2.get('file_id','') == '0000002'
    assert test_dict_2.get('sub_id','') == '101'
    assert test_dict_2.get('ses_id','') == '1'
    assert test_dict_2.get('acq_date','') == 'N/A'
    assert test_dict_2.get('bids_name','') == ''

    assert test_dict_3.get('file_id','') == '0000003'
    assert test_dict_3.get('sub_id','') == 'CX009902'
    assert test_dict_3.get('ses_id','') == 'BMNC000XDF'
    assert test_dict_3.get('acq_date','') == 'N/A'
    assert test_dict_3.get('bids_name','') == ''

In [37]:
test_construct_db_dict_and_insert_row_db()

In [13]:
test_dict_1: Dict[str,str] = construct_db_dict(
                                                study_dir=data_dir,
                                                sub_id='001',
                                                ses_id='001',
                                                database=test_db,
                                                file_name=test_file_1
                                            )
insert_row_db(database=test_db, info=test_dict_1)
test_dict_2: Dict[str,str] = construct_db_dict(
                                                study_dir=data_dir,
                                                sub_id='101',
                                                ses_id='1',
                                                database=test_db,
                                                file_name=test_file_2
                                            )
insert_row_db(database=test_db, info=test_dict_2)
test_dict_3: Dict[str,str] = construct_db_dict(
                                                study_dir=data_dir,
                                                sub_id='CX009902',
                                                ses_id='BMNC000XDF',
                                                database=test_db,
                                                file_name=test_file_3
                                            )
insert_row_db(database=test_db, info=test_dict_3)

'/Users/adebayobraimah/Desktop/projects/convert_source/convert_source/tests/test.database.study/.misc/test.study.db'

In [14]:
get_len_rows(database=test_db)

3

In [15]:
test_dict_1.get('rel_path')

'./test.study_dir/TEST001-UNIT001/data.dicom/ST000000/SE000001/MR000009.dcm'

In [15]:
# test_path = 
query_db(database=test_db,
                    table="rel_path",
                    prim_key="rel_path",
                    value=test_dict_1.get('rel_path'))

'./test.study_dir/TEST001-UNIT001/data.dicom/ST000000/SE000001/MR000009.dcm'

In [16]:
query_db(database=test_db,
                    table="rel_path",
                    prim_key="rel_path",
                    value="")

''

In [17]:
query_db(database=test_db,
                    table="rel_path",
                    prim_key="rel_path",
                    column="file_id",
                    value=test_path)

NameError: name 'test_path' is not defined

In [19]:
test_dict_3

{'file_id': '0000004',
 'rel_path': './test.study_dir/TEST001-UNIT001/data.nifti/FLAIR.nii.gz',
 'file_date': '2021-05-26T13:20:05',
 'acq_date': 'N/A',
 'sub_id': 'CX009902',
 'ses_id': 'BMNC000XDF',
 'bids_name': ''}

In [20]:
query_db(database=test_db,
                    table='sub_id',
                    prim_key='sub_id',
                    column="file_id",
                    value='CX009902')

'0000003'

In [30]:
query_db(database=test_db,
                    table='acq_date',
                    prim_key='file_id',
                    value='0000003')

'N/A'

In [40]:
update_table_row(database=test_db,
                    prim_key='0000003',
                    table_name='bids_name',
                    value='sub-CX009902_ses-BMNC000XDF_run-01_flair')

'/Users/adebayobraimah/Desktop/projects/convert_source/convert_source/tests/test.database.study/.misc/test.study.db'

In [17]:
query_db(database=test_db,
                    table='bids_name',
                    prim_key='file_id',
                    value='0000003')

'sub-CX009902_ses-BMNC000XDF_run-01_flair'

In [35]:
def test_cleanup():
    """NOTE: This test currently FAILS on Windows operating systems."""
    shutil.rmtree(out_dir)
    assert os.path.exists(out_dir) == False

    shutil.rmtree(dcm_test_data)
    assert os.path.exists(dcm_test_data) == False

    os.remove("dcm2niix")
test_cleanup()

In [21]:
df = export_dataframe(database=test_db)
df

Unnamed: 0,file_id,rel_path,file_date,acq_date,sub_id,ses_id,bids_name
0,1,./test.study_dir/TEST001-UNIT001/data.dicom/ST...,2021-05-26T11:28:39,,001,001,
1,2,./test.study_dir/TEST001-UNIT001/data.parrec/A...,2021-05-26T11:28:39,,101,1,
2,3,./test.study_dir/TEST001-UNIT001/data.nifti/FL...,2021-05-26T11:28:39,,CX009902,BMNC000XDF,sub-CX009902_ses-BMNC000XDF_run-01_flair


In [22]:
df['rel_path']

0    ./test.study_dir/TEST001-UNIT001/data.dicom/ST...
1    ./test.study_dir/TEST001-UNIT001/data.parrec/A...
2    ./test.study_dir/TEST001-UNIT001/data.nifti/FL...
Name: rel_path, dtype: object

In [23]:
('acq_date' in list(df.columns)) and ('bids_name' in list(df.columns))

True

In [24]:
len(list(df.columns))

7

In [25]:
from convert_source.batch_convert import read_config

In [26]:
[search_dict,
bids_search,
bids_map,
meta_dict,
exclusion_list] = read_config(config_file=test_config1,
                            verbose=True)


 Initialized parameters from configuration file

 Categorizing search terms

 Including BIDS related search term settings

 Corresponding BIDS mapping settings

 Including additional settings for metadata

 Exclusion option implemented


In [27]:
exclusion_list

['SURVEY',
 'Reg',
 'SHORT',
 'LONG',
 'MRS',
 'PRESS',
 'DEFAULT',
 'ScreenCapture',
 'PD',
 'ALL',
 'SPECTRO']

In [28]:
search_dict

{'anat': {'T1w': ['T1', 'TFE'], 'T2w': ['T2', 'TSE'], 'flair': ['flair']},
 'func': {'bold': {'rest': ['rsfMR', 'rest', 'FFE', 'FEEPI']},
  'cbv': {'rest': ['casl', 'pcas']}},
 'fmap': {'fmap': ['map']},
 'swi': {'swi': ['swi']},
 'dwi': {'dwi': ['diffusion', 'DTI', 'DWI']}}

In [38]:
df_tmp_2 = export_scans_dataframe(test_db,False,None,'sub_id','ses_id','bids_name','acq_date')
df_tmp_2

Unnamed: 0,0,1,2,3
0,001,001,,
1,101,1,,
2,CX009902,BMNC000XDF,sub-CX009902_ses-BMNC000XDF_run-01_flair,


In [39]:
len(df_tmp_2)

3

In [30]:
df_tmp = _export_tmp_bids_df(database=test_db,
                            sub_id='CX009902',
                            modality_type='anat',
                            modality_label='flair')
df_tmp

Unnamed: 0,index,filename,acq_time
0,2,anat/sub-CX009902_ses-BMNC000XDF_run-01_flair....,


In [31]:
df_tmp['filename']

0    anat/sub-CX009902_ses-BMNC000XDF_run-01_flair....
Name: filename, dtype: object

In [32]:
df = export_bids_scans_dataframe(database=test_db,
                            sub_id='CX009902',
                            search_dict=search_dict)
df

Unnamed: 0,index,filename,acq_time
0,2,anat/sub-CX009902_ses-BMNC000XDF_run-01_flair....,


In [37]:
df.to_csv('test.csv')