In [1]:
import sqlite3
import pandas as pd

from sqlite3 import (
    DatabaseError,
    IntegrityError,
    OperationalError
)

from typing import (
    Dict,
    List,
    Optional,
    Tuple,
    Union
)

from collections import OrderedDict

In [2]:
database = 'file.database.db'

In [3]:
tables: OrderedDict = OrderedDict({
    'file_id':'TEXT',    # PRIMARY KEY
    'rel_path':'TEXT',
    'file_date':'TEXT',
    'acq_date':'TEXT',
    'sub_id':'TEXT',
    'ses_id':'TEXT',
    'bids_name':'TEXT'
})

In [4]:
def create_db_test_1(database: str,
                     tables: OrderedDict
                     ) -> str:
    """Test database creation function
    """
    # Create/access database
    conn = sqlite3.connect(database)
    c = conn.cursor()

    # Construct database tables
    for i in range(1,len(tables)):
        table_name: str = list(tables.keys())[i]
        new_field: str = list(tables.keys())[i]
        field_type: str = tables.get(list(tables.keys())[i],'NULL')
        
        # Create database tables
        try:
            tn: str = table_name
            # Primary Key
            nf1: str = list(tables.keys())[0]
            ft1: str = tables.get(list(tables.keys())[0],'NULL')
            # Child Key
            nf2: str = new_field
            ft2: str = field_type

            query: str = f"CREATE TABLE {tn} ({nf1} {ft1} PRIMARY KEY, {nf2} {ft2})"
            c.execute(query)
        except OperationalError:
            # print(table_name)
            continue
    
    # Commit changes and close the connection
    conn.commit()
    conn.close()
    return database

In [5]:
create_db_test_1(database,tables)

'file.database.db'

In [6]:
info: Dict[str,str] = {
    'file_id': '0000001',    # PRIMARY KEY
    'rel_path':'/Users/adebayobraimah/Desktop/projects/sql_db/file.py',
    'file_date':'2021-05-22',
    'acq_date':'2021-05-22',
    'sub_id':'001',
    'ses_id':'001',
    'bids_name':"sub-001_run-01_T1w"
}

In [9]:
info: Dict[str,str] = {
    'file_id': '0000002',    # PRIMARY KEY
    'rel_path':'/Users/adebayobraimah/Desktop/projects/sql_db/file.py',
    'file_date':'2021-05-22',
    'acq_date':'2021-05-22',
    'sub_id':'002',
    'ses_id':'001',
    'bids_name':"sub-002_run-01_T1w"
}

In [48]:
info: Dict[str,str] = {
    'file_id': '0000003',    # PRIMARY KEY
    'rel_path':'/Users/adebayobraimah/Desktop/projects/sql_db/file.py',
    'file_date':'2021-05-22',
    'acq_date':'2021-05-22',
    'sub_id':'002',
    'ses_id':'001',
    'bids_name':"sub-002_run-01_T2w"
}

In [142]:
info: Dict[str,str] = {
    'file_id': '0000004',    # PRIMARY KEY
    'rel_path':'/Users/adebayobraimah/Desktop/projects/sql_db/file.py',
    'file_date':'2021-05-22',
    'acq_date':'2021-05-22',
    'sub_id':'002',
    'ses_id':'001',
    'bids_name':"sub-002_task-rest_run-01_bold"
}

In [49]:
def insert_db_test(database: str,
                    tables: OrderedDict,
                    info: Dict[str,str]
                    ) -> str:
    """Inserts rows into existing database tables.
    """
    # Access database
    conn = sqlite3.connect(database)
    c = conn.cursor()

    # Insert new rows into database tables
    for i in range(1,len(tables)):
        table_name: str = list(tables.keys())[i]
        new_field: str = list(tables.keys())[i]

        tn: str = table_name
        p_key: str = list(tables.keys())[0]
        col: str = new_field

        p_val: str = info[list(tables.keys())[0]]
        col_val: str = info.get(list(tables.keys())[i],'NULL')
        
        query: str = f"INSERT INTO {tn} ({p_key},{col}) VALUES( ?,? )"

        try:
            c.execute(query, (p_val,col_val))
        except IntegrityError:
            continue
    
    conn.commit()
    conn.close()
    return database

In [143]:
insert_db_test(database,tables,info)

'file.database.db'

In [11]:
def get_len_rows_test(database: str, 
                    tables: OrderedDict
                    ) -> int:
    """Gets number of rows in a databases' table.
    """
    # Access database
    conn = sqlite3.connect(database)
    c = conn.cursor()

    # Perform database query
    query: str = f"SELECT COUNT(*) from {list(tables.keys())[1]}"
    c.execute(query)

    result: int = c.fetchone()[0]
    return result

In [12]:
get_len_rows_test(database,tables)

2

In [13]:
def get_file_id(database: str, 
                tables: OrderedDict
                ) -> str:
    """Returns new file_id for file that does not yet exist in the database.
    """
    file_id: int = get_len_rows_test(database, tables) + 1
    fild_id: str = str(file_id)
    # zeropad number here
    return file_id

In [14]:
get_file_id(database,tables)

3

In [15]:
def update_table_row(database: str,
                    prim_key: str,
                    table_name: str, 
                    col_name: str, 
                    value: Optional[Union[int,str]]
                    ) -> str:
    """Updates a row in a table in some given database.
    """
    # Access database
    conn = sqlite3.connect(database)
    c = conn.cursor()

    # Perform database table update
    query: str = f"UPDATE {table_name} SET {col_name} = ? WHERE {list(tables.keys())[0]} = ?"

    c.execute(query, (value,prim_key))

    conn.commit()
    conn.close()
    return database

In [238]:
update_table_row(database,
                prim_key='0000002',
                table_name='sub_id',
                col_name='sub_id',
                value='002')

'file.database.db'

In [16]:
def export_dataframe(database: str,
                    tables: OrderedDict
                    ) -> pd.DataFrame:
    """Exports joined tables from the input database.
    """
    # Access database
    conn = sqlite3.connect(database)

    df_list: List = []

    for i in range(1,len(tables)):
        table = list(tables.keys())[i]
        df_tmp: pd.DataFrame = pd.read_sql_query(f"SELECT * FROM {table}", conn)

        if i == 1:
            pass
        else:
            df_tmp = df_tmp.drop(labels=list(tables.keys())[0],axis=1)

        df_list.append(df_tmp)

    df: pd.DataFrame = pd.concat(df_list,axis=1,join='outer')

    return df

In [51]:
export_dataframe(database,tables)

Unnamed: 0,file_id,rel_path,file_date,acq_date,sub_id,ses_id,bids_name
0,1,/Users/adebayobraimah/Desktop/projects/sql_db/...,2021-05-22,2021-05-22,1,1,sub-001_run-01_T1w
1,2,/Users/adebayobraimah/Desktop/projects/sql_db/...,2021-05-22,2021-05-22,2,1,sub-002_run-01_T1w
2,3,/Users/adebayobraimah/Desktop/projects/sql_db/...,2021-05-22,2021-05-22,2,1,sub-002_run-01_T2w


In [18]:
list(tables.keys())[0]

'file_id'

In [19]:
def export_scans_dataframe(database: str,
                            info: Dict[str,str],
                            raise_exec: bool = False,
                            *args: str
                            ) -> pd.DataFrame:
    """Exports a dataframe provided table/column IDs from the input ordered dictionary that corresponds to the table/colmns in the provided database.
    """
    # Access database
    conn = sqlite3.connect(database)
    c = conn.cursor()

    df_list: List = []

    for i in args:
        table = str(i)

        query: str = f"SELECT count(name) FROM sqlite_master WHERE type='table' AND name='{table}'"

        c.execute(query)

        if c.fetchone()[0] == 1:
            pass
        else:
            if raise_exec:
                raise DatabaseError(f"Table {table} does not exist in database")
            continue
        
        df_tmp: pd.DataFrame = pd.read_sql_query(f"SELECT * FROM {table}", conn)
        df_tmp = df_tmp.drop(labels=list(tables.keys())[0],axis=1)
        df_list.append(df_tmp)

    df: pd.DataFrame = pd.concat(df_list,axis=1,join='outer')

    return df

In [235]:
df = export_scans_dataframe(database,tables,False,'sub_id','ses_id','bids_name','acq_date')
df

Unnamed: 0,sub_id,ses_id,bids_name,acq_date
0,1,1,sub-001_run-01_T1w,2021-05-22
1,2,1,sub-002_run-01_T1w,2021-05-22
2,2,1,sub-002_run-01_T2w,2021-05-22
3,2,1,sub-002_task-rest_run-01_bold,2021-05-22


In [22]:
# Tasks:
#   1. Filter df to keep subject of interest
#   2. Prepend text to string
#       a. Change column name

In [53]:
# 1. subject filter
sub_id = '001'
df.loc[df['sub_id'] == f'{sub_id}']

Unnamed: 0,sub_id,ses_id,bids_name
0,1,1,sub-001_run-01_T1w


In [29]:
mod = 'anat/'
df['bids_name'] = f'{mod}' + df['bids_name'].astype(str) 
df

Unnamed: 0,sub_id,ses_id,bids_name
0,1,1,anat/sub-001_run-01_T1w
1,2,1,anat/sub-002_run-01_T1w


In [72]:
df[df['bids_name'].str.contains("T1w")]

Unnamed: 0,sub_id,ses_id,bids_name
0,1,1,sub-001_run-01_T1w
1,2,1,sub-002_run-01_T1w


In [79]:
mod = 'anat/'
df['bids_name'] = f'{mod}' + df[df['bids_name'].str.contains("T1w")]

df.dropna(axis=0)

Unnamed: 0,sub_id,ses_id,bids_name
0,1,1,anat/001
1,2,1,anat/002


In [77]:
df.dropna(axis=0)

Unnamed: 0,sub_id,ses_id,bids_name
0,1,1,anat/001
1,2,1,anat/002


In [300]:
def _export_tmp_bids_df(database: str,
                        tables: Dict[str,str],
                        sub_id: str,
                        modality_type: str,
                        modality_label: str
                        ) -> pd.DataFrame:
    """working doc-string
    """
    df_tmp: pd.DataFrame = export_scans_dataframe(database,
                                                    tables,
                                                    False,
                                                    'sub_id',
                                                    'ses_id',
                                                    'bids_name',
                                                    'acq_date')
    # Filter by subject ID
    df: pd.DataFrame = df_tmp.loc[df_tmp['sub_id'] == f'{sub_id}']

    # Filter by modality type and modality label
    mod = modality_type + "/"
    df: d.DataFrame = df[df['bids_name'].str.contains(f"{modality_label}")]
    df['bids_name']: pd.DataFrame = f'{mod}' + df['bids_name'].astype(str)
    df: pd.DataFrame = df.dropna(axis=0)

    df: pd.DataFrame = df.rename(
                            columns={
                                "bids_name": "filename", 
                                "acq_date": "acq_time"}
                                )
    
    df: pd.DataFrame = df.drop(
                            columns=[
                                "sub_id",
                                "ses_id"]
                                )

    return df

In [247]:
df = _export_tmp_bids_df(database,tables,'002','anat','T1w')
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['bids_name']: pd.DataFrame = f'{mod}' + df['bids_name'].astype(str)


Unnamed: 0,filename,acq_time
1,anat/sub-002_run-01_T1w,2021-05-22


In [226]:
df[df['bids_name'].str.contains("T1w")]

Unnamed: 0,sub_id,ses_id,bids_name
1,2,1,anat/sub-002_run-01_T1w


In [251]:
import os
import sys
import pathlib

__file__ = 'sql_tutorial.ipynb'

In [253]:
_pkg_path: str = str(pathlib.Path(os.path.abspath(__file__)).parents[1])

_pkg_path

'/Users/adebayobraimah/Desktop/projects/convert_source'

In [255]:
sys.path.append(_pkg_path)

In [256]:
from convert_source.batch_convert import read_config

In [248]:
config = "/Users/adebayobraimah/Desktop/projects/convert_source/convert_source/config/config.CCHMC.default.yml"

In [257]:
[search_dict,
bids_search,
bids_map,
meta_dict,
exclusion_list] = read_config(config_file=config,
                            verbose=True)


 Initialized parameters from configuration file

 Categorizing search terms

 Including BIDS related search term settings

 Corresponding BIDS mapping settings

 Including additional settings for metadata

 Exclusion option implemented


In [258]:
search_dict

{'anat': {'T1w': ['T1', 'T1w', 'TFE'],
  'T2w': ['T2', 'T2w', 'TSE'],
  'flair': ['flair']},
 'func': {'bold': {'rest': ['rsfMR', 'rest', 'FFE', 'FEEPI'],
   'visualstrobe': ['vis', 'visual']},
  'cbv': {'rest': ['casl', 'pcas']}},
 'fmap': {'fmap': ['map']},
 'swi': {'swi': ['swi']},
 'dwi': {'dwi': ['diffusion', 'DTI', 'DWI', '6_DIR']}}

In [288]:
for i,j in search_dict.items():
    # print(f"{i}")
    for n,m in j.items():
        print(f"{i} : {n}")

anat : T1w
anat : T2w
anat : flair
func : bold
func : cbv
fmap : fmap
swi : swi
dwi : dwi


In [310]:
def export_bids_scans_dataframe(database: str,
                                tables: Dict[str,str],
                                sub_id: str,
                                search_dict: Dict[str,str],
                                df: Optional[pd.DataFrame] = None
                                ) -> pd.DataFrame:
    """working doc-string
    """
    df_list: List = []
    for modality_type,labels in search_dict.items():
        for modality_label,_ in labels.items():
            df_tmp: pd.DataFrame = _export_tmp_bids_df(database,
                                                        tables,
                                                        sub_id,
                                                        modality_type,
                                                        modality_label)
            if len(df_tmp) == 0:
                continue
            else:
                df_list.append(df_tmp)

    if len(df_list) == 0:
        df: pd.DataFrame = pd.DataFrame(
                                    columns=[
                                        'filename',
                                        'acq_time'
                                        ])
        pass
    else:
        df: pd.DataFrame = pd.concat(df_list,
                                    axis=0,
                                    join='outer',
                                    ignore_index=True)
    return df

In [313]:
export_bids_scans_dataframe(database,tables,'002',search_dict)

Unnamed: 0,filename,acq_time
0,anat/sub-002_run-01_T1w,2021-05-22
1,anat/sub-002_run-01_T2w,2021-05-22
2,func/sub-002_task-rest_run-01_bold,2021-05-22


In [295]:
_export_tmp_bids_df(database,tables,'001','swi','swi')

Unnamed: 0,sub_id,ses_id,filename,acq_time


## Get `relative` path for `file_name`

In [5]:
import os
import pathlib

In [3]:
study_dir = "/Users/adebayobraimah/Desktop/projects/convert_source/test.data/study.images"
file_name = "/Users/adebayobraimah/Desktop/projects/convert_source/test.data/study.images/IRC287H009/IRC287H-9_AXIAL_3_3.nii.gz"

In [12]:
path_sep = os.path.sep
dir_tmp = str(pathlib.Path(study_dir).parents[0])
dir_tmp

'/Users/adebayobraimah/Desktop/projects/convert_source/test.data'

In [15]:
file_name.replace(dir_tmp + path_sep,"." + path_sep)

'./study.images/IRC287H009/IRC287H-9_AXIAL_3_3.nii.gz'

In [314]:
from datetime import datetime

In [320]:
now = datetime.now()
str(now.strftime("%Y-%m-%dT%H:%M:%S"))

'2021-05-23T21:32:25'

In [321]:
# SQL QUERY: SELECT sub_id FROM sub_id WHERE file_id = '0000001'

In [322]:
database = 'file.database.db'

In [323]:
# Access database
conn = sqlite3.connect(database)
c = conn.cursor()

In [373]:
table = 'sub_id'
column = table
prim_key = 'file_id'
value = '0000001'

In [385]:
# query: str = f"SELECT {column} FROM {table} WHERE ? = ?"
query: str = f"SELECT {column} FROM {table} WHERE {prim_key} = '{value}'"

In [386]:
query

"SELECT sub_id FROM sub_id WHERE file_id = '0000001'"

In [392]:
# c.execute(query,(prim_key))
c.execute(query)

<sqlite3.Cursor at 0x7fc2777a8110>

In [393]:
c.fetchone()[0]

'001'

In [None]:
table = 'sub_id'
column = table
prim_key = 'file_id'
value = '0000001'

In [400]:
def query_db(database:str,
            table: str,
            prim_key: str,
            value: Union[int,str],
            column: Optional[str] = None
            ) -> str:
    """working doc-string
    """
    # Access database
    database: str = os.path.abspath(database)
    conn = sqlite3.connect(database)
    c = conn.cursor()

    if column:
        pass
    else:
        column: str = table

    # Query database
    query: str = f"SELECT {column} FROM {table} WHERE {prim_key} = '{value}'"
    c.execute(query)
    query_val: str = c.fetchone()[0]

    conn.commit()
    conn.close()
    return query_val

In [406]:
query_db(database=database,
            table='bids_name',
            prim_key='file_id',
            value='0000002') 

'sub-002_run-01_T1w'

In [403]:
df

Unnamed: 0,filename,acq_time
1,anat/sub-002_run-01_T1w,2021-05-22


## Test conditional imports

In [2]:
c = 1
from f import phi