# Create SQL Schema from CSV Files 
#### Joe Eberle, Alan Calhoun, Helmi (Al) Seoud --- Code Refactored On: 9/20/2022 --- Revised On: 10/6/2022

### Project Setup - Importing Libraries and Initializing Global Variables 

In [1]:
# Install dependent libraries if not already installed
#!pip install pyttsx3

# Import the necessary Libraries 
import glob, os
import pandas as pd
# import logging 
from pathlib import Path
import pyttsx3
import pyodbc 
import timeit
import time
from datetime import date
from datetime import datetime
import sqlalchemy

# Establish some import parameters 

# Switches naming which file types this run imports
importing_xlsx_files = False 
importing_csv_files = True 
# Root directory that is walked for data directories
Data_Import_Starting_Directory = 'Y:/_Kaleida_Input/'
#Data_Import_Starting_Directory = 'C:/Data/'
# Process label written into every event-log row
Process_Name = 'Importing CSV data into SQL'

# Debugging, narration and output switches
step_debugging = True
detail_debugging = True
detail_Talking = False # only talk on major steps 
Process_Step_Name = ''  
Reading_Intro = False
Reading_Credits =  False
Reading_Steps = False 
Reading_Terms = False 
printing_output = True
Talking_Code = True
Talking_Voice_Male_Gender = True        # Set to False for Female Voice 
Code_Logging  = True 
event_log_row = 0 

# Create some Global Variables for SQL Constructs 
column_inserts = ''
column_question_mark   = ''
create_table_SQL  = ''
create_real_table_SQL  = ''
insert_records_SQL  = ''
create_schema_SQL  = '' 
create_real_schema_SQL  = '' 
# Suffixes and schema prefix for generated table names
Table_Name_Extension_Daily = '_DI'
Table_Name_Extension_Historical = '_HX'
Table_Name_Extension_Rejected = '_RJ'
Table_Name_Extension_Administrative = '_AD'
Table_Name_Prefix = '[pbic_1_0].'

# Create some Global Variables for SQL Connection
# NOTE(review): the SQL login and password are stored in plain text in this
# file; consider loading them from the environment or a secret store.
server = 'Kalpwvsqlgppc01' 
database  = 'GPPC_DEV' 
username ='GPPC'
pwd = 'Elephant-Trunk-06'
# ODBC connection string assembled from the values above
sql_connector = 'DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';Trusted_Connection=No;UID='+username+';PWD='+pwd
# log_filename='data_importing_log.log'

# Configure the Logging to the DEBUG Level 
# logging.basicConfig(level=logging.DEBUG, filename=log_filename, format= '%(asctime)s %(clientip)-15s %(user)-8s %(message)s')
# Module-level speech engine used by say()
Text_to_Speech = pyttsx3.init()

## Dictionary of character replacements for column names
# NOTE(review): not referenced in the visible part of this notebook;
# read_and_clean_file uses its own hard-coded replace chain instead.
character_replacements = { " ":"_","#":"Number","%":"Percentage" \
                         ,'_Unnamed':'','_Level':'',"$":"Dollar",'_1':'' \
                         ,'_2':'','_3':'','_4':'','_5':''  \
                         ,'_6':'','_7':'','_8':'','_9':''  \
                         ,'_0':'',':7':'',':8':'',':':'' }
## Dictionary for replacing pandas data types with database data types
# NOTE(review): the "%" and '_Unnamed' entries look like leftovers copied from
# the character dictionary above - confirm before relying on them.
data_type_replacements = { "object":"varchar","float64":"float","int64":"int","%":"Percentage" \
                         ,'_Unnamed':'','datetime64':'timestamp',"timedelta64[ns]":"varchar"}    

## Add the global data frames for event logging and schema creation
df_event_log = pd.DataFrame(columns = ('Event_ID','Process_Name','Event_Name','Event_Date','Event_Time','Task_Start_Time','Task_End_Time','TASk_Duration','Comments'))
df_import_directories = pd.DataFrame(columns = ('Root_Directory','Sub_Directory'))
# Seven columns: add_import_file() writes rows with seven positional values
df_import_files = pd.DataFrame(columns = ('Root_Directory','Sub_Directory','Table_Name','Import_File_Name','File_Size','File_Modified_Date','File_Created_Date'))
# Running counters; the add_* helpers use them as row labels
import_directory_file_Number  = 0
import_file_Number  = 0 
event_log_row = 0
sub_directory_count = 0 
import_csv_file_count = 0 
total_csv_file_count = 0 


### Customization - Special Formulas for GPPC 

In [2]:
def customize_table_names(table_name):
    """Translate a directory-derived table name into its GPPC-specific name.

    Each rule is a plain substring replacement, applied in the order listed.
    The original chain repeated several rules and contained a rule that
    replaced 'Third_Next_Available' with itself; those redundant entries are
    dropped here, and the first mapping for each source string is kept.

    Args:
        table_name: Raw table name, e.g. as produced from a directory path.

    Returns:
        The table name with every matching fragment replaced. Names that
        match no rule are returned unchanged.
    """
    rename_rules = (
        ('ADPDaily_Time_Card', 'Time_Card'),
        ('Available_Slots_Past', 'Available_Slots'),
        ('DailyAppointments', 'Appointments'),
        ('HXDX', 'Historical'),
        ('PatientInformationMain18Under', 'Patient_Information'),
        ('QualityPrimaryCareAnnualVisitReport', 'Primary_Care_Annual_Visit'),
        ('RCM_Adujstments', 'RCM_Adjustments'),  # source spelling is intentional
        ('SurgicalAppointmentSummar2DaysAgo', 'Surgical_Appointment'),
        ('ADPEmployee_Census', 'Employee_Census'),
    )
    for old_fragment, new_fragment in rename_rules:
        table_name = table_name.replace(old_fragment, new_fragment)
    return table_name



def customize_column_name_clean_up(df_to_clean):
    """Rename known awkward column names to their cleaned-up equivalents.

    The frame is renamed in place (as before) and is now also returned.
    Previously the result of ``rename(..., inplace=True)`` - which is always
    ``None`` - was returned, so callers using the return value lost the frame.

    Args:
        df_to_clean: DataFrame whose columns should be renamed.

    Returns:
        The same DataFrame object, with matching columns renamed. Columns
        that are not listed in the mapping are left untouched.
    """
    column_renames = {
        'Name_': 'Name',
        'Appt_Date': 'Appointment_Date',
        'Appt_Type': 'Appointment_Type',
        'EbillEsuperbillNumber': 'Ebill_Esuper_Bill_Number',
        'Referral_FromTodo_Selection': 'Referral_From_To_Do_Selection',
        'Wellnow_LocationTodo_Selection': 'Wellnow_Location_To_Do_Selection',
        'Access_Vip': 'Access_VIP',
        'Vip': 'VIP',
        'Todo_DateCreate_Date': 'To_Do_Date_Create_Date',
    }
    # rename(inplace=True) mutates the frame and returns None, so the frame
    # itself must be returned explicitly.
    df_to_clean.rename(columns=column_renames, inplace=True)
    return df_to_clean


## Project Setup -    Establishing DataFrames & Establishing Global Functions

In [None]:
# NOTE: `global` statements at module level have no effect; functions that
# rebind these names must declare them `global` inside their own body.
global Talking_Code
global import_directory_file_Number 
global event_log_row 
global printing_output


def set_up_python_infrastructure():
    """Run the one-time setup helpers in order.

    Calls the replacement-dictionary initialiser, the text-to-speech
    initialiser and the event-log factory. The frame returned by the factory
    is not kept. A later definition with the same name in this notebook
    replaces this one once its cell has run.
    """
    # Set up control libraries for syntactic consistency
    initialize_replacement_Dictionaries()
    # Initialize the text-to-speech engine
    Initialize_Text_to_Speach()
    # Build the event-log frame (result is discarded, as before)
    create_event_log_dataframe()
    
# Say Whatever the user wants 
def say(speech):
    """Speak *speech* through the module-level engine when talking is enabled.

    Does nothing when the Talking_Code switch is off.
    """
    if not Talking_Code:
        return
    Text_to_Speech.say(speech)
    Text_to_Speech.runAndWait()
    
# Intitialize Text to Speech Engine 
def Initialize_Text_to_Speach():
    """Create and configure the text-to-speech engine, then announce it.

    Fixes relative to the earlier version:
      * the speaking rate is set through the property name 'rate'; 'Rate'
        is not a pyttsx3 property name, so the rate was never applied;
      * the engine is bound to the module-level ``Text_to_Speech`` that
        ``say`` uses, instead of a function-local variable;
      * the voice index is clamped so a system with a single installed voice
        no longer raises IndexError when the alternate voice is requested.

    The announcement is spoken directly through the engine, as before, and is
    therefore independent of the Talking_Code switch.
    """
    global Text_to_Speech
    Text_to_Speech = pyttsx3.init()
    Text_to_Speech.setProperty('rate', 187)

    voices = Text_to_Speech.getProperty('voices')
    # Index 0 is used for the default (male) voice, index 1 for the alternate
    # (female) voice, following the original comments; the actual voices
    # depend on what is installed on the machine.
    wanted_index = 0 if Talking_Voice_Male_Gender else 1
    if voices:
        voice_index = min(wanted_index, len(voices) - 1)
        Text_to_Speech.setProperty('voice', voices[voice_index].id)

    speech = 'The text to speech engine is initialized using pythons pyttsx3 engine'
    Text_to_Speech.say(speech)
    Text_to_Speech.runAndWait()
    
    
# Say Whatever the user wants 
def say(speech):
    """Speak *speech* through the module-level engine when talking is enabled.

    This definition replaces the earlier ``say`` in this notebook. It had
    dropped the Talking_Code check, so narration helpers that call ``say``
    directly kept speaking even with talking switched off. The check is
    restored here.

    Args:
        speech: Text to speak.
    """
    if not Talking_Code:
        return
    Text_to_Speech.say(speech)
    Text_to_Speech.runAndWait()
    
# Create dataframe to house Directories 
def create_directory_dataframe():
    """Return a new, empty DataFrame for registering import directories."""
    directory_columns = ('Root_Directory', 'Sub_Directory')
    return pd.DataFrame(columns=directory_columns)

def create_import_files_dataframe():
    """Return a new, empty DataFrame for registering import files."""
    file_columns = (
        'Root_Directory',
        'Sub_Directory',
        'Table_Name',
        'File_Name',
        'File_Size_Bytes',
        'File_Created',
        'File_Modified',
    )
    return pd.DataFrame(columns=file_columns)

def add_import_directory(Root_Directory,Sub_Directory):
    """Append one directory row to the module-level directory registry.

    Increments the global directory counter and uses the new value as the
    row label.

    Returns:
        The row label (the updated counter value).
    """
    global import_directory_file_Number
    import_directory_file_Number = import_directory_file_Number + 1
    row_label = import_directory_file_Number
    df_import_directories.loc[row_label] = [Root_Directory, Sub_Directory]
    return row_label

def add_import_file(Root_Directory,Sub_Directory,Table_Name,File_Name,File_Size_Bytes,File_Created,File_Modified):
    """Append one file row to the module-level import-file registry.

    Increments the global file counter and uses the new value as the row
    label. The seven values are stored positionally, in parameter order, so
    their meaning is determined by the column order of ``df_import_files``.

    Returns:
        The row label of the new entry (the updated file counter).
        Previously the *directory* counter was returned by mistake.
    """
    global import_file_Number
    import_file_Number += 1
    df_import_files.loc[import_file_Number] = [
        Root_Directory,
        Sub_Directory,
        Table_Name,
        File_Name,
        File_Size_Bytes,
        File_Created,
        File_Modified,
    ]
    return import_file_Number

# Create dataframe to house the event log
def create_event_log_dataframe():
    """Return a new, empty DataFrame with the event-log columns."""
    event_columns = (
        'Event_ID',
        'Process_Name',
        'Event_Name',
        'Event_Date',
        'Event_Time',
        'Task_Start_Time',
        'Task_End_Time',
        'TASk_Duration',
        'Comments',
    )
    return pd.DataFrame(columns=event_columns)


# Rebind the module-level event log to a fresh, empty frame
df_event_log = create_event_log_dataframe()

# Create dataframe to database schema 
def create_database_schema_dataframe():
    """Return a new, empty DataFrame describing database schema columns."""
    schema_columns = (
        'Database_Name',
        'Table_Name',
        'Column_Number',
        'Column_Name',
        'Column_Data_Type',
        'Column_Sample_Data',
        'Column_Description',
    )
    return pd.DataFrame(columns=schema_columns)

# Append one event to the event log
def add_log_event(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    """Append one row to the module-level event log.

    The global row counter is incremented and serves both as the row label
    and as the Event_ID value. All other values are stored as given.
    """
    global event_log_row
    event_log_row += 1
    event_id = event_log_row
    new_row = [
        event_id,
        Process_Name,
        Event_Name,
        Event_Date,
        Event_Time,
        Task_Start_Time,
        Task_End_Time,
        Task_Duration,
        Comments,
    ]
    df_event_log.loc[event_id] = new_row

    
# Add Log Events 
def add_log_event_timer(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    """Append an event row stamped with the current date and time.

    The Event_Date and Event_Time arguments are ignored: they are replaced
    by ``date.today()`` and ``time.time()`` before the row is written.
    """
    global event_log_row
    event_log_row += 1
    event_id = event_log_row
    stamped_date = date.today()
    stamped_time = time.time()
    df_event_log.loc[event_id] = [
        event_id,
        Process_Name,
        Event_Name,
        stamped_date,
        stamped_time,
        Task_Start_Time,
        Task_End_Time,
        Task_Duration,
        Comments,
    ]
    
# Reset the Event timer start time 
def reset_event_timer(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    """Append an event row whose date, time and start time are "now".

    The Event_Date, Event_Time and Task_Start_Time arguments are ignored;
    they are replaced by ``date.today()`` and two successive ``time.time()``
    readings before the row is written.
    """
    global event_log_row
    event_log_row += 1
    event_id = event_log_row
    today = date.today()
    event_clock = time.time()
    start_clock = time.time()
    df_event_log.loc[event_id] = [
        event_id,
        Process_Name,
        Event_Name,
        today,
        event_clock,
        start_clock,
        Task_End_Time,
        Task_Duration,
        Comments,
    ]

# List every CSV file found directly inside a directory
def list_all_csv_files(path):
    """Report every ``*.csv`` file found directly inside *path*.

    Changes the process working directory to *path* and leaves it there.
    Each file is reported through ``out`` with a running number.
    """
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    pattern = '*.{}'.format('csv')
    for position, file in enumerate(glob.glob(pattern), start=1):
        out('CSV File #{} filename: {}  '.format(str(position), file))

# Register every CSV file found directly inside a directory for import
def register_all_csv_files_for_import(path, table_name):
    """Register every ``*.csv`` file directly inside *path* for import.

    Changes the process working directory to *path* and leaves it there.
    For each file, its size and its modification and creation timestamps
    (formatted with ``time.ctime``) are collected and passed to
    ``add_import_file``; *path* is used for both the root and the
    sub-directory column.

    The earlier version also built a ``Full_Path_File_Name`` string that was
    never used and whose literals contained invalid escape sequences
    (``'\\**'``); that dead code has been removed.

    Args:
        path: Directory to scan.
        table_name: SQL table name to associate with each file found.
    """
    global total_csv_file_count
    os.chdir(path)
    for csv_file_count, file in enumerate(glob.glob('*.csv'), start=1):
        total_csv_file_count += 1
        File_Size = os.path.getsize(file)
        File_Last_Modified = time.ctime(os.path.getmtime(file))
        File_Create_Date = time.ctime(os.path.getctime(file))
        out('CSV Total#{} File #{} filename: {}  '.format(str(total_csv_file_count),str(csv_file_count),file))
        # Positional order (size, modified, created) matches the column order
        # of the module-level df_import_files frame.
        add_import_file(path,path,table_name,file,File_Size,File_Last_Modified,File_Create_Date)
        
def create_import_files_dataframe():
    """Return a new, empty import-file registry frame.

    This later definition replaces the earlier one with the same name. It
    used to return only four columns, which does not fit the seven-value
    rows written by ``add_import_file``. The schema now mirrors the
    module-level ``df_import_files`` frame, whose column order matches the
    values ``register_all_csv_files_for_import`` passes (size, modified
    date, created date).
    """
    registry_columns = (
        'Root_Directory',
        'Sub_Directory',
        'Table_Name',
        'Import_File_Name',
        'File_Size',
        'File_Modified_Date',
        'File_Created_Date',
    )
    return pd.DataFrame(columns=registry_columns)

def add_import_directory(Root_Directory,Sub_Directory):
    """Append one directory row to the module-level directory registry.

    Repeats the earlier definition of the same name in this notebook.
    Increments the global directory counter and uses it as the row label.

    Returns:
        The updated directory counter.
    """
    global import_directory_file_Number
    next_label = import_directory_file_Number + 1
    import_directory_file_Number = next_label
    df_import_directories.loc[next_label] = [Root_Directory, Sub_Directory]
    return next_label

def add_import_File(Root_Directory,Sub_Directory,Table_Name,File_Name):
    """Append a file row that carries only directory, table and file name.

    Older four-argument variant of ``add_import_file``. Two defects are
    fixed: the row is padded with ``None`` up to the width of
    ``df_import_files`` (a four-value row cannot be assigned to the
    seven-column registry), and the file counter is returned instead of the
    directory counter.

    Returns:
        The row label of the new entry (the updated file counter).
    """
    global import_file_Number
    import_file_Number += 1
    row = [Root_Directory, Sub_Directory, Table_Name, File_Name]
    # Fill the columns this variant has no value for (size, dates)
    missing = len(df_import_files.columns) - len(row)
    row.extend([None] * max(missing, 0))
    df_import_files.loc[import_file_Number] = row
    return import_file_Number
        
    
def infer_table_name_from_path(path):
    """Derive a SQL table name from a data-directory path.

    Removes the import root (both the configured value and two hard-coded
    spellings), turns spaces into underscores, drops forward and backward
    slashes, and finally applies ``customize_table_names``.
    """
    ordered_substitutions = (
        (Data_Import_Starting_Directory, ""),
        ('Y:/_Kaleida_Input/', ''),
        ('Y:_Kaleida_Input', ''),
        (' ', '_'),
        ('/', ''),
        ('\\', ''),
    )
    table_name = path
    for fragment, substitute in ordered_substitutions:
        table_name = table_name.replace(fragment, substitute)
    return customize_table_names(table_name)
        
# Credits - Who developed this notebook and when
def read_credits(): 
    """Speak the notebook's credits as a single utterance."""
    credit_lines = (
        'This Jupiter Notebook Was  : ',
        'Developed in Collaboration by Joe Eberle, Alan Calhoun, Helmi (Al) Seoud  ',
        'Developed in Python starting on 9/20/2022 ',
        'This package is free AND Open Source and the code is openly available for general Use. ',
    )
    say(''.join(credit_lines))
    
# Terminology - Definitions of the terms used in this process
def read_terms(): 
    """Speak the glossary of terms used by this process as one utterance."""
    glossary = (
        'The terminology for this process is : ',
        'Python. Python is a general-purpose programming language that is widely used for data science.  ',
        'Structured Query Language (SQL) is one of the worlds most widely used programming languages for manipulating and querying data. ',
        'CSV. A Comma-Separated Values (CSV)  file is a text file in which information is separated by commas. ',
        'PANDAS. Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language.  ',
        'OS PACKAGE - The OS python library provides a portable way of using operating system dependent functionality to allow your python code to run on all platforms ',
    )
    say(''.join(glossary))
    
# Process Steps - Overview of CSV to SQL Import Process Steps 
def read_process_steps():
    """Speak the data-flow steps of this process as one utterance."""
    step_lines = (
        'The data flow for this process is : ',
        'Precursor Step 1: The clinician or administrator enters the patients data into the Electronic Medical Record (EMR). ',
        'Precursor Step 2: At the end of the day the EMR data is exported into Comma Seperated Values (CSV) files and shared via SFTP. ',
        'Step 1: Establish The Root Directory. ',
        'Step 2: Walk the directory structure discovering data to discover all data directories  ',
        'Step 3: Register the directories in an excel spreadsheet for future processing. ',
        'Step 4: Register the CSV Files in an excel spreadsheet for future processing. ',
    )
    say(''.join(step_lines))
    
    
# Introduction - Overview of NoteBooks  
def read_introduction():
    """Speak the introduction to this notebook as one utterance."""
    intro_lines = (
        'This jupiter notebook will discover all of the CSV files under a specific root directory. ',
        'This python code will take the CSV files exported from an Electronic Medical Record platform. ',
        'and import them into a faster database such as SQL Lite or PostgreSQL or SQL Server or SNOW Flake. ',
        'The directories will then be available for future importing. ',
    )
    say(''.join(intro_lines))
    
    
def column_create_SQL (import_df):
    """Return the frame's column names, title-cased and comma-separated.

    The earlier version called ``out`` with two positional arguments, but
    ``out`` accepts exactly one, so every call raised TypeError. The message
    is now formatted into a single string. Column labels are converted with
    ``str`` first so non-string labels do not fail on ``.title()``.

    Args:
        import_df: DataFrame whose column labels are listed.

    Returns:
        One string such as ``'First Name, Age'``.
    """
    column_name_List = [str(label).title() for label in import_df.columns]
    column_Str = ', '.join(column_name_List)
    out('Columns = {}'.format(column_Str))
    return column_Str
    
    
def out(dialog):
    """Print *dialog* and, depending on the talk settings, speak it.

    * Printed whenever ``printing_output`` is true.
    * Spoken only when ``Talking_Code`` is true, and then either always
      (``detail_Talking`` true) or only for messages that mention a step.

    The step check is now case-insensitive. The earlier test looked for the
    lowercase substring 'step', so messages starting with "Step 1 - ..."
    were never spoken, although the setting is meant to announce the major
    steps.

    Args:
        dialog: Message to print and possibly speak.
    """
    if printing_output:
        print(dialog)
    if not Talking_Code:
        return
    if detail_Talking or 'step' in str(dialog).lower():
        say(dialog)

        
def list_all_xlsx_files(path):
    """Report every ``*.xlsx`` file found directly inside *path*.

    Changes the process working directory to *path* and leaves it there.
    Each file is reported through ``out`` with a running number.
    """
    os.chdir(path)
    pattern = '*.{}'.format('xlsx')
    for position, file in enumerate(glob.glob(pattern), start=1):
        out('File #{}   is {} '.format(position, file))
        
def explain_the_project():
    """Speak the optional narrations selected by the Reading_* switches.

    Each switch is checked independently, in the order introduction,
    credits, process steps, terms.
    """
    if Reading_Intro:
        read_introduction()
    if Reading_Credits:    
        read_credits() 
    if Reading_Steps:
        read_process_steps()
    if Reading_Terms:
        read_terms()        
        
        
        
def convert_data_types(input_df):
    """Report the frame's dtypes rewritten with SQL-like type names.

    The text form of ``input_df.dtypes`` is rewritten by ordered substring
    replacement and reported through ``out``. Nothing is returned.
    """
    ordered_rewrites = (
        ('dtype: object', ''),
        ('object', 'varchar[255], '),
        ('datetime64[ns]', 'timestamp, '),
        ('float64', 'float, '),
    )
    type_text = str(input_df.dtypes)
    for pandas_name, sql_name in ordered_rewrites:
        type_text = type_text.replace(pandas_name, sql_name)
    out('create column SQL string: {} \n'.format(type_text))
        
def _flatten_column_label(label):
    """Return one string for a column label, joining tuple labels with '^'."""
    if isinstance(label, tuple):
        return '^'.join(str(part) for part in label)
    return str(label)


def read_and_clean_file(data_folder, filename):
    """Read the first 10 rows of a CSV file and normalise its column names.

    Each label is stripped, title-cased and passed through an ordered list
    of substring replacements (spaces to underscores, '#', '%' and '$'
    spelled out, 'Unnamed'/'Level' markers removed, digits and colons
    removed).

    Fix: the earlier version applied ``'^'.join`` to every label. For
    ordinary string headers that inserts '^' between all characters, so
    ``.title()`` upper-cased every letter ('Appt Date' became 'APPT_DATE')
    and the title-case patterns such as 'Unnamed' could never match. Only
    tuple (multi-level) labels are joined now.

    Replacements that could never match in the original chain (a repeated
    '#', patterns containing a digit or space after those characters had
    already been removed) are omitted; the result is the same.

    Args:
        data_folder: Not used by this function; kept for call compatibility.
        filename: Path of the CSV file to read.

    Returns:
        DataFrame holding at most 10 rows with cleaned column names.
    """
    print('Import File =', filename)
    df_input_csv = pd.read_csv(filename, nrows=10)
    out('Reading Dataframe Columns before cleanups:{}'.format(df_input_csv.columns))

    # Order matters: '_<digit>' suffixes are removed before bare digits.
    ordered_replacements = (
        ('^', ''),
        (' ', '_'),
        ('#', 'Number'),
        ('%', 'Percentage'),
        ('_Unnamed', ''),
        ('Unnamed', ''),
        ('_Level', ''),
        ('$', 'Dollar'),
    )
    ordered_replacements += tuple(('_' + digit, '') for digit in '123456789')
    ordered_replacements += tuple((digit, '') for digit in '1234567890')
    ordered_replacements += ((':', ''),)

    cleaned_names = []
    for label in df_input_csv.columns:
        name = _flatten_column_label(label).strip().title()
        for old_text, new_text in ordered_replacements:
            name = name.replace(old_text, new_text)
        cleaned_names.append(name)
    df_input_csv.columns = cleaned_names
    return df_input_csv
 
        
    
def set_up_python_infrastructure():
    """Run the one-time setup helpers in order.

    Fix: the fresh event-log frame is now bound to the module-level
    ``df_event_log``. Previously it was assigned to a function-local name
    and discarded on return, so the log the ``add_log_event*`` helpers
    write to was never re-created.

    NOTE(review): the schema frame returned by
    ``create_database_schema_dataframe`` is still discarded; no module-level
    name for it is visible in this part of the notebook.
    """
    global df_event_log
    initialize_replacement_Dictionaries()    # Set up control libraries for syntactic consistency
    Initialize_Text_to_Speach()              # Initialize the text-to-speech engine
    df_event_log = create_event_log_dataframe()  # Fresh event log for this process
    create_database_schema_dataframe()       # Build the schema frame (result not kept)

In [4]:
# Report the current speech settings on screen and, when enabled, aloud
print('Talking Code Setting: ',Talking_Code)
print('Talking Code Detail Setting: ',detail_Talking)
if Talking_Code:
    # Which voice is configured
    out('Talking_Code: Text to Voice set to ON. Voice set  to  Default Male Voice. '
        if Talking_Voice_Male_Gender
        else 'Talking_Code: Text to Voice set to ON. Voice set  to  Alternate Female Voice. ')
    # How much of the output is spoken
    out('Talking  Details is set to True say all detailed outputs..  this will be slow and boring '
        if detail_Talking
        else 'Talking  Details is set to False to say only major steps..  this setting will execute fast and tell you the high level steps of the process. ')

Talking Code Setting:  True
Talking Code Detail Setting:  False
Talking_Code: Text to Voice set to ON. Voice set  to  Default Male Voice. 
Talking  Details is set to False to say only major steps..  this setting will execute fast and tell you the high level steps of the process. 



## Process Reinitialize - Re initialize the Data Frames and all Global Counters to Zero 

In [5]:
# Re-create the module-level data frames.
# Fix: the create_* helpers return a new frame; the earlier version threw the
# results away, so a re-run kept every previously registered row.
df_import_directories = create_directory_dataframe()
# Empty the file registry in place so it keeps the seven columns that
# add_import_file() writes; the create_import_files_dataframe() definition in
# effect may declare a different column set.
df_import_files = df_import_files.iloc[0:0]
# The event-log row counter is reset below, so the log itself is emptied too.
df_event_log = create_event_log_dataframe()
# NOTE(review): no module-level schema frame is visible in this part of the
# notebook, so this result is still not bound to a name.
create_database_schema_dataframe()

# Reset all the global counters
# (module-level `global` statements are no-ops and have been dropped)
import_directory_file_Number  = 0
import_file_Number  = 0
event_log_row = 0
sub_directory_count = 0
import_csv_file_count = 0
total_csv_file_count = 0


## Optional Precursor Step  - Explain the Project 

In [6]:
# Speak whichever narrations the Reading_* switches enable
explain_the_project()        

## Step 1 - Establish the root Directory 

In [7]:
# Log step 1, record it as the current step, and announce the root directory
add_log_event(
    Process_Name,
    'step 1 - Establish the root Directory: {}'.format(Data_Import_Starting_Directory),
    datetime.now(),
    datetime.now(),
    time.time(),
    time.time(),
    0,
    'Step 1 - Establish the root Directory: {}'.format(Data_Import_Starting_Directory),
)
Process_Step_Name = 'Step 1 - Establish the root Directory'
out('Step 1 - Establish the root Directory')
out('The root directory to walk is : {}'.format(Data_Import_Starting_Directory))
# Last expression of the cell: shows the first rows of the event log
df_event_log.head()

Step 1 - Establish the root Directory
The root directory to walk is : Y:/_Kaleida_Input/


Unnamed: 0,Event_ID,Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time,TASk_Duration,Comments
1,1,Importing CSV data into SQL,step 1 - Establish the root Directory: Y:/_Kal...,2022-10-18 17:24:31.900737,2022-10-18 17:24:31.900737,1666128000.0,1666128000.0,0,Step 1 - Establish the root Directory: Y:/_Kal...


## Step 2 - Walk the directory structure discovering data to discover all data directories

In [8]:
global sub_directory_count
# Get a list of all the Subfiles to iterate through 
def walk_sub_directories(root_directory):
    """Walk *root_directory* and register every directory found.

    Directories whose path contains 'old' or 'excel' are skipped. Every
    other directory is registered through ``add_import_directory``, with its
    path used for both the root and the sub-directory column.

    Fix: the registry frame is now the module-level
    ``df_import_directories``. Previously a function-local frame was created
    and returned while the rows went into the global one, so the returned
    frame was always empty. ``Process_Step_Name`` is likewise updated at
    module level instead of being set on an unused local.

    Args:
        root_directory: Directory at which the walk starts.

    Returns:
        The directory registry frame holding the registered rows.
    """
    global sub_directory_count, df_import_directories, Process_Step_Name
    Process_Step_Name = 'Step 2 - Walk the directory structure discovering data to discover all data directories'
    df_import_directories = create_directory_dataframe()
    directory_entry = 0

    for root, _subdirectories, _files in os.walk(root_directory):
        # hard coded - remove this later!!!!!!
        # old data and Excel data should NOT BE included under root.
        # The match is a case-sensitive substring test on the whole path.
        if 'old' in root or 'excel' in root:
            continue
        out('Registering Directory # {} {}  '.format(directory_entry,root ))
        directory_entry += 1
        sub_directory_count += 1
        add_import_directory(root, root)

    return df_import_directories

# Announce and log step 2, then walk the import root
out('Step 2 - Walk the directory structure  to discover all data directories')
add_log_event(
    Process_Name,
    'Step 2 - Walk the directory structure  to discover all data directories',
    datetime.now(),
    datetime.now(),
    time.time(),
    time.time(),
    0,
    'Step 2 - Walk the directory structure  to discover all data directories',
)
walk_sub_directories(Data_Import_Starting_Directory)
out('Step 2 Done - Listing all Registered data Directories: ')


Step 2 - Walk the directory structure  to discover all data directories
Registering Directory # 0 Y:/_Kaleida_Input/  
Registering Directory # 1 Y:/_Kaleida_Input/Access  
Registering Directory # 2 Y:/_Kaleida_Input/ADP  
Registering Directory # 3 Y:/_Kaleida_Input/ADP\Daily Time Card  
Registering Directory # 4 Y:/_Kaleida_Input/ADP\Employee Census  
Registering Directory # 5 Y:/_Kaleida_Input/Available_Slots  
Registering Directory # 6 Y:/_Kaleida_Input/Available_Slots_Past  
Registering Directory # 7 Y:/_Kaleida_Input/DailyAppointments  
Registering Directory # 8 Y:/_Kaleida_Input/Future Surgeries  
Registering Directory # 9 Y:/_Kaleida_Input/FutureAppointmentVisits  
Registering Directory # 10 Y:/_Kaleida_Input/Hx  
Registering Directory # 11 Y:/_Kaleida_Input/HXDX  
Registering Directory # 12 Y:/_Kaleida_Input/Incident  
Registering Directory # 13 Y:/_Kaleida_Input/InsuranceIDVerificationAudit  
Registering Directory # 14 Y:/_Kaleida_Input/LungCancerScreeningTracker  
Registering 

In [9]:
# Persist the directories in an excel file Registry 
# NOTE(review): this rebinds Data_Import_Starting_Directory, the import root
# used earlier, to the folder where the registry workbook is written. Later
# code that reads this name sees the new value; a separate variable for the
# output folder may be what was intended - confirm.
Data_Import_Starting_Directory =  'J:/IT GLIN Data Services Shared/TempData/'
# Full path of the Excel workbook that stores the directory registry
Excel_file_Name = Data_Import_Starting_Directory + 'Import_Directory_Registry.xlsx'
out('Registering Directories in excel File:{} '.format(Excel_file_Name))
# Write the registry without the row-label index column
df_import_directories.to_excel(Excel_file_Name, index=False)

Registering Directories in excel File:J:/IT GLIN Data Services Shared/TempData/Import_Directory_Registry.xlsx 


In [10]:
# Show (rows, columns) of the directory registry
df_import_directories.shape

(34, 2)

In [11]:
# Show up to the first 100 registered directories
df_import_directories.head(100)

Unnamed: 0,Root_Directory,Sub_Directory
1,Y:/_Kaleida_Input/,Y:/_Kaleida_Input/
2,Y:/_Kaleida_Input/Access,Y:/_Kaleida_Input/Access
3,Y:/_Kaleida_Input/ADP,Y:/_Kaleida_Input/ADP
4,Y:/_Kaleida_Input/ADP\Daily Time Card,Y:/_Kaleida_Input/ADP\Daily Time Card
5,Y:/_Kaleida_Input/ADP\Employee Census,Y:/_Kaleida_Input/ADP\Employee Census
6,Y:/_Kaleida_Input/Available_Slots,Y:/_Kaleida_Input/Available_Slots
7,Y:/_Kaleida_Input/Available_Slots_Past,Y:/_Kaleida_Input/Available_Slots_Past
8,Y:/_Kaleida_Input/DailyAppointments,Y:/_Kaleida_Input/DailyAppointments
9,Y:/_Kaleida_Input/Future Surgeries,Y:/_Kaleida_Input/Future Surgeries
10,Y:/_Kaleida_Input/FutureAppointmentVisits,Y:/_Kaleida_Input/FutureAppointmentVisits


## Step 3 - Discover and Register all CSV files to import

In [None]:
def iterate_directories_to_import_files(Import_Directory_Data_Frame):
    """Register the CSV files of every directory in the given registry.

    For each value of the 'Root_Directory' column, a table name is inferred
    from the path and every CSV file in that directory is registered.

    Fixes: the directories are now read from the frame passed in (the
    argument used to be ignored in favour of the module-level registry), the
    loop variable no longer shadows the builtin ``dir``, and
    ``Process_Step_Name`` is updated at module level instead of being set on
    an unused local.

    Args:
        Import_Directory_Data_Frame: Frame with a 'Root_Directory' column.
    """
    global Process_Step_Name
    out('Step 3 - Discover and Register all CSV files to import ')
    Process_Step_Name = 'Step 3 - Discover and Register all CSV files to import'
    list_of_Directories = Import_Directory_Data_Frame['Root_Directory']
    for dir_count, directory in enumerate(list_of_Directories, start=1):
        out('Directory #{} to find import files : {} \n   '.format(dir_count,directory))
        SQL_Table_Name = infer_table_name_from_path(directory)
        register_all_csv_files_for_import(directory, SQL_Table_Name)

# Run step 3 over the registered directories
iterate_directories_to_import_files(df_import_directories)

Step 3 - Discover and Register all CSV files to import 
Directory #1 to find import files : Y:/_Kaleida_Input/ 
   
Directory #2 to find import files : Y:/_Kaleida_Input/Access 
   
Directory #3 to find import files : Y:/_Kaleida_Input/ADP 
   
Directory #4 to find import files : Y:/_Kaleida_Input/ADP\Daily Time Card 
   
CSV Total#1 File #1 filename: DailyTimecardReport.050421190755.csv  
CSV Total#2 File #2 filename: DailyTimecardReport.051821190747.csv  
CSV Total#3 File #3 filename: DailyTimecardReport.060121190842.csv  
CSV Total#4 File #4 filename: DailyTimecardReport.061521190831.csv  
CSV Total#5 File #5 filename: DailyTimecardReport.062921190854.csv  
CSV Total#6 File #6 filename: DailyTimecardReport.071321190908.csv  
CSV Total#7 File #7 filename: DailyTimecardReport.072721190859.csv  
CSV Total#8 File #8 filename: DailyTimecardReport.081021190954.csv  
CSV Total#9 File #9 filename: DailyTimecardReport.082421190545.csv  
CSV Total#10 File #10 filename: DailyTimecardReport.090

CSV Total#131 File #102 filename: Sub II 5.26 to 11.27.csv  
CSV Total#132 File #103 filename: Sub II 5.27 to 11.28.csv  
CSV Total#133 File #104 filename: SUb II 5.28 to 11.29.csv  
CSV Total#134 File #105 filename: Sub II 5.4 to 11.4.csv  
CSV Total#135 File #106 filename: Sub II 5.5 to 11.5.csv  
CSV Total#136 File #107 filename: Sub II 5.6 to 11.7.csv  
CSV Total#137 File #108 filename: Sub II 5.7 to 11.8.csv  
CSV Total#138 File #109 filename: Sub II 6.1 to 12.1.csv  
CSV Total#139 File #110 filename: Sub II 6.10 to 12.11.csv  
CSV Total#140 File #111 filename: Sub II 6.11 to 12.12.csv  
CSV Total#141 File #112 filename: Sub II 6.14 to 12.15.csv  
CSV Total#142 File #113 filename: Sub II 6.15 to 12.16.csv  
CSV Total#143 File #114 filename: Sub II 6.16 to 12.17.csv  
CSV Total#144 File #115 filename: Sub II 6.17 to 12.18.csv  
CSV Total#145 File #116 filename: Sub II 6.18 to 12.19.csv  
CSV Total#146 File #117 filename: Sub II 6.2 to 12.2.csv  
CSV Total#147 File #118 filename: Su

CSV Total#285 File #118 filename: Sub II 6.20.csv  
CSV Total#286 File #119 filename: Sub II 6.21.csv  
CSV Total#287 File #120 filename: Sub II 6.22.csv  
CSV Total#288 File #121 filename: Sub II 6.23.csv  
CSV Total#289 File #122 filename: Sub II 6.24.csv  
CSV Total#290 File #123 filename: Sub II 6.27.csv  
CSV Total#291 File #124 filename: Sub II 6.28.csv  
CSV Total#292 File #125 filename: Sub II 6.29.csv  
CSV Total#293 File #126 filename: Sub II 6.3.csv  
CSV Total#294 File #127 filename: Sub II 6.30.csv  
CSV Total#295 File #128 filename: Sub II 6.6.csv  
CSV Total#296 File #129 filename: Sub II 6.7.csv  
CSV Total#297 File #130 filename: Sub II 6.8.csv  
CSV Total#298 File #131 filename: Sub II 6.9.csv  
CSV Total#299 File #132 filename: Sub II 7.1.csv  
CSV Total#300 File #133 filename: Sub II 7.11.csv  
CSV Total#301 File #134 filename: Sub II 7.12.csv  
CSV Total#302 File #135 filename: Sub II 7.4.csv  
CSV Total#303 File #136 filename: Sub II 7.5.csv  
CSV Total#304 File #

CSV Total#419 File #7 filename: 2459619_18_20220208001819_dmhmreport_DAVIDK_1831086.csv  
CSV Total#420 File #8 filename: 2459619_19_20220208001921_dmhmreport_DAVIDK_2038850.csv  
CSV Total#421 File #9 filename: 2459626_122_20220215020227_dmhmreport_JENNIFERDUP_2584879.csv  
CSV Total#422 File #10 filename: 2459626_22_20220215002203_dmhmreport_DAVIDK_38819.csv  
CSV Total#423 File #11 filename: 2459626_22_20220215002253_dmhmreport_DAVIDK_5113408.csv  
CSV Total#424 File #12 filename: 2459633_124_20220222020422_dmhmreport_JENNIFERDUP_21132572.csv  
CSV Total#425 File #13 filename: 2459633_16_20220222001618_dmhmreport_DAVIDK_1714096.csv  
CSV Total#426 File #14 filename: 2459633_17_20220222001735_dmhmreport_DAVIDK_3418948.csv  
CSV Total#427 File #15 filename: 2459640_15_20220301001518_dmhmreport_DAVIDK_1729049.csv  
CSV Total#428 File #16 filename: 2459640_17_20220301001720_dmhmreport_DAVIDK_1837058.csv  
CSV Total#429 File #17 filename: 2459641_133_20220302021357_dmhmreport_JENNIFERDUP

CSV Total#504 File #32 filename: 2459550_375_20211201061529_dmhmreport_EHRSUPPORT_2128918.csv  
CSV Total#505 File #33 filename: 2459550_44_20211201004428_dmhmreport_EHRSUPPORT_2744668.csv  
CSV Total#506 File #34 filename: 2459550_57_20211201005705_dmhmreport_EHRSUPPORT_313122.csv  
CSV Total#507 File #35 filename: 2459612_33_20220201003332_dmhmreport_EHRSUPPORT_3033211.csv  
CSV Total#508 File #36 filename: 2459612_39_20220201003948_dmhmreport_EHRSUPPORT_4548543.csv  
CSV Total#509 File #37 filename: 2459612_532_20220201085244_dmhmreport_EHRSUPPORT_3424247.csv  
CSV Total#510 File #38 filename: 2459640_51_20220301005105_dmhmreport_EHRSUPPORT_425889.csv  
CSV Total#511 File #39 filename: 2459640_53_20220301005341_dmhmreport_EHRSUPPORT_3942776.csv  
CSV Total#512 File #40 filename: 2459641_376_20220302061611_dmhmreport_EHRSUPPORT_234503.csv  
CSV Total#513 File #41 filename: 2459671_62_20220401010243_dmhmreport_EHRSUPPORT_428943.csv  
CSV Total#514 File #42 filename: 2459671_77_2022040

CSV Total#607 File #13 filename: 2459266_124_20210220020407_dmhmreport_EHRSUPPORT_5101165.csv  
CSV Total#608 File #14 filename: 2459267_87_20210221012730_dmhmreport_EHRSUPPORT_20110196.csv  
CSV Total#609 File #15 filename: 2459268_82_20210222012258_dmhmreport_EHRSUPPORT_5711451.csv  
CSV Total#610 File #16 filename: 2459269_146_20210223022631_dmhmreport_EHRSUPPORT_2991841.csv  
CSV Total#611 File #17 filename: 2459270_84_20210224012458_dmhmreport_EHRSUPPORT_5746348.csv  
CSV Total#612 File #18 filename: 2459271_127_20210225020726_dmhmreport_EHRSUPPORT_2634363.csv  
CSV Total#613 File #19 filename: 2459272_85_20210226012535_dmhmreport_EHRSUPPORT_33138346.csv  
CSV Total#614 File #20 filename: 2459273_85_20210227012541_dmhmreport_EHRSUPPORT_3978967.csv  
CSV Total#615 File #21 filename: 2459275_85_20210301012538_dmhmreport_EHRSUPPORT_36135169.csv  
CSV Total#616 File #22 filename: 2459276_85_20210302012518_dmhmreport_EHRSUPPORT_1685734.csv  
CSV Total#617 File #23 filename: 2459277_85_

CSV Total#694 File #100 filename: 2459316_12_20210411001241_dmhmreport_MICHAELO_396134.csv  
CSV Total#695 File #101 filename: 2459316_326_20210411052659_dmhmreport_EHRSUPPORT_5896722.csv  
CSV Total#696 File #102 filename: 2459317_79_20210412011951_dmhmreport_EHRSUPPORT_50120121.csv  
CSV Total#697 File #103 filename: 2459317_8_20210412000838_dmhmreport_MICHAELO_3732179.csv  
CSV Total#698 File #104 filename: 2459318_113_20210413015321_dmhmreport_EHRSUPPORT_139574.csv  
CSV Total#699 File #105 filename: 2459318_30_20210413003021_dmhmreport_MICHAELO_193706.csv  
CSV Total#700 File #106 filename: 2459319_19_20210414001919_dmhmreport_MICHAELO_1848370.csv  
CSV Total#701 File #107 filename: 2459319_85_20210414012540_dmhmreport_EHRSUPPORT_3989585.csv  
CSV Total#702 File #108 filename: 2459320_101_20210415014112_dmhmreport_EHRSUPPORT_1065020.csv  
CSV Total#703 File #109 filename: 2459320_10_20210415001052_dmhmreport_MICHAELO_5116652.csv  
CSV Total#704 File #110 filename: 2459321_13_20210

CSV Total#782 File #188 filename: 2459359_96_20210524013657_dmhmreport_EHRSUPPORT_568031.csv  
CSV Total#783 File #189 filename: 2459360_120_20210525020043_dmhmreport_EHRSUPPORT_3746367.csv  
CSV Total#784 File #190 filename: 2459360_6_20210525000617_dmhmreport_MICHAELO_1647893.csv  
CSV Total#785 File #191 filename: 2459361_20_20210526002052_dmhmreport_MICHAELO_4948209.csv  
CSV Total#786 File #192 filename: 2459361_98_20210526013805_dmhmreport_EHRSUPPORT_4109529.csv  
CSV Total#787 File #193 filename: 2459362_13_20210527001352_dmhmreport_MICHAELO_5120958.csv  
CSV Total#788 File #194 filename: 2459362_87_20210527012739_dmhmreport_EHRSUPPORT_38147369.csv  
CSV Total#789 File #195 filename: 2459363_87_20210528012724_dmhmreport_EHRSUPPORT_2396593.csv  
CSV Total#790 File #196 filename: 2459363_9_20210528000917_dmhmreport_MICHAELO_1632546.csv  
CSV Total#791 File #197 filename: 2459364_100_20210529014056_dmhmreport_EHRSUPPORT_5520345.csv  
CSV Total#792 File #198 filename: 2459364_6_2021

CSV Total#869 File #275 filename: 2459403_86_20210707012656_dmhmreport_EHRSUPPORT_5542658.csv  
CSV Total#870 File #276 filename: 2459404_106_20210708014648_dmhmreport_EHRSUPPORT_4141473.csv  
CSV Total#871 File #277 filename: 2459404_13_20210708001302_dmhmreport_JUSTINJ_013403.csv  
CSV Total#872 File #278 filename: 2459405_11_20210709001150_dmhmreport_JUSTINJ_4819385.csv  
CSV Total#873 File #279 filename: 2459405_86_20210709012604_dmhmreport_EHRSUPPORT_3136067.csv  
CSV Total#874 File #280 filename: 2459406_119_20210710015945_dmhmreport_EHRSUPPORT_4452416.csv  
CSV Total#875 File #281 filename: 2459406_12_20210710001201_dmhmreport_JUSTINJ_022485.csv  
CSV Total#876 File #282 filename: 2459407_599_20210711095931_dmhmreport_EHRSUPPORT_2968162.csv  
CSV Total#877 File #283 filename: 2459407_9_20210711001001_dmhmreport_JUSTINJ_5710123.csv  
CSV Total#878 File #284 filename: 2459408_108_20210712014848_dmhmreport_EHRSUPPORT_4330496.csv  
CSV Total#879 File #285 filename: 2459408_9_2021071

CSV Total#957 File #363 filename: 2459447_13_20210820001336_dmhmreport_JUSTINJ_3447846.csv  
CSV Total#958 File #364 filename: 2459448_14_20210821001432_dmhmreport_JUSTINJ_3025561.csv  
CSV Total#959 File #365 filename: 2459448_85_20210821012503_dmhmreport_EHRSUPPORT_260940.csv  
CSV Total#960 File #366 filename: 2459449_100_20210822014011_dmhmreport_EHRSUPPORT_1076225.csv  
CSV Total#961 File #367 filename: 2459449_16_20210822001610_dmhmreport_JUSTINJ_811034.csv  
CSV Total#962 File #368 filename: 2459450_12_20210823001202_dmhmreport_JUSTINJ_113860.csv  
CSV Total#963 File #369 filename: 2459450_98_20210823013901_dmhmreport_EHRSUPPORT_5992847.csv  
CSV Total#964 File #370 filename: 2459451_133_20210824021332_dmhmreport_EHRSUPPORT_2644690.csv  
CSV Total#965 File #371 filename: 2459451_27_20210824002749_dmhmreport_JUSTINJ_477304.csv  
CSV Total#966 File #372 filename: 2459452_103_20210825014351_dmhmreport_EHRSUPPORT_5035271.csv  
CSV Total#967 File #373 filename: 2459452_14_20210825001

CSV Total#1044 File #450 filename: 2459491_101_20211003014124_dmhmreport_EHRSUPPORT_2364610.csv  
CSV Total#1045 File #451 filename: 2459491_10_20211003001042_dmhmreport_JUSTINJ_4129072.csv  
CSV Total#1046 File #452 filename: 2459492_17_20211004001706_dmhmreport_JUSTINJ_515411.csv  
CSV Total#1047 File #453 filename: 2459492_80_20211004012001_dmhmreport_EHRSUPPORT_0109395.csv  
CSV Total#1048 File #454 filename: 2459493_18_20211005001809_dmhmreport_JUSTINJ_81084.csv  
CSV Total#1049 File #455 filename: 2459493_99_20211005014000_dmhmreport_EHRSUPPORT_5821967.csv  
CSV Total#1050 File #456 filename: 2459494_15_20211006001551_dmhmreport_JUSTINJ_5022060.csv  
CSV Total#1051 File #457 filename: 2459494_85_20211006012535_dmhmreport_EHRSUPPORT_3434123.csv  
CSV Total#1052 File #458 filename: 2459495_105_20211007014525_dmhmreport_EHRSUPPORT_2028156.csv  
CSV Total#1053 File #459 filename: 2459495_26_20211007002604_dmhmreport_JUSTINJ_17456.csv  
CSV Total#1054 File #460 filename: 2459496_16_20

CSV Total#1130 File #536 filename: 2459534_102_20211115014235_dmhmreport_EHRSUPPORT_3438021.csv  
CSV Total#1131 File #537 filename: 2459534_15_20211115001517_dmhmreport_JUSTINJ_1633104.csv  
CSV Total#1132 File #538 filename: 2459535_119_20211116015937_dmhmreport_EHRSUPPORT_3143029.csv  
CSV Total#1133 File #539 filename: 2459535_18_20211116001805_dmhmreport_JUSTINJ_446430.csv  
CSV Total#1134 File #540 filename: 2459536_14_20211117001448_dmhmreport_JUSTINJ_4741880.csv  
CSV Total#1135 File #541 filename: 2459536_86_20211117012625_dmhmreport_EHRSUPPORT_2323558.csv  
CSV Total#1136 File #542 filename: 2459537_100_20211118014012_dmhmreport_EHRSUPPORT_1064153.csv  
CSV Total#1137 File #543 filename: 2459537_22_20211118002230_dmhmreport_JUSTINJ_2937950.csv  
CSV Total#1138 File #544 filename: 2459538_17_20211119001730_dmhmreport_JUSTINJ_289524.csv  
CSV Total#1139 File #545 filename: 2459538_85_20211119012523_dmhmreport_EHRSUPPORT_21122086.csv  
CSV Total#1140 File #546 filename: 2459539_

CSV Total#1217 File #623 filename: 2459577_86_20211228012639_dmhmreport_EHRSUPPORT_3859395.csv  
CSV Total#1218 File #624 filename: 2459578_14_20211229001419_dmhmreport_JUSTINJ_1945633.csv  
CSV Total#1219 File #625 filename: 2459578_85_20211229012522_dmhmreport_EHRSUPPORT_21113524.csv  
CSV Total#1220 File #626 filename: 2459579_18_20211230001844_dmhmreport_JUSTINJ_4435932.csv  
CSV Total#1221 File #627 filename: 2459579_99_20211230013933_dmhmreport_EHRSUPPORT_3250402.csv  
CSV Total#1222 File #628 filename: 2459580_16_20211231001633_dmhmreport_JUSTINJ_3130930.csv  
CSV Total#1223 File #629 filename: 2459580_85_20211231012531_dmhmreport_EHRSUPPORT_3070070.csv  
CSV Total#1224 File #630 filename: 2459581_11_20220101001158_dmhmreport_JUSTINJ_5726150.csv  
CSV Total#1225 File #631 filename: 2459581_87_20220101012713_dmhmreport_EHRSUPPORT_1250902.csv  
CSV Total#1226 File #632 filename: 2459582_18_20220102001829_dmhmreport_JUSTINJ_2833271.csv  
CSV Total#1227 File #633 filename: 2459582_9

CSV Total#1303 File #709 filename: 2459621_106_20220210014626_dmhmreport_EHRSUPPORT_2557594.csv  
CSV Total#1304 File #710 filename: 2459621_7_20220210000725_dmhmreport_JUSTINJ_2417737.csv  
CSV Total#1305 File #711 filename: 2459622_100_20220211014008_dmhmreport_EHRSUPPORT_732081.csv  
CSV Total#1306 File #712 filename: 2459622_20_20220211002049_dmhmreport_JUSTINJ_4822372.csv  
CSV Total#1307 File #713 filename: 2459623_14_20220212001420_dmhmreport_JUSTINJ_2013360.csv  
CSV Total#1308 File #714 filename: 2459623_86_20220212012624_dmhmreport_EHRSUPPORT_2324327.csv  
CSV Total#1309 File #715 filename: 2459624_387_20220213062728_dmhmreport_EHRSUPPORT_2676089.csv  
CSV Total#1310 File #716 filename: 2459624_9_20220213000928_dmhmreport_JUSTINJ_2743964.csv  
CSV Total#1311 File #717 filename: 2459625_81_20220214012142_dmhmreport_EHRSUPPORT_4167766.csv  
CSV Total#1312 File #718 filename: 2459625_9_20220214000913_dmhmreport_JUSTINJ_1125335.csv  
CSV Total#1313 File #719 filename: 2459626_103

CSV Total#1390 File #796 filename: 2459668_101_20220329014141_dmhmreport_EHRSUPPORT_39118013.csv  
CSV Total#1391 File #797 filename: 2459668_25_20220329002551_dmhmreport_JUSTINJ_477111.csv  
CSV Total#1392 File #798 filename: 2459669_34_20220330003404_dmhmreport_JUSTINJ_27905.csv  
CSV Total#1393 File #799 filename: 2459670_101_20220331014153_dmhmreport_EHRSUPPORT_5113840.csv  
CSV Total#1394 File #800 filename: 2459670_32_20220331003224_dmhmreport_JUSTINJ_2248359.csv  
CSV Total#1395 File #801 filename: 2459671_30_20220401003045_dmhmreport_JUSTINJ_4228309.csv  
CSV Total#1396 File #802 filename: 2459672_140_20220402022057_dmhmreport_EHRSUPPORT_56120372.csv  
CSV Total#1397 File #803 filename: 2459672_7_20220402000720_dmhmreport_JUSTINJ_1918763.csv  
CSV Total#1398 File #804 filename: 2459673_137_20220403021706_dmhmreport_EHRSUPPORT_5141202.csv  
CSV Total#1399 File #805 filename: 2459673_8_20220403000841_dmhmreport_JUSTINJ_3931614.csv  
CSV Total#1400 File #806 filename: 2459674_10_2

CSV Total#1477 File #883 filename: 2459713_85_20220513012512_dmhmreport_EHRSUPPORT_1164615.csv  
CSV Total#1478 File #884 filename: 2459714_7_20220514000728_dmhmreport_JUSTINJ_27448.csv  
CSV Total#1479 File #885 filename: 2459714_86_20220514012606_dmhmreport_EHRSUPPORT_4131546.csv  
CSV Total#1480 File #886 filename: 2459715_6_20220515000701_dmhmreport_JUSTINJ_5917845.csv  
CSV Total#1481 File #887 filename: 2459715_95_20220515013551_dmhmreport_EHRSUPPORT_50106890.csv  
CSV Total#1482 File #888 filename: 2459716_19_20220516001955_dmhmreport_JUSTINJ_5442750.csv  
CSV Total#1483 File #889 filename: 2459716_80_20220516012031_dmhmreport_EHRSUPPORT_29131845.csv  
CSV Total#1484 File #890 filename: 2459717_17_20220517001734_dmhmreport_JUSTINJ_3333876.csv  
CSV Total#1485 File #891 filename: 2459717_87_20220517012708_dmhmreport_EHRSUPPORT_614549.csv  
CSV Total#1486 File #892 filename: 2459718_271_20220518043109_dmhmreport_JUSTINJ_84856.csv  
CSV Total#1487 File #893 filename: 2459718_87_202

CSV Total#1564 File #970 filename: 2459757_10_20220626001047_dmhmreport_JUSTINJ_4533080.csv  
CSV Total#1565 File #971 filename: 2459757_82_20220626012202_dmhmreport_EHRSUPPORT_1127437.csv  
CSV Total#1566 File #972 filename: 2459758_14_20220627001428_dmhmreport_JUSTINJ_2712503.csv  
CSV Total#1567 File #973 filename: 2459758_82_20220627012227_dmhmreport_EHRSUPPORT_2615453.csv  
CSV Total#1568 File #974 filename: 2459759_7_20220628000728_dmhmreport_JUSTINJ_2637035.csv  
CSV Total#1569 File #975 filename: 2459759_89_20220628012909_dmhmreport_EHRSUPPORT_768971.csv  
CSV Total#1570 File #976 filename: 2459760_11_20220629001116_dmhmreport_JUSTINJ_1510083.csv  
CSV Total#1571 File #977 filename: 2459760_99_20220629013937_dmhmreport_EHRSUPPORT_3669165.csv  
CSV Total#1572 File #978 filename: 2459761_104_20220630014440_dmhmreport_EHRSUPPORT_3951989.csv  
CSV Total#1573 File #979 filename: 2459761_12_20220630001226_dmhmreport_JUSTINJ_2638188.csv  
CSV Total#1574 File #980 filename: 2459762_12_

In [None]:
# Save the registry of discovered CSV files (df_import_files) to an Excel workbook.
# NOTE: this rebinds the global Data_Import_Starting_Directory, so any cell run
# afterwards sees the TempData share as the starting directory.
Data_Import_Starting_Directory = 'J:/IT GLIN Data Services Shared/TempData/'

# Target workbook path; the directory string already ends with a slash.
Excel_file_Name = '{}{}'.format(
    Data_Import_Starting_Directory,
    'Discovered_CSV_files_to_import.xlsx',
)

# Announce the destination through the notebook's out() helper before writing.
out(
    'Registering Directories in excel File:{} '.format(Excel_file_Name)
)

# Write the registry without the DataFrame index column.
df_import_files.to_excel(
    Excel_file_Name,
    index=False,
)
