## Optional Precursor Step  - Import Dependent Python Libraries  (If Not Installed) 
#####  PIP Install any Python Libraries below that you dont already have installed 

In [None]:
#PIP Install any Python Libraries you dont already have installed 
#!pip install pyttsx3
#!pip install pandas
#!pip install pyodbc
#!pip import sqlalchemy

## Create SQL Schema from CSV Files 
#### Joe Eberle, Alan Calhoun, Helmi (Al)  Seoud
##### Refactored ON  : 9/20/2022  ---  Revised ON  : 10/6/2022

## Project Setup - Importing Libraries and Initializing Global Variables 

In [1]:
#install dependent Libraries is not already installed 
#!pip install pyttsx3

# Import the necessary Libraries 
import glob, os
import pandas as pd
# import logging 
from pathlib import Path
import pyttsx3
import pyodbc 
import timeit
import time
from datetime import date
from datetime import datetime
import sqlalchemy

# Establish some import parameters 

importing_xlsx_files = False 
importing_csv_files = True 
Data_Import_Starting_Directory = 'Y:/_Kaleida_Input/'
#Data_Import_Starting_Directory = 'C:/Data/'
Process_Name = 'Importing CSV data into SQL'

step_debugging = True
detail_debugging = True
detail_Talking = False # only talk on major steps 
Process_Step_Name = ''  
Reading_Intro = False
Reading_Credits =  False
Reading_Steps = False 
Reading_Terms = False 
printing_output = True
Talking_Code = True
Talking_Voice_Male_Gender = True        # Set to False for Female Voice 
Code_Logging  = True 
event_log_row = 0 

# Create some Global Variables for SQL Constructs 
column_inserts = ''
column_question_mark   = ''
create_table_SQL  = ''
create_real_table_SQL  = ''
insert_records_SQL  = ''
create_schema_SQL  = '' 
create_real_schema_SQL  = '' 
Table_Name_Extension_Daily = '_DI'
Table_Name_Extension_Historical = '_HX'
Table_Name_Extension_Rejected = '_RJ'
Table_Name_Extension_Administrative = '_AD'
Table_Name_Prefix = '[pbic_1_0].'

# Create some Global Variables for SQL Connection
server = 'Kalpwvsqlgppc01' 
database  = 'GPPC_DEV' 
username ='GPPC'
pwd = 'Elephant-Trunk-06'
sql_connector = 'DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';Trusted_Connection=No;UID='+username+';PWD='+pwd
# log_filename='data_importing_log.log'

# Configure the Logging to the DEBUG Level 
# logging.basicConfig(level=logging.DEBUG, filename=log_filename, format= '%(asctime)s %(clientip)-15s %(user)-8s %(message)s')
Text_to_Speech = pyttsx3.init()

## Dictionary For character_replacements List 
character_replacements = { " ":"_","#":"Number","%":"Percentage" \
                         ,'_Unnamed':'','_Level':'',"$":"Dollar",'_1':'' \
                         ,'_2':'','_3':'','_4':'','_5':''  \
                         ,'_6':'','_7':'','_8':'','_9':''  \
                         ,'_0':'',':7':'',':8':'',':':'' }
## Dictionary For replacing data types in databases 
data_type_replacements = { "object":"varchar","float64":"float","int64":"int","%":"Percentage" \
                         ,'_Unnamed':'','datetime64':'timestamp',"timedelta64[ns]":"varchar"}    

## add the glaobal data frames for event loggging and Schema Creation 
df_event_log = pd.DataFrame(columns = ('Event_ID','Process_Name','Event_Name','Event_Date','Event_Time','Task_Start_Time','Task_End_Time','TASk_Duration','Comments'))
df_import_directories = pd.DataFrame(columns = ('Root_Directory','Sub_Directory'))
df_import_files = pd.DataFrame(columns = ('Root_Directory','Sub_Directory','Table_Name','Import_File_Name','File_Size','File_Modified_Date','File_Created_Date'))
import_directory_file_Number  = 0
import_file_Number  = 0 
event_log_row = 0
sub_directory_count = 0 


## Customization - Special Formulas for GPPC 

In [19]:
def customize_table_names(table_name):
    table_name.replace('ADPDaily_Time_Card','Time_Card').replace('Available_Slots_Past','Available_Slots')  \
    .replace('Available_Slots_Past','Available_Slots').replace('DailyAppointments','Appointments') \
    .replace('DailyAppointments','Appointments').replace('HXDX','Historical') \
    .replace('PatientInformationMain18Under','Patient_Information')  \
    .replace('QualityPrimaryCareAnnualVisitReport','Primary_Care_Annual_Visit') \
    .replace('RCM_Adujstments','RCM_Adjustments') \
    .replace('SurgicalAppointmentSummar2DaysAgo','Surgical_Appointment') \
    .replace('SurgicalAppointmentSummar2DaysAgo','Primary_Care_Annual_Visit') \
    .replace('Third_Next_Available','Third_Next_Available') \
    .replace('Third_Next_Available','Third_Next_Available') \
    .replace('Third_Next_Available','Third_Next_Available') \
    .replace('Third_Next_Available','Third_Next_Available') 

    return table_name


def customize_column_name_clean_up(df_to_clean):
    df_to_clean.rename(columns={'Name_':'Name','Appt_Date':'Appointment_Date','Appt_Type':'Appointment_Type' \
                                ,'EbillEsuperbillNumber':'Ebill_Esuper_Bill_Number' \
                                ,'Referral_FromTodo_Selection':'Referral_From_To_Do_Selection' \
                                ,'Wellnow_LocationTodo_Selection':'Wellnow_Location_To_Do_Selection' \
                                ,'Access_Vip':'Access_VIP' \
                                ,'Vip':'VIP' \
                                ,'Todo_DateCreate_Date':'To_Do_Date_Create_Date' \
                               }, inplace = True)
    return df_to_clean


## Project Setup -    Establishing DataFrames & Establishing Global Functions

In [23]:
global Talking_Code
global import_directory_file_Number 
global event_log_row 
global printing_output


def set_up_python_infrastructure():
    initialize_replacement_Dictionaries()    # Set up control libraries for syntactic Consistency 
    Initialize_Text_to_Speach()              # Intitialize Text to Speech Engine 
    df_e_log = create_event_log_dataframe()  # Set up the Event Logging 
    
# Say Whatever the user wants 
def say(speech):
    if Talking_Code:
        Text_to_Speech.say(speech)
        Text_to_Speech.runAndWait()    
    
# Intitialize Text to Speech Engine 
def Initialize_Text_to_Speach():
    Text_to_Speech = pyttsx3.init()
    Text_to_Speech.setProperty('Rate',187)
    voices = Text_to_Speech.getProperty('voices')
    if Talking_Voice_Male_Gender:
        Text_to_Speech.setProperty('voice', voices[0].id)    # Default Male voice registered as 'Dave'
    else: 
        Text_to_Speech.setProperty('voice', voices[1].id)    # Alternate Female voice registered as 'Tina'
    speech = 'The text to speech engine is initialized using pythons pyttsx3 engine'
    Text_to_Speech.say(speech)
    Text_to_Speech.runAndWait()    
    
    
# Say Whatever the user wants 
def say(speech):
    Text_to_Speech.say(speech)
    Text_to_Speech.runAndWait()       
    
# Create dataframe to house Directories 
def create_directory_dataframe():
    df_import_directories = pd.DataFrame(columns = ('Root_Directory','Sub_Directory'))
    return df_import_directories  

def create_import_files_dataframe():
    df_import_files = pd.DataFrame(columns = ('Root_Directory','Sub_Directory','Table_Name','File_Name','File_Size_Bytes','File_Created','File_Modified'))
    return df_import_files  

def add_import_directory(Root_Directory,Sub_Directory):
    global import_directory_file_Number 
    import_directory_file_Number += 1  
    df_import_directories.loc[import_directory_file_Number] = [Root_Directory,Sub_Directory]
    return import_directory_file_Number 

def add_import_file(Root_Directory,Sub_Directory,Table_Name,File_Name,File_Size_Bytes,File_Created,File_Modified):
    global import_file_Number 
    import_file_Number += 1  
    df_import_files.loc[import_file_Number] = [Root_Directory,Sub_Directory,Table_Name,File_Name,File_Size_Bytes,File_Created,File_Modified]
    return import_directory_file_Number  

# Create dataframe to house Directories 
def create_event_log_dataframe():
    df_event_log = pd.DataFrame(columns = ('Event_ID','Process_Name','Event_Name','Event_Date','Event_Time','Task_Start_Time','Task_End_Time','TASk_Duration','Comments'))
    return df_event_log


df_event_log = create_event_log_dataframe()

# Create dataframe to database schema 
def create_database_schema_dataframe():
    df_schema = pd.DataFrame(columns = ('Database_Name','Table_Name','Column_Number','Column_Name','Column_Data_Type','Column_Sample_Data','Column_Description'))
    return df_schema

# Create dataframe to house Directories 
def add_log_event(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    global event_log_row  
    event_log_row += 1  
    df_event_log.loc[event_log_row] = [event_log_row,Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments]

    
# Add Log Events 
def add_log_event_timer(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    global event_log_row  
    event_log_row += 1  
    Event_Date = date.today()
    Event_Time = time.time() 
    df_event_log.loc[event_log_row] = [event_log_row,Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments]
    
# Reset the Event timer start time 
def reset_event_timer(Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments ):
    global event_log_row  
    event_log_row += 1  
    Event_Date = date.today()
    Event_Time = time.time() 
    Task_Start_Time    = time.time()  
    df_event_log.loc[event_log_row] = [event_log_row,Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , Comments]

    # Get a list of all the Subfiles to iterate through 
def list_all_csv_files(path):
    extension = 'csv'
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        out('CSV File #{} filename: {}  '.format(str(csv_file_count),file))

# Get a list of all the Subfiles to iterate through 
def register_all_csv_files_for_import(path, table_name):
    extension = 'csv'
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        File_Size = os.path.getsize(file)
        File_Last_Modified =  time.ctime(os.path.getmtime(file))
        File_Create_Date =  time.ctime(os.path.getctime(file))
        out('CSV File #{} filename: {}  '.format(str(csv_file_count),file)) 
        file_Number = add_import_file(path,path,table_name,file,File_Size,File_Last_Modified,File_Create_Date)
        
def create_import_files_dataframe():
    df_import_files = pd.DataFrame(columns = ('Root_Directory','Sub_Directory','Table_Name','File_Name'))
    return df_import_files  

def add_import_directory(Root_Directory,Sub_Directory):
    global import_directory_file_Number 
    import_directory_file_Number += 1  
    df_import_directories.loc[import_directory_file_Number] = [Root_Directory,Sub_Directory]
    return import_directory_file_Number 

def add_import_File(Root_Directory,Sub_Directory,Table_Name,File_Name):
    global import_file_Number 
    import_file_Number += 1  
    df_import_files.loc[import_file_Number] = [Root_Directory,Sub_Directory,Table_Name,File_Name]
    return import_directory_file_Number          
        
    
def infer_table_name_from_path(path):
    table_name = path.replace(Data_Import_Starting_Directory,"").replace('Y:/_Kaleida_Input/','').replace('Y:_Kaleida_Input','').replace(' ','_').replace('/','').replace('\\','')
    table_name = customize_table_names(table_name)
    return table_name 
        
# Introduction - Overview of CSV to SQL Import Process Steps 
def read_credits(): 
    Dialog = 'This Jupiter Notebook Was  : '
    Dialog = Dialog + 'Developed in Collaboration by Joe Eberle, Alan Calhoun, Helmi (Al) Seoud  '
    Dialog = Dialog + 'Developed in Python starting on 9/20/2022 '
    Dialog = Dialog + 'This package is free AND Open Source and the code is openly available for general Use. '    
    say(Dialog)         
    
# Introduction - Overview of CSV to SQL Import Process Steps 
def read_terms(): 
    Dialog = 'The terminology for this process is : '
    Dialog = Dialog + 'Python. Python is a general-purpose programming language that is widely used for data science.  '
    Dialog = Dialog + 'Structured Query Language (SQL) is one of the worlds most widely used programming languages for manipulating and querying data. '
    Dialog = Dialog + 'CSV. A Comma-Separated Values (CSV)  file is a text file in which information is separated by commas. '
    Dialog = Dialog + 'PANDAS. Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language.  '
    Dialog = Dialog + 'OS PACKAGE - The OS python library provides a portable way of using operating system dependent functionality to allow your python code to run on all platforms '
    say(Dialog)  
    
# Process Steps - Overview of CSV to SQL Import Process Steps 
def read_process_steps():
    Dialog = 'The data flow for this process is : '
    Dialog = Dialog + 'Precursor Step 1: The clinician or administrator enters the patients data into the Electronic Medical Record (EMR). '
    Dialog = Dialog + 'Precursor Step 2: At the end of the day the EMR data is exported into Comma Seperated Values (CSV) files and shared via SFTP. '
    Dialog = Dialog + 'Step 1: Establish The Root Directory. '
    Dialog = Dialog + 'Step 2: Walk the directory structure discovering data to discover all data directories  '
    Dialog = Dialog + 'Step 3: Read the CSV data from each directory into python a PANDAS Dataframe. '
    Dialog = Dialog + 'Step 4: Clean the data and make it consistent in the PANDAS Dataframe. ' 
    Dialog = Dialog + 'Step 5: Check the consistency of the data and perform change control if there are differences. ' 
    Dialog = Dialog + 'Step 6: Convert the pandas dataframes into SQL table Create Statements  '
    Dialog = Dialog + 'Step 7: Creates the SQL tables in the target Database   '
    Dialog = Dialog + 'Step 8: Insert the the PANDAS Rows into SQL using the to_SQL Method.  '
    Dialog = Dialog + 'Step 9: Add event logging to capture the performance of the entire process.  '
    Dialog = Dialog + 'Step 10: Document the SCHEMA into an easy to use Excel Spreadsheet.  '
    Dialog = Dialog + 'Step 11: Check the total number of records imported via SQL to the total raw record count to make sure no data is Left Behind.    '
    say(Dialog)    
    
    
# Introduction - Overview of NoteBooks  
def read_introduction():
    Dialog = 'This jupiter notebook will import all of the CSV files under a specific root directory into a database. '
    Dialog = Dialog +  'This python code will take the CSV files exported froms an Electronic Medical Record platform. '
    Dialog = Dialog + 'and import them into a faster database such as PostgreSQL or SQL Server or SNOW Flake. '
    Dialog = Dialog + 'the data is then available for anaylsis using query tools or ready for visualizations in Power BI or Tableau. '
    say(Dialog)  
    
    
def column_create_SQL (import_df):
    column_name_List = [x.title() for x in import_df.columns] # Create a List of Columns 
    column_Str =  (', '.join(column_name_List)) # Convert List into one String with commas 
    out('Columns =',column_Str)  
    return column_Str            
    
    
def out(dialog):
    global detail_Talking
    if printing_output: 
        print(dialog) 
    if Talking_Code and detail_Talking == True:
        say(dialog)   
    if Talking_Code and (detail_Talking == False) and (dialog.find('step') >= 0):
        say(dialog)     

        
def list_all_xlsx_files(path):
    extension = 'xlsx'
    os.chdir(path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        out('File #{}   is {} '.format(csv_file_count,file))     \
        
        
def explain_the_project():
    if Reading_Intro:
        read_introduction()
    if Reading_Credits:    
        read_credits() 
    if Reading_Steps:
        read_process_steps()
    if Reading_Terms:
        read_terms()        
        
        
        
def convert_data_types(input_df):
    column_datatype_str = str(input_df.dtypes)
    column_datatype_str =  column_datatype_str.replace('dtype: object','').replace('object','varchar[255], ').replace('datetime64[ns]','timestamp, ').replace('float64','float, ')
    out('create column SQL string: {} \n'.format(column_datatype_str))        
        
def read_and_clean_file(data_folder, filename):

    print('Import File =', filename)                 
    df_input_csv = pd.read_csv(filename, nrows=10)
    out('Reading Dataframe Columns before cleanups:{}'.format(df_input_csv.columns))
    df_input_csv.columns = df_input_csv.columns.map('^'.join)
    df_input_csv.columns  = [x.strip().title().replace("^","").replace(" ","_").replace("#","Number").replace("#","Number").replace("%","Percentage") \
                             .replace('_Unnamed','').replace('Unnamed','').replace('Unnamed:','').replace('_Level','').replace("$","Dollar") \
                             .replace('_1','').replace('_2','').replace('_3','').replace('_4','').replace('_5','')  \
                             .replace('_6','').replace('_7','').replace('_8','').replace('_9','')  \
                             .replace('1','').replace('2','').replace('3','').replace('4','').replace('5','')  \
                             .replace('6','').replace('7','').replace('8','').replace('9','').replace('0','')  \
                             .replace('_0','').replace(':7','').replace(':8','').replace(':','').replace('Unnamed: ','')  \
                             for x in df_input_csv.columns]
    return df_input_csv    
 
        
    
def set_up_python_infrastructure():
    initialize_replacement_Dictionaries()    # Set up control libraries for syntactic Consistency 
    Initialize_Text_to_Speach()              # Intitialize Text to Speech Engine 
    df_event_log = create_event_log_dataframe()  # Set up the Event Logging to housae the events of this process 
    create_database_schema_dataframe()       # Set up the Database Schema dataframe to house the schema

## Project Setup -    Database Connectivity & SQL Generation & SQL Execution Functions

In [4]:
#Execute SQL  Dynamically based upon the Connection string  
def execute_SQL(execute_SQL_command):
    global sql_connector
    print('Execute SQL Connect - Call')
    cnxn = pyodbc.connect(sql_connector)
    cursor = cnxn.cursor()
    sql_execute_result = cursor.execute(execute_SQL_command)
    print('After SQL Call','Result Code: ',sql_execute_result)
    out('Executing SQL - After SQL Execute - Command: {} '.format(execute_SQL_command))    
    
    cnxn.commit()
    cursor.close()
    
# The following code uses a SQL server template for Droping tables and replaces the table name 
# spo that it will create the SQL code will drop ANY Table name that is passed to it 
def Create_Drop_Table_SQL(table_name):  
    # Example ---- DROP TABLE [pbic_1_0].[Access_DI]
    Drop_Table_SQL  = 'DROP TABLE [pbic_1_0].[{}]'.format(table_name)
    return Drop_Table_SQL    

In [5]:
print('Talking Code Setting: ',Talking_Code)
print('Talking Code Detail Setting: ',detail_Talking)
if Talking_Code:
    if Talking_Voice_Male_Gender: 
        out('Talking_Code: Text to Voice set to ON. Voice set  to  Default Male Voice. ')
    else: 
        out('Talking_Code: Text to Voice set to ON. Voice set  to  Alternate Female Voice. ')
    if  detail_Talking:   
        out('Talking  Details is set to True say all detailed outputs..  this will be slow and boring ') 
    else: 
        out('Talking  Details is set to False to say only major steps..  this setting will execute fast and tell you the high level steps of the process. ')        

Talking Code Setting:  True
Talking Code Detail Setting:  False
Talking_Code: Text to Voice set to ON. Voice set  to  Default Male Voice. 
Talking  Details is set to False to say only major steps..  this setting will execute fast and tell you the high level steps of the process. 



## Process Reinitialize - Re initialize the Data Frames and all Global Counters to Zero 

In [6]:
global import_directory_file_Number 
global import_file_Number  
global event_log_row 
global sub_directory_count  

# recreate the dataframes 
create_directory_dataframe() 
create_import_files_dataframe() 
create_database_schema_dataframe() 

# reset all the global Counters  
import_directory_file_Number  = 0
import_file_Number  = 0 
event_log_row = 0
sub_directory_count = 0 


## Optional Precursor Step  - Explain the Project 

In [None]:
explain_the_project()        

## Step 1 - Establish the root Directory 

In [7]:
add_log_event(Process_Name,'step 1 - Establish the root Directory: {}'.format(Data_Import_Starting_Directory),datetime.now(),datetime.now(),time.time(),time.time(), 0 ,'Step 1 - Establish the root Directory: {}'.format(Data_Import_Starting_Directory))
Process_Step_Name = 'Step 1 - Establish the root Directory' 
out('Step 1 - Establish the root Directory')
out('The root directory to walk is : {}'.format(Data_Import_Starting_Directory))  
df_event_log.head()

Step 1 - Establish the root Directory
The root directory to walk is : Y:/_Kaleida_Input/


Unnamed: 0,Event_ID,Process_Name,Event_Name,Event_Date,Event_Time,Task_Start_Time,Task_End_Time,TASk_Duration,Comments
1,1,Importing CSV data into SQL,step 1 - Establish the root Directory: Y:/_Kal...,2022-10-13 15:28:58.980930,2022-10-13 15:28:58.980930,1665689000.0,1665689000.0,0,Step 1 - Establish the root Directory: Y:/_Kal...


## Step 2 - Walk the directory structure discovering data to discover all data directories

In [8]:
global sub_directory_count
# Get a list of all the Subfiles to iterate through 
def walk_sub_directories(root_directory):
    global sub_directory_count
    Process_Step_Name = 'Step 2 - Walk the directory structure discovering data to discover all data directories' 
    df_import_directories = create_directory_dataframe() 
    directory_entry = 0 
 
    for root, subdirectories, files in os.walk(root_directory):
        # hard coded - remove this later!!!!!! 
        # old data and Excel data should NOT BE Included under root 
        if (root.find('old') == -1) and (root.find('excel') == -1):
            out('Registering Directory # {} {}  '.format(directory_entry,root ))            
            directory_entry += 1 
            sub_directory_count += 1     
            num = add_import_directory(root, root)        
         
    return df_import_directories

out('Step 2 - Walk the directory structure  to discover all data directories')
add_log_event(Process_Name,'Step 2 - Walk the directory structure  to discover all data directories',datetime.now(),datetime.now(),time.time(),time.time(), 0 ,'Step 2 - Walk the directory structure  to discover all data directories'.format(Data_Import_Starting_Directory))
walk_sub_directories(Data_Import_Starting_Directory)  
out('Step 2 Done - Listing all Registered data Directories: ')


Step 2 - Walk the directory structure  to discover all data directories
Registering Directory # 0 Y:/_Kaleida_Input/  
Registering Directory # 1 Y:/_Kaleida_Input/Access  
Registering Directory # 2 Y:/_Kaleida_Input/ADP  
Registering Directory # 3 Y:/_Kaleida_Input/ADP\Daily Time Card  
Registering Directory # 4 Y:/_Kaleida_Input/ADP\Employee Census  
Registering Directory # 5 Y:/_Kaleida_Input/Available_Slots  
Registering Directory # 6 Y:/_Kaleida_Input/Available_Slots_Past  
Registering Directory # 7 Y:/_Kaleida_Input/Call Center  
Registering Directory # 8 Y:/_Kaleida_Input/CPT Visit  
Registering Directory # 9 Y:/_Kaleida_Input/DailyAppointments  
Registering Directory # 10 Y:/_Kaleida_Input/DailyCPT  
Registering Directory # 11 Y:/_Kaleida_Input/DailyMultipleAppointmentSameDay  
Registering Directory # 12 Y:/_Kaleida_Input/DailyScheduledOfficeAppointmentVisit  
Registering Directory # 13 Y:/_Kaleida_Input/DailySuperbill  
Registering Directory # 14 Y:/_Kaleida_Input/EmployeePerfo

In [9]:
# Persist the directories in an excel file Registry 
Data_Import_Starting_Directory =  'J:/IT GLIN Data Services Shared/TempData/'
Excel_file_Name = Data_Import_Starting_Directory + 'Import_Directory_Registry.xlsx'
out('Registering Directories in excel File:{} '.format(Excel_file_Name))
df_import_directories.to_excel(Excel_file_Name, index=False)

Registering Directories in excel File:J:/IT GLIN Data Services Shared/TempData/Import_Directory_Registry.xlsx 


In [10]:
df_import_directories.shape

(47, 2)

In [11]:
df_import_directories.head(100)

Unnamed: 0,Root_Directory,Sub_Directory
1,Y:/_Kaleida_Input/,Y:/_Kaleida_Input/
2,Y:/_Kaleida_Input/Access,Y:/_Kaleida_Input/Access
3,Y:/_Kaleida_Input/ADP,Y:/_Kaleida_Input/ADP
4,Y:/_Kaleida_Input/ADP\Daily Time Card,Y:/_Kaleida_Input/ADP\Daily Time Card
5,Y:/_Kaleida_Input/ADP\Employee Census,Y:/_Kaleida_Input/ADP\Employee Census
6,Y:/_Kaleida_Input/Available_Slots,Y:/_Kaleida_Input/Available_Slots
7,Y:/_Kaleida_Input/Available_Slots_Past,Y:/_Kaleida_Input/Available_Slots_Past
8,Y:/_Kaleida_Input/Call Center,Y:/_Kaleida_Input/Call Center
9,Y:/_Kaleida_Input/CPT Visit,Y:/_Kaleida_Input/CPT Visit
10,Y:/_Kaleida_Input/DailyAppointments,Y:/_Kaleida_Input/DailyAppointments


## Step 3 - Discover and Register all CSV files to import

In [12]:
def iterate_directories_to_import_files(Import_Directory_Data_Frame):
    out('Step 3 - Discover and Register all CSV files to import ')
    Process_Step_Name = 'Step 3 - Discover and Register all CSV files to import'  
    list_of_Directories = df_import_directories['Root_Directory']
    dir_count = 0
    for dir in list_of_Directories:
        dir_count += 1
        out('Directory #{} to find import files : {}  \n   '.format(dir_count,dir))
        SQL_Table_Name = infer_table_name_from_path(dir)
        register_all_csv_files_for_import(dir, SQL_Table_Name   )

iterate_directories_to_import_files(df_import_directories)

Step 3 - Discover and Register all CSV files to import 
Directory #1 to find import files : Y:/_Kaleida_Input/  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/
Directory #2 to find import files : Y:/_Kaleida_Input/Access  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/Access
CSV File #1 filename: 2459631_274_20220220043457_dmhmreport_EHRSUPPORT_5592087.csv  
CSV File #2 filename: 2459638_97_20220227013752_dmhmreport_EHRSUPPORT_5187581.csv  
CSV File #3 filename: 2459646_85_20220307012546_dmhmreport_EHRSUPPORT_4516065.csv  
CSV File #4 filename: 2459652_467_20220313074723_dmhmreport_EHRSUPPORT_2179577.csv  
CSV File #5 filename: 2459659_138_20220320021809_dmhmreport_EHRSUPPORT_877261.csv  
CSV File #6 filename: 2459666_94_20220327013440_dmhmreport_EHRSUPPORT_3985999.csv  
CSV File #7 filename: 2459673_140_20220403022052_dmhmreport_EHRSUPPORT_51141178.csv  
CSV File #8 filename: 2459680_351_20220410055106_dmhmreport_EHRSUPPORT_582566.csv  
CSV File #9 filename: 24

CSV File #83 filename: Sub II 4.28 to 10.29.csv  
CSV File #84 filename: Sub II 4.29 to 10.30.csv  
CSV File #85 filename: Sub II 4.30 to 10.31.csv  
CSV File #86 filename: Sub II 4.7 to 10.7.csv  
CSV File #87 filename: Sub II 4_1 to 10_2.csv  
CSV File #88 filename: Sub II 4_5 to 10_5.csv  
CSV File #89 filename: Sub II 5.10 to 11.11.csv  
CSV File #90 filename: Sub II 5.11 to 11.12.csv  
CSV File #91 filename: Sub II 5.12 to 11.13.csv  
CSV File #92 filename: Sub II 5.13 to 11.14.csv  
CSV File #93 filename: Sub II 5.14 to 11.15.csv  
CSV File #94 filename: Sub II 5.17 to 11.18.csv  
CSV File #95 filename: Sub II 5.18 to 11.19.csv  
CSV File #96 filename: Sub II 5.19 to 11.20.csv  
CSV File #97 filename: Sub II 5.2 to 11.2.csv  
CSV File #98 filename: Sub II 5.20 to 11.21.csv  
CSV File #99 filename: Sub II 5.21 to 11.22.csv  
CSV File #100 filename: Sub II 5.24 to 11.25.csv  
CSV File #101 filename: Sub II 5.25 to 11.26.csv  
CSV File #102 filename: Sub II 5.26 to 11.27.csv  
CSV F

CSV File #131 filename: Sub II 6.9.csv  
CSV File #132 filename: Sub II 7.1.csv  
CSV File #133 filename: Sub II 7.11.csv  
CSV File #134 filename: Sub II 7.12.csv  
CSV File #135 filename: Sub II 7.4.csv  
CSV File #136 filename: Sub II 7.5.csv  
CSV File #137 filename: Sub II 7.6.csv  
CSV File #138 filename: Sub II 7.7.csv  
CSV File #139 filename: Sub II 7.8.csv  
Directory #8 to find import files : Y:/_Kaleida_Input/Call Center  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/Call Center
Directory #9 to find import files : Y:/_Kaleida_Input/CPT Visit  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/CPT Visit
Directory #10 to find import files : Y:/_Kaleida_Input/DailyAppointments  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/DailyAppointments
CSV File #1 filename: April 2019.csv  
CSV File #2 filename: April 2020.csv  
CSV File #3 filename: April 2021.csv  
CSV File #4 filename: April 2022.csv  
CSV File #5 filename: August 2019.csv  
CSV File #

Directory #30 to find import files : Y:/_Kaleida_Input/OutstandingOpenTriages  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/OutstandingOpenTriages
Directory #31 to find import files : Y:/_Kaleida_Input/OutstandingOpenXrays  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/OutstandingOpenXrays
Directory #32 to find import files : Y:/_Kaleida_Input/PatientExperience  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/PatientExperience
Directory #33 to find import files : Y:/_Kaleida_Input/PatientInformation  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/PatientInformation
Directory #34 to find import files : Y:/_Kaleida_Input/PatientInformation\Main18Under  
   
CSV Files to Import from Directory: Y:/_Kaleida_Input/PatientInformation\Main18Under
CSV File #1 filename: 2459588_144_20220108022428_dmhmreport_EHRSUPPORT_2697197.csv  
CSV File #2 filename: 2459605_158_20220125023834_dmhmreport_JENNIFERDUP_3361737.csv  
CSV File #3 filename: 2459612_

CSV File #21 filename: 2459428_367_20210801060719_dmhmreport_EHRSUPPORT_1144537.csv  
CSV File #22 filename: 2459428_43_20210801004313_dmhmreport_EHRSUPPORT_1226611.csv  
CSV File #23 filename: 2459459_329_20210901052915_dmhmreport_EHRSUPPORT_6121399.csv  
CSV File #24 filename: 2459459_54_20210901005424_dmhmreport_EHRSUPPORT_2330304.csv  
CSV File #25 filename: 2459459_73_20210901011320_dmhmreport_EHRSUPPORT_183400.csv  
CSV File #26 filename: 2459489_360_20211001060009_dmhmreport_EHRSUPPORT_119159.csv  
CSV File #27 filename: 2459489_47_20211001004747_dmhmreport_EHRSUPPORT_4631150.csv  
CSV File #28 filename: 2459489_62_20211001010214_dmhmreport_EHRSUPPORT_139427.csv  
CSV File #29 filename: 2459520_362_20211101060232_dmhmreport_EHRSUPPORT_2481136.csv  
CSV File #30 filename: 2459520_40_20211101004055_dmhmreport_EHRSUPPORT_5318172.csv  
CSV File #31 filename: 2459520_54_20211101005439_dmhmreport_EHRSUPPORT_3836059.csv  
CSV File #32 filename: 2459550_375_20211201061529_dmhmreport_EHR

CSV File #8 filename: 2459261_32_20210215003205_dmhmreport_MICHAELO_331442.csv  
CSV File #9 filename: 2459262_85_20210216012515_dmhmreport_EHRSUPPORT_1419310.csv  
CSV File #10 filename: 2459263_85_20210217012508_dmhmreport_EHRSUPPORT_712292.csv  
CSV File #11 filename: 2459264_107_20210218014708_dmhmreport_EHRSUPPORT_2111206.csv  
CSV File #12 filename: 2459265_117_20210219015725_dmhmreport_EHRSUPPORT_195157.csv  
CSV File #13 filename: 2459266_124_20210220020407_dmhmreport_EHRSUPPORT_5101165.csv  
CSV File #14 filename: 2459267_87_20210221012730_dmhmreport_EHRSUPPORT_20110196.csv  
CSV File #15 filename: 2459268_82_20210222012258_dmhmreport_EHRSUPPORT_5711451.csv  
CSV File #16 filename: 2459269_146_20210223022631_dmhmreport_EHRSUPPORT_2991841.csv  
CSV File #17 filename: 2459270_84_20210224012458_dmhmreport_EHRSUPPORT_5746348.csv  
CSV File #18 filename: 2459271_127_20210225020726_dmhmreport_EHRSUPPORT_2634363.csv  
CSV File #19 filename: 2459272_85_20210226012535_dmhmreport_EHRSUP

CSV File #106 filename: 2459319_19_20210414001919_dmhmreport_MICHAELO_1848370.csv  
CSV File #107 filename: 2459319_85_20210414012540_dmhmreport_EHRSUPPORT_3989585.csv  
CSV File #108 filename: 2459320_101_20210415014112_dmhmreport_EHRSUPPORT_1065020.csv  
CSV File #109 filename: 2459320_10_20210415001052_dmhmreport_MICHAELO_5116652.csv  
CSV File #110 filename: 2459321_13_20210416001321_dmhmreport_MICHAELO_2039509.csv  
CSV File #111 filename: 2459321_85_20210416012546_dmhmreport_EHRSUPPORT_4583910.csv  
CSV File #112 filename: 2459322_10_20210417001031_dmhmreport_MICHAELO_2938527.csv  
CSV File #113 filename: 2459322_112_20210417015249_dmhmreport_EHRSUPPORT_4841672.csv  
CSV File #114 filename: 2459323_147_20210418022743_dmhmreport_EHRSUPPORT_4115207.csv  
CSV File #115 filename: 2459323_9_20210418000924_dmhmreport_MICHAELO_238974.csv  
CSV File #116 filename: 2459324_103_20210419014317_dmhmreport_EHRSUPPORT_1135266.csv  
CSV File #117 filename: 2459324_14_20210419001422_dmhmreport_M

CSV File #203 filename: 2459367_101_20210601014154_dmhmreport_EHRSUPPORT_5384157.csv  
CSV File #204 filename: 2459367_13_20210601001345_dmhmreport_MICHAELO_4421414.csv  
CSV File #205 filename: 2459368_101_20210602014151_dmhmreport_EHRSUPPORT_47146886.csv  
CSV File #206 filename: 2459368_8_20210602000847_dmhmreport_JUSTINJ_4629901.csv  
CSV File #207 filename: 2459369_17_20210603001743_dmhmreport_JUSTINJ_4128410.csv  
CSV File #208 filename: 2459369_86_20210603012632_dmhmreport_EHRSUPPORT_3068363.csv  
CSV File #209 filename: 2459370_17_20210604001708_dmhmreport_JUSTINJ_744611.csv  
CSV File #210 filename: 2459370_86_20210604012633_dmhmreport_EHRSUPPORT_32116795.csv  
CSV File #211 filename: 2459371_10_20210605001006_dmhmreport_JUSTINJ_531270.csv  
CSV File #212 filename: 2459372_16_20210606001638_dmhmreport_JUSTINJ_3645523.csv  
CSV File #213 filename: 2459372_93_20210606013309_dmhmreport_EHRSUPPORT_575731.csv  
CSV File #214 filename: 2459373_121_20210607020131_dmhmreport_EHRSUPPOR

CSV File #300 filename: 2459416_129_20210720020912_dmhmreport_EHRSUPPORT_776936.csv  
CSV File #301 filename: 2459416_16_20210720001642_dmhmreport_JUSTINJ_4014536.csv  
CSV File #302 filename: 2459417_104_20210721014408_dmhmreport_EHRSUPPORT_434560.csv  
CSV File #303 filename: 2459417_19_20210721001907_dmhmreport_JUSTINJ_518907.csv  
CSV File #304 filename: 2459418_18_20210722001825_dmhmreport_JUSTINJ_2448780.csv  
CSV File #305 filename: 2459418_86_20210722012626_dmhmreport_EHRSUPPORT_24130613.csv  
CSV File #306 filename: 2459419_20_20210723002038_dmhmreport_JUSTINJ_3722921.csv  
CSV File #307 filename: 2459419_87_20210723012736_dmhmreport_EHRSUPPORT_3517208.csv  
CSV File #308 filename: 2459420_112_20210724015237_dmhmreport_EHRSUPPORT_3577647.csv  
CSV File #309 filename: 2459420_13_20210724001327_dmhmreport_JUSTINJ_257293.csv  
CSV File #310 filename: 2459421_135_20210725021520_dmhmreport_EHRSUPPORT_1896811.csv  
CSV File #311 filename: 2459421_8_20210725000823_dmhmreport_JUSTINJ_

CSV File #397 filename: 2459464_81_20210906012109_dmhmreport_EHRSUPPORT_7102303.csv  
CSV File #398 filename: 2459465_15_20210907001559_dmhmreport_JUSTINJ_5813608.csv  
CSV File #399 filename: 2459465_86_20210907012641_dmhmreport_EHRSUPPORT_39126441.csv  
CSV File #400 filename: 2459466_135_20210908021525_dmhmreport_EHRSUPPORT_2374550.csv  
CSV File #401 filename: 2459466_29_20210908002909_dmhmreport_JUSTINJ_823955.csv  
CSV File #402 filename: 2459467_14_20210909001449_dmhmreport_JUSTINJ_4831110.csv  
CSV File #403 filename: 2459467_86_20210909012601_dmhmreport_EHRSUPPORT_0127137.csv  
CSV File #404 filename: 2459468_13_20210910001347_dmhmreport_JUSTINJ_4521490.csv  
CSV File #405 filename: 2459468_99_20210910013913_dmhmreport_EHRSUPPORT_463812.csv  
CSV File #406 filename: 2459469_11_20210911001145_dmhmreport_JUSTINJ_434928.csv  
CSV File #407 filename: 2459469_85_20210911012544_dmhmreport_EHRSUPPORT_428924.csv  
CSV File #408 filename: 2459470_19_20210912001906_dmhmreport_JUSTINJ_54

CSV File #495 filename: 2459513_97_20211025013711_dmhmreport_EHRSUPPORT_1085467.csv  
CSV File #496 filename: 2459514_23_20211026002311_dmhmreport_JUSTINJ_96749.csv  
CSV File #497 filename: 2459514_86_20211026012602_dmhmreport_EHRSUPPORT_025896.csv  
CSV File #498 filename: 2459515_19_20211027001946_dmhmreport_JUSTINJ_4520229.csv  
CSV File #499 filename: 2459515_85_20211027012535_dmhmreport_EHRSUPPORT_33103414.csv  
CSV File #500 filename: 2459516_26_20211028002617_dmhmreport_JUSTINJ_1629508.csv  
CSV File #501 filename: 2459516_86_20211028012626_dmhmreport_EHRSUPPORT_2342483.csv  
CSV File #502 filename: 2459517_23_20211029002322_dmhmreport_JUSTINJ_2022722.csv  
CSV File #503 filename: 2459517_85_20211029012524_dmhmreport_EHRSUPPORT_2112909.csv  
CSV File #504 filename: 2459518_11_20211030001159_dmhmreport_JUSTINJ_5812127.csv  
CSV File #505 filename: 2459518_98_20211030013855_dmhmreport_EHRSUPPORT_5496074.csv  
CSV File #506 filename: 2459519_20_20211031002010_dmhmreport_JUSTINJ_81

CSV File #592 filename: 2459562_20_20211213002059_dmhmreport_JUSTINJ_584516.csv  
CSV File #593 filename: 2459562_79_20211213011949_dmhmreport_EHRSUPPORT_4968851.csv  
CSV File #594 filename: 2459563_15_20211214001534_dmhmreport_JUSTINJ_3230068.csv  
CSV File #595 filename: 2459563_85_20211214012559_dmhmreport_EHRSUPPORT_5840126.csv  
CSV File #596 filename: 2459564_117_20211215015753_dmhmreport_EHRSUPPORT_5234658.csv  
CSV File #597 filename: 2459564_17_20211215001742_dmhmreport_JUSTINJ_4035501.csv  
CSV File #598 filename: 2459565_16_20211216001642_dmhmreport_JUSTINJ_4015376.csv  
CSV File #599 filename: 2459565_85_20211216012555_dmhmreport_EHRSUPPORT_54109821.csv  
CSV File #600 filename: 2459566_15_20211217001503_dmhmreport_JUSTINJ_112963.csv  
CSV File #601 filename: 2459566_84_20211217012440_dmhmreport_EHRSUPPORT_3881702.csv  
CSV File #602 filename: 2459567_14_20211218001449_dmhmreport_JUSTINJ_4817231.csv  
CSV File #603 filename: 2459567_96_20211218013656_dmhmreport_EHRSUPPORT_

CSV File #691 filename: 2459612_126_20220201020620_dmhmreport_EHRSUPPORT_1928990.csv  
CSV File #692 filename: 2459612_19_20220201001940_dmhmreport_JUSTINJ_3937162.csv  
CSV File #693 filename: 2459613_17_20220202001717_dmhmreport_JUSTINJ_1622914.csv  
CSV File #694 filename: 2459613_88_20220202012819_dmhmreport_EHRSUPPORT_1780745.csv  
CSV File #695 filename: 2459614_13_20220203001400_dmhmreport_JUSTINJ_5819752.csv  
CSV File #696 filename: 2459614_98_20220203013823_dmhmreport_EHRSUPPORT_217123.csv  
CSV File #697 filename: 2459615_17_20220204001726_dmhmreport_JUSTINJ_2431144.csv  
CSV File #698 filename: 2459615_87_20220204012739_dmhmreport_EHRSUPPORT_3852902.csv  
CSV File #699 filename: 2459616_7_20220205000714_dmhmreport_JUSTINJ_1419118.csv  
CSV File #700 filename: 2459616_99_20220205014001_dmhmreport_EHRSUPPORT_5918021.csv  
CSV File #701 filename: 2459617_16_20220206001631_dmhmreport_JUSTINJ_2927741.csv  
CSV File #702 filename: 2459617_88_20220206012839_dmhmreport_EHRSUPPORT_3

CSV File #790 filename: 2459665_100_20220326014006_dmhmreport_EHRSUPPORT_52854.csv  
CSV File #791 filename: 2459665_14_20220326001411_dmhmreport_JUSTINJ_930628.csv  
CSV File #792 filename: 2459666_18_20220327001822_dmhmreport_JUSTINJ_2132521.csv  
CSV File #793 filename: 2459666_91_20220327013111_dmhmreport_EHRSUPPORT_1086025.csv  
CSV File #794 filename: 2459667_15_20220328001540_dmhmreport_JUSTINJ_3843120.csv  
CSV File #795 filename: 2459667_92_20220328013215_dmhmreport_EHRSUPPORT_1390335.csv  
CSV File #796 filename: 2459668_101_20220329014141_dmhmreport_EHRSUPPORT_39118013.csv  
CSV File #797 filename: 2459668_25_20220329002551_dmhmreport_JUSTINJ_477111.csv  
CSV File #798 filename: 2459669_34_20220330003404_dmhmreport_JUSTINJ_27905.csv  
CSV File #799 filename: 2459670_101_20220331014153_dmhmreport_EHRSUPPORT_5113840.csv  
CSV File #800 filename: 2459670_32_20220331003224_dmhmreport_JUSTINJ_2248359.csv  
CSV File #801 filename: 2459671_30_20220401003045_dmhmreport_JUSTINJ_42283

CSV File #888 filename: 2459716_19_20220516001955_dmhmreport_JUSTINJ_5442750.csv  
CSV File #889 filename: 2459716_80_20220516012031_dmhmreport_EHRSUPPORT_29131845.csv  
CSV File #890 filename: 2459717_17_20220517001734_dmhmreport_JUSTINJ_3333876.csv  
CSV File #891 filename: 2459717_87_20220517012708_dmhmreport_EHRSUPPORT_614549.csv  
CSV File #892 filename: 2459718_271_20220518043109_dmhmreport_JUSTINJ_84856.csv  
CSV File #893 filename: 2459718_87_20220518012741_dmhmreport_EHRSUPPORT_39130233.csv  
CSV File #894 filename: 2459719_271_20220519043110_dmhmreport_JUSTINJ_914721.csv  
CSV File #895 filename: 2459719_87_20220519012753_dmhmreport_EHRSUPPORT_5228591.csv  
CSV File #896 filename: 2459720_117_20220520015702_dmhmreport_EHRSUPPORT_011874.csv  
CSV File #897 filename: 2459720_271_20220520043110_dmhmreport_JUSTINJ_927058.csv  
CSV File #898 filename: 2459721_101_20220521014152_dmhmreport_EHRSUPPORT_5168092.csv  
CSV File #899 filename: 2459721_271_20220521043110_dmhmreport_JUSTIN

CSV File #986 filename: 2459765_6_20220704000607_dmhmreport_JUSTINJ_78892.csv  
CSV File #987 filename: 2459765_82_20220704012205_dmhmreport_EHRSUPPORT_485348.csv  
CSV File #988 filename: 2459766_105_20220705014532_dmhmreport_EHRSUPPORT_30137418.csv  
CSV File #989 filename: 2459766_16_20220705001628_dmhmreport_JUSTINJ_2821424.csv  
CSV File #990 filename: 2459767_12_20220706001225_dmhmreport_JUSTINJ_2320644.csv  
CSV File #991 filename: 2459767_89_20220706012905_dmhmreport_EHRSUPPORT_427396.csv  
CSV File #992 filename: 2459768_7_20220707000738_dmhmreport_JUSTINJ_3735715.csv  
CSV File #993 filename: 2459768_88_20220707012802_dmhmreport_EHRSUPPORT_090336.csv  
CSV File #994 filename: 2459769_10_20220708001048_dmhmreport_JUSTINJ_4727482.csv  
CSV File #995 filename: 2459769_88_20220708012853_dmhmreport_EHRSUPPORT_5167720.csv  
CSV File #996 filename: 2459770_11_20220709001136_dmhmreport_JUSTINJ_3523534.csv  
CSV File #997 filename: 2459770_87_20220709012750_dmhmreport_EHRSUPPORT_49120

CSV File #130 filename: Sub II  Third Next 6.8.csv  
CSV File #131 filename: Sub II 12.21.csv  
CSV File #132 filename: Sub II 2.15.csv  
CSV File #133 filename: Sub II 2.17.csv  
CSV File #134 filename: Sub II 2.22.csv  
CSV File #135 filename: Sub II 2.25.csv  
CSV File #136 filename: Sub II 2.28.csv  
CSV File #137 filename: SUB II 2.4.csv  
CSV File #138 filename: Sub II 3.1.csv  
CSV File #139 filename: Sub II 3.10.csv  
CSV File #140 filename: Sub II 3.11.csv  
CSV File #141 filename: Sub II 5.4.csv  
CSV File #142 filename: Sub II 5.5.csv  
CSV File #143 filename: Sub II 6.29.csv  
CSV File #144 filename: Sub II 6.30.csv  
CSV File #145 filename: Sub II Third Available 7.1.csv  
CSV File #146 filename: Sub II Third Next 1.10.csv  
CSV File #147 filename: Sub II Third Next 1.11.csv  
CSV File #148 filename: Sub II Third Next 1.13.csv  
CSV File #149 filename: Sub II third next 1.18.csv  
CSV File #150 filename: Sub II Third next 1.19.csv  
CSV File #151 filename: Sub II third nex

CSV File #292 filename: Sub III Third Next 2.3.csv  
CSV File #293 filename: Sub III Third Next 2.4.csv  
CSV File #294 filename: SUb III Third Next 2.7.csv  
CSV File #295 filename: Sub III Third Next 2.9.csv  
CSV File #296 filename: Sub III Third Next 3.11.csv  
CSV File #297 filename: Sub III Third Next 3.14.csv  
CSV File #298 filename: Sub III Third Next 3.15.csv  
CSV File #299 filename: Sub III Third Next 3.16.csv  
CSV File #300 filename: Sub III Third Next 3.17.csv  
CSV File #301 filename: Sub III Third Next 3.22.csv  
CSV File #302 filename: Sub III Third Next 3.23.csv  
CSV File #303 filename: Sub III Third Next 3.24.csv  
CSV File #304 filename: Sub III Third Next 3.25.csv  
CSV File #305 filename: Sub III Third Next 3.29.csv  
CSV File #306 filename: Sub III Third Next 3.30.csv  
CSV File #307 filename: Sub III Third Next 3.8.csv  
CSV File #308 filename: Sub III Third Next 3.9.csv  
CSV File #309 filename: Sub III Third Next 4.1.csv  
CSV File #310 filename: Sub III thi

In [13]:
# Persist the csv files in an excel import file Registry 
Data_Import_Starting_Directory =  'J:/IT GLIN Data Services Shared/TempData/'
Excel_file_Name = Data_Import_Starting_Directory + 'Discovered_CSV_files_to_import.xlsx'
out('Registering Directories in excel File:{} '.format(Excel_file_Name))
df_import_files.to_excel(Excel_file_Name, index=False)


Registering Directories in excel File:J:/IT GLIN Data Services Shared/TempData/Discovered_CSV_files_to_import.xlsx 


In [14]:
#df_import_files.loc[1  , ['Root_Directory','Import_file_Name']]
df_import_files.head(2) 

Unnamed: 0,Root_Directory,Sub_Directory,Table_Name,Import_File_Name,File_Size,File_Modified_Date,File_Created_Date
1,Y:/_Kaleida_Input/Access,Y:/_Kaleida_Input/Access,Access,2459631_274_20220220043457_dmhmreport_EHRSUPPO...,10262,Thu Jul 7 09:18:16 2022,Thu Jul 7 09:18:16 2022
2,Y:/_Kaleida_Input/Access,Y:/_Kaleida_Input/Access,Access,2459638_97_20220227013752_dmhmreport_EHRSUPPOR...,6014,Thu Jul 7 09:18:19 2022,Thu Jul 7 09:18:19 2022


In [None]:
#df_import_files.loc[1  , ['Root_Directory','Import_file_Name']]
df_import_files.shape 

In [15]:
def inspect_raw_file(raw_csv_file_to_import , table_name , Head_Rows):
    out('About to Read File:{}'.format(raw_csv_file_to_import))   
    if Head_Rows == 2:
        df_import_file = pd.read_csv(raw_csv_file_to_import ,header=[0,1] )
        df_import_file.columns = df_import_file.columns.map('^'.join)
        
        
        df_import_file.columns = [x.strip().title().replace("^","").replace(" ","_").replace("#","Number").replace("#","Number").replace("%","Percentage") \
                             .replace('_Unnamed','').replace('Unnamed','').replace('_level_','').replace('Unnamed:','').replace('_Level','').replace("$","Dollar") \
                             .replace('_1','').replace('_2','').replace('_3','').replace('_4','').replace('_5','')  \
                             .replace('_6','').replace('_7','').replace('_8','').replace('_9','').replace('_Level','')  \
                             .replace('1','').replace('2','').replace('3','').replace('4','').replace('5','')  \
                             .replace('6','').replace('7','').replace('8','').replace('9','').replace('0','')  \
                             .replace('_0','').replace('0_','').replace(':7','').replace(':8','').replace(':','').replace('Unnamed: ','')  \
                             for x in df_import_file.columns]  
        
    else: 
        df_import_file = pd.read_csv(raw_csv_file_to_import   )   
        df_import_file.columns = [x.strip().title().replace("^","").replace(" ","_").replace("#","Number").replace("#","Number").replace("%","Percentage") \
                             .replace('_Unnamed','').replace('Unnamed','').replace('Unnamed:','').replace('_Level','').replace("$","Dollar") \
                             .replace('_1','').replace('_2','').replace('_3','').replace('_4','').replace('_5','')  \
                             .replace('_6','').replace('_7','').replace('_8','').replace('_9','')  \
                             .replace('1','').replace('2','').replace('3','').replace('4','').replace('5','')  \
                             .replace('6','').replace('7','').replace('8','').replace('9','').replace('0','')  \
                             .replace('_0','').replace(':7','').replace(':8','').replace(':','').replace('Unnamed: ','')  \
                             for x in df_import_file.columns]          
    
    out('Read in raw csv file:{} rows:{} columns:{}'.format(raw_csv_file_to_import,df_import_file.shape[0], df_import_file.shape[1]))
    
    return df_import_file

In [16]:
global detail_debugging

for i, row in df_import_files.iterrows():
#    print(' Iter = {}'.format(i))
    Index = i 
    Root_Dir   =   row[0]      
    Sub_Dir    =   row[1]    
    Table_Name =   row[2]
    Import_CSV =   row[3]  
    #print('Detail Debugging:', detail_debugging ) 
    
    if detail_debugging:    
        out('Iter:{}, Root Dir:{}, Sub_Dir:{}, Table:{}, Import_CSV:{} '.format(i,Root_Dir,Sub_Dir,Table_Name,Import_CSV))
    if Table_Name == 'Access':
        Header_Row_Count = 2
    else:         
        Header_Row_Count = 1
    
    if i == 1:
        file_to_import = Root_Dir + '/' + Import_CSV
        if detail_debugging:
            out(' File to Import: {} '.format(file_to_import))
        df_import_raw = inspect_raw_file(file_to_import, Table_Name, Header_Row_Count)
    

Iter:1, Root Dir:Y:/_Kaleida_Input/Access, Sub_Dir:Y:/_Kaleida_Input/Access, Table:Access, Import_CSV:2459631_274_20220220043457_dmhmreport_EHRSUPPORT_5592087.csv 
 File to Import: Y:/_Kaleida_Input/Access/2459631_274_20220220043457_dmhmreport_EHRSUPPORT_5592087.csv 
About to Read File:Y:/_Kaleida_Input/Access/2459631_274_20220220043457_dmhmreport_EHRSUPPORT_5592087.csv
Read in raw csv file:Y:/_Kaleida_Input/Access/2459631_274_20220220043457_dmhmreport_EHRSUPPORT_5592087.csv rows:73 columns:19
Iter:2, Root Dir:Y:/_Kaleida_Input/Access, Sub_Dir:Y:/_Kaleida_Input/Access, Table:Access, Import_CSV:2459638_97_20220227013752_dmhmreport_EHRSUPPORT_5187581.csv 
Iter:3, Root Dir:Y:/_Kaleida_Input/Access, Sub_Dir:Y:/_Kaleida_Input/Access, Table:Access, Import_CSV:2459646_85_20220307012546_dmhmreport_EHRSUPPORT_4516065.csv 
Iter:4, Root Dir:Y:/_Kaleida_Input/Access, Sub_Dir:Y:/_Kaleida_Input/Access, Table:Access, Import_CSV:2459652_467_20220313074723_dmhmreport_EHRSUPPORT_2179577.csv 
Iter:5, Roo

Iter:204, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 4.29.csv 
Iter:205, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 4.5.csv 
Iter:206, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 4.7 to 4.8.csv 
Iter:207, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 5.10.csv 
Iter:208, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 5.11.csv 
Iter:209, Root Dir:Y:/_Kaleida_Input/Available_Slots_Past, Sub_Dir:Y:/_Kaleida_Input/Available_Slots_Past, Table:Available_Slots_Past, Import_CSV:Main 5.12.csv 
Iter:210, Root Dir:Y:/_Kaleid

In [20]:
df_import_raw = customize_column_name_clean_up(df_import_raw)
df_import_raw.head(3) 

Unnamed: 0,Name,Account,Appointment_Date,Booked_Date,Number_Days_To_Get_In,Appointment_Type,VIP,Direct,Provider,Access_SpecialtyTodo_Selection,Wellnow_SpecialtyTodo_Selection,Wellnow_Location_To_Do_Selection,Wellnow_Ticket_Number,Wellnow_Uc_Visit_Date,To_Do_Date_Create_Date,Ebill_Esuper_Bill_Number,Referral_From_To_Do_Selection,Access_VIP,Access_Direct
0,"Adamczak, Jillian",927293,02/14/2022,02/10/2022,4,WellNow - New Patient Visit,Yes,No,"Daye, Lisa, MD",,Orthopedic Vip,Lantran,,02/08/2022,02/14/2022,9664835.0,,No,No
1,"Alwasim, Hesham",909327,02/14/2022,02/11/2022,3,Access,No,No,Team Jennings,Cardiovascular,,,,,01/06/2022,9669640.0,Other,Yes,No
2,"Bender, Easton",931590,02/18/2022,02/18/2022,0,WellNow - New Patient Visit,No,No,"Aduddle, Melissa, PA",,,,,,,9692062.0,,No,No


In [None]:
global detail_debugging 
detail_debugging = True 

##  Step 4: Clean the data and make it consistent in the PANDAS Dataframe.

In [21]:
column_datatype_Str = str(df_event_log.dtypes)
column_datatype_Str =  column_datatype_Str.replace('dtype: object','').replace('object','varchar[255], ').replace('datetime64[ns]','timestamp, ').replace('float64','float, ')
print('create column SQL string:\n', column_datatype_Str)

create column SQL string:
 Event_ID                   varchar[255], 
Process_Name               varchar[255], 
Event_Name                 varchar[255], 
Event_Date         timestamp, 
Event_Time         timestamp, 
Task_Start_Time           float, 
Task_End_Time             float, 
TASk_Duration              varchar[255], 
Comments                   varchar[255], 



In [22]:
file_Name = df_import_files[1]

KeyError: 1

In [None]:
df_import_files.head() 

## Step 5: Check the data consistency and perform change control if there are differences. 

## Step 6: Convert the pandas dataframes into SQL table Create Statements  

## Step 7: Creates the SQL tables in the target Database 

## Step 8: Insert the the PANDAS Rows into SQL using the to_SQL Method. 

## Step 9: Add event logging to capture the performance of the entire process. 

## Step 10: Document the SCHEMA into an easy to use Excel Spreadsheet. 

## Step 11: Check the total number of records imported via SQL to the total raw record count to make sure no data is Left Behind. 

In [None]:
sql_columns = column_create_SQL(df_e_log)
sql_columns_cleaned = ' '.join([character_replacements.get(i, i) for i in sql_columns.split()])
sql_dtypes =  df_e_log.dtypes 
sql_column_data_types_cleaned = ' '.join([data_type_replacements.get(i, i) for i in sql_dtypes])

print('SQL Columns  =',sql_columns ,' /n ' 'SQL Columns Cleaned =',sql_columns_cleaned) 
 


In [None]:
def determine_table_name_for_path(path):

    table_name = path.replace(Data_Import_Starting_Directory,"").replace(' ','_').replace('/','').replace('\\','')
    return table_name 


path = 'Y:/_Kaleida_Input/Available_Slots/'
table_name_for_path = determine_table_name_for_path(path)
print('Table Name:{} is determined from path:{}'.format(table_name_for_path,path) )

In [None]:
def read_and_import_all_csv_files(path):
    
    if importing_xlsx_files: 
        extension = 'xlsx'
    if importing_csv_files: 
        extension = 'csv'        
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        print('File',str(csv_file_count),": ", file)
        add_log_event(Process_Name,'Found Table to Import',Event_Date,Event_Time,Task_Start_Time,Task_End_Time, Task_Duration , 'Found Table to Import :' + file):
   
        
read_and_import_all_csv_files('Y:/_Kaleida_Input/Available_Slots/')  

In [None]:
Drop_Table_SQL  = Create_Drop_Table_SQL('Access_DI')
# SQL_Result = execute_SQL(Drop_Table_SQL)
# out('SQL Execute Result: {} '.format(SQL_Result))

In [None]:
print('Starting Iport Walk at Directory:',Data_Import_Starting_Directory)
list_all_subdirectories(Data_Import_Starting_Directory)
df_import_directories

In [None]:
df_import_directories.head(100)

In [None]:
# Get a list of all the Subfiles to iterate through 
def execute_table_Create_SQL(path, table_name):
    
    executing_SQL = False 
    
    extension = 'csv'
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        if csv_file_count == 1:
            print('Creating_Table',table_name,' based upon 1st sample','File',str(csv_file_count),": ", file)     
            data_folder =  path
            filename = data_folder + table_name +'.csv'
            print ('raw file name to rcreate from:',filename)
            DROP_table_SQL = build_DROP_table_SQL(file, table_name,'_DI]')  
            create_table_SQL = build_table_create_SQL(file, table_name,'_DI]')

    if executing_SQL:     
            print ('/n DROP SQL = ',DROP_table_SQL  )         
            print ('/n create SQL = ',create_table_SQL  )     
            execute_SQL(DROP_table_SQL)            
            execute_SQL(create_table_SQL)
        
execute_table_Create_SQL('Y:/_Kaleida_Input/Access/','Access')   

In [None]:
# Get a list of all the Subfiles to iterate through 
def walk_sub_directories(root_directory): 
 #print('list_sub_directories for root Ditrectory {} \n'.format(root_directory) )   
 directory_entry = 0 
 Table_Name = ''
 print('About to Walk')
 for root,directory, file in os.walk(root_directory):
    print('Walking....   ')
    #print('Root Directory: {} subdir: {} \n'.format(root_directory,directory_entry) )
    
    if root.find('Access') >= 0:
        Table_Name = 'Access'
    elif root.find('Daily Time Card') >= 0:
        Table_Name = 'Daily_Time_Card'                
    elif root.find('Employee Census') >= 0:
        Table_Name = 'Employee_Census'               
    elif root.find('ADP') >= 0:
        Table_Name = 'ADP'  
    elif root.find('Employee Census') >= 0:
        Table_Name = 'Employee_Census'   
    elif root.find('Available_Slots') >= 0:
        Table_Name = 'Available_Slots'     
    elif root.find('Available_Slots_Past') >= 0:
        Table_Name = 'Available_Slots_Past'    
    elif root.find('Call Center') >= 0:
        Table_Name = 'Call_Center'    
    elif root.find('CPT Visit') >= 0:
        Table_Name = 'CPT_Visit'    
    elif root.find('Visit') >= 0:
        Table_Name = 'Visit'  
    elif root.find('DailyAppointments') >= 0:
        Table_Name = 'Daily_Appointments'    
    elif root.find('DailyCPT') >= 0:
        Table_Name = 'Daily_CPT'    
    elif root.find('DailyMultipleAppointmentSameDay') >= 0:
        Table_Name = 'Daily_Multiple_Appointment_Same_Day'   
    elif root.find('DailyScheduledOfficeAppointmentVisit') >= 0:
        Table_Name = 'Daily_Scheduled_Office_Appointment_Visit'               
        PatientExperienceDefault
    else:
        Table_Name = 'Unknown Table Name'
    print('Root Directory'+str(directory_entry) +':',root+'Default Table Name for Directory :', Table_Name,' \n')            
    #list_all_csv_files(root)  
    if Table_Name != 'Unknown Table Name':
        execute_table_Create_SQL(root,Table_Name)  
    directory_entry += 1     
 
# Test function call     
list_sub_directories('Y:/_Kaleida_Input/')    

In [None]:
# Get a list of all the Subfiles to iterate through 
def walk_sub_directories(root_directory):
    df_import_directories = create_directory_dataframe() 
    print('list_sub_directories for root Directory {} \n'.format(root_directory) )   
    directory_entry = 0 
    files_to_import = 0 
    Table_Name = ''
    for root, subdirectories, files in os.walk(root_directory):
        directory_entry += 1 
        print('Directory entry# {} {} .'.format(directory_entry,root ))
        #print('Root: {} has subdirs{}'.format(root, subdirectories)
              
    print('End of function ')              
    return df_import_directories
              
walk_sub_directories('Y:/_Kaleida_Input/')              

#         for subdir in subdirectories:
#               print('S---- Subdir: {}'.format(subdir)
#               for file in files:
#                    if dir_entry.find('.csv') >= 1:
#                        files_to_import += 1 
#                        print('F---- ----- file# {} to import: {}'.format(files_to_import,file) 
#                    else:
#                        print('NF--- ----- file#   to NOT import: {}'.format(file)                           
                             
                             
                 
        
        
#         for dir_entry in directory_contents:
#             print('Walking directory{} entry {}'.format(directory_entry, dir_entry))
#             if os.path.isfile(dir_entry):
#                 if dir_entry.find('.csv') >= 1:
#                     print('File #{} to Import: {}'.format(directory_entry, dir_entry))
             
            
#         row_values = [root,directory, file,'Pretend Tablename']
#         df_import_directories.loc[directory_entry] = row_values

#         print('Walking Root Directory: {} subdir: {} '.format(root_directory,directory_entry) )
#         if root.find('Access') >= 0:
#             Table_Name = 'Access'
#         elif root.find('Daily Time Card') >= 0:
#             Table_Name = 'Daily_Time_Card'                
#         elif root.find('Employee Census') >= 0:
#             Table_Name = 'Employee_Census'               
#         elif root.find('ADP') >= 0:
#             Table_Name = 'ADP'  
#         elif root.find('Employee Census') >= 0:
#             Table_Name = 'Employee_Census'   
#         elif root.find('Available_Slots') >= 0:
#             Table_Name = 'Available_Slots'     
#         elif root.find('Available_Slots_Past') >= 0:
#             Table_Name = 'Available_Slots_Past'    
#         elif root.find('Call Center') >= 0:
#             Table_Name = 'Call_Center'    
#         elif root.find('CPT Visit') >= 0:
#             Table_Name = 'CPT_Visit'    
#         elif root.find('Visit') >= 0:
#             Table_Name = 'Visit'  
#         elif root.find('DailyAppointments') >= 0:
#             Table_Name = 'Daily_Appointments'    
#         elif root.find('DailyCPT') >= 0:
#             Table_Name = 'Daily_CPT'    
#         elif root.find('DailyMultipleAppointmentSameDay') >= 0:
#             Table_Name = 'Daily_Multiple_Appointment_Same_Day'   
#         elif root.find('DailyScheduledOfficeAppointmentVisit') >= 0:
#             Table_Name = 'Daily_Scheduled_Office_Appointment_Visit'               
#             PatientExperienceDefault
#         else:
#             Table_Name = 'Unknown Table Name'
#         #print('Root Directory'+str(directory_entry) +':',root+'Default Table Name for Directory :', Table_Name,' \n')            
#         #list_all_csv_files(root)  
#         if Table_Name != 'Unknown Table Name':
#             execute_table_Create_SQL(root,Table_Name)  
#         directory_entry += 1     

#         # Test function call   \


In [None]:
def build_table_create_SQL(filename , table_name, table_Postfix ):
    Table_Schema_Prefix = '[pbic_1_0].[' 
    Table_Name = table_name        # 'Daily_Appointments'
    Table_Name_Postfix = table_Postfix  # Passed in as '_DI]' or '_HX]'
    Table_Name = Table_Name + Table_Name_Postfix
    column_str = '' 
    print('Import File =', filename)   

    df_input_csv = pd.read_csv(filename, nrows=10)
    number_of_columns = df_input_csv.shape[1]
    header_columns = df_input_csv.columns
    row1_columns = df_input_csv.iloc[0:1, : ]
    print('Header: ',header_columns)
    print('row1_columns: ',row1_columns)    

    #column_list = [x.strip().title().replace("^","") for x in df_input_csv.columns]
    for col in range(0,number_of_columns):
        #column_str = column_str + str(df_input_csv.columns[col]) + ' ' + str(df_input_csv.dtypes[col]) + ' NULL, ' 
        column_str = column_str + str(df_input_csv.columns[col]) + ' ' + str(df_input_csv.dtypes[col]) + ', ' 
    if table_Postfix == '_DI]':
        column_str = column_str.replace("object","nvarchar(255) ").replace("float64","nvarchar(255)  ").replace("int64","nvarchar(255)  ")
    if table_Postfix == '_HX]':
        column_str = column_str.replace("object","nvarchar(255) ").replace("float64","float  ").replace("int64","int  ")
        
    Create_table_SQL  = 'Create Table ' + Table_Schema_Prefix + Table_Name + "("  + column_str + "); "
    Create_table_SQL = Create_table_SQL.replace(", );",");") 
    return Create_table_SQL

data_folder =  Path('Y:\_Kaleida_Input\DailyAppointments')
filename = data_folder / 'July 2022.csv'
create_table_SQL = build_table_create_SQL(filename, 'Daily_Appointments','_DI]')
print("\n Historical Table" + create_table_SQL) 

In [None]:
df_import_directories.head()

In [None]:
#DROP the table Dynamically 
def drop_table_SQL(drop_SQL):
    global sql_connector
    print('Drop Table - Before SQL Connect - Call')
 #   logging.debug('Drop Table - Before SQL Connect - Call')
    cnxn = pyodbc.connect(sql_connector)
    cursor = cnxn.cursor()
    sql_execute_result = cursor.execute(drop_SQL)
    print('After SQL Call','Result Code: ',sql_execute_result)
 #   logging.debug('Drop Table - After SQL Connect - Call')    
    
    cnxn.commit()
    cursor.close()


In [None]:
#Execute SQL  Dynamically 
def execute_SQL(execute_SQL_command):
    global sql_connector
    print('Execute SQL Connect - Call')
    cnxn = pyodbc.connect(sql_connector)
    cursor = cnxn.cursor()
    sql_execute_result = cursor.execute(execute_SQL_command)
    print('After SQL Call','Result Code: ',sql_execute_result)
 #   logging.debug('Drop Table - After SQL Connect - Call')    
    
    cnxn.commit()
    cursor.close()

In [None]:
drop_SQL = 'DROP TABLE [pbic_1_0].[Access_DI]'

drop_table_SQL(drop_SQL)

In [None]:
def Create_Drop_Table_SQL(Table_Name):
    #drop_SQL = 'DROP TABLE [pbic_1_0].[' + Table_Name + ']'
    
    
    drop_SQL =  'DROP TABLE [pbic_1_0].[{}]'.format("'", Table_Name)
 
# IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[pbic_1_0].[Access_DI]') AND type in (N'U'))
# DROP TABLE [pbic_1_0].[Access_DI]    
    return drop_SQL

In [None]:
Drop_Table_SQL = Create_Drop_Table_SQL('Access_DI')

print(Drop_Table_SQL) 
#execute_SQL(Drop_Table_SQL)

In [None]:
drop_SQL = 'DROP TABLE [pbic_1_0].[Daily_Appointments_DI]'
drop_table_SQL(drop_SQL)

In [None]:
def read_and_clean_file(data_folder, filename):

    print('Import File =', filename)                 
    df_input_csv = pd.read_csv(filename, nrows=10)
    #print(df_input_csv.columns)
    df_input_csv.columns = df_input_csv.columns.map('^'.join)
    df_input_csv.columns  = [x.strip().title().replace("^","").replace(" ","_").replace("#","Number").replace("#","Number").replace("%","Percentage") \
                             .replace('_Unnamed','').replace('Unnamed','').replace('Unnamed:','').replace('_Level','').replace("$","Dollar") \
                             .replace('_1','').replace('_2','').replace('_3','').replace('_4','').replace('_5','')  \
                             .replace('_6','').replace('_7','').replace('_8','').replace('_9','')  \
                             .replace('1','').replace('2','').replace('3','').replace('4','').replace('5','')  \
                             .replace('6','').replace('7','').replace('8','').replace('9','').replace('0','')  \
                             .replace('_0','').replace(':7','').replace(':8','').replace(':','').replace('Unnamed: ','')  \
                             for x in df_input_csv.columns]

    print(df_input_csv.columns)


In [None]:
def build_Dataframe_table_create_SQL(dataframe_to_Create, table_name, table_Postfix  ):
    Table_Schema_Prefix = '[pbic_1_0].[' 
    Table_Name = table_name        # 'Daily_Appointments'
    Table_Name_Postfix = table_Postfix  # Passed in as '_DI]' or '_HX]'
    Table_Name = Table_Name + Table_Name_Postfix
    column_str = '' 
 

    df_input_csv = pd.read_csv(filename, nrows=10)
    number_of_columns = df_input_csv.shape[1]
    header_columns = df_input_csv.columns
    row1_columns = df_input_csv.iloc[0:1, : ]
    print('Header: ',header_columns)
    print('row1_columns: ',row1_columns)    

    
    df_input_csv.columns  = [x.strip().title().replace("Address 1","Street_Address").replace("Address 2","Address_Two") \
                             .replace("^","").replace("-","_").replace(" ","_").replace("#","Number").replace("#","Number") \
                             .replace("%","Percentage").replace('_Unnamed','').replace('Unnamed','') \
                             .replace('Unnamed:','').replace('_Level','').replace("$","Dollar") \
                             .replace('_1','').replace('_2','').replace('_3','').replace('_4','').replace('_5','')  \
                             .replace('_6','').replace('_7','').replace('_8','').replace('_9','')  \
                             .replace('1','').replace('2','').replace('3','').replace('4','').replace('5','')  \
                             .replace('6','').replace('7','').replace('8','').replace('9','').replace('0','')  \
                             .replace('_0','').replace(':7','').replace(':8','').replace(':','').replace('Unnamed: ','')  \
                             for x in df_input_csv.columns]
    
         
            
    #column_list = [x.strip().title().replace("^","") for x in df_input_csv.columns]
    for col in range(0,number_of_columns):
        #column_str = column_str + str(df_input_csv.columns[col]) + ' ' + str(df_input_csv.dtypes[col]) + ' NULL, ' 
        column_str = column_str + str(df_input_csv.columns[col]) + ' ' + str(df_input_csv.dtypes[col]) + ', ' 
    if table_Postfix == '_DI]':
        column_str = column_str.replace("object","nvarchar(255) ").replace("float64","nvarchar(255)  ").replace("int64","nvarchar(255)  ")
    if table_Postfix == '_HX]':
        column_str = column_str.replace("object","nvarchar(255) ").replace("float64","float  ").replace("int64","int  ")
        
    Create_table_SQL  = 'Create Table ' + Table_Schema_Prefix + Table_Name + "("  + column_str + "); "
    Create_table_SQL = Create_table_SQL.replace(", );",");") 
    return Create_table_SQL

data_folder =  Path('Y:\_Kaleida_Input\DailyAppointments')
filename = data_folder / 'July 2022.csv'
create_table_SQL = build_table_create_SQL(filename, 'Daily_Appointments','_DI]')
print("\n Historical Table" + create_table_SQL) 

In [None]:
def build_DROP_table_SQL(filename , table_name, table_Postfix ):
    Table_Schema_Prefix = '[pbic_1_0].[' 
    Table_Name = table_name        # 'Daily_Appointments'
    Table_Name_Postfix = table_Postfix  # Passed in as '_DI]' or '_HX]'
    Table_Name = Table_Name + Table_Name_Postfix
    DROP_table_SQL  = 'DROP Table ' + Table_Schema_Prefix + Table_Name  
    return DROP_table_SQL


In [None]:
data_folder =  Path('Y:\_Kaleida_Input\DailyAppointments')
filename = data_folder / 'July 2022.csv'
create_table_SQL = build_table_create_SQL(filename, 'Daily_Appointments','_DI]')

print("\nDaily Table Create SQL: \n" + create_table_SQL)

create_table_SQL = build_table_create_SQL(filename, 'Daily_Appointments','_HX]')

print("\nHistorical Table Create SQL: \n" + create_table_SQL)

In [None]:
# Y:/_Kaleida_Input/Access/2459652_467_20220313074723_dmhmreport_EHRSUPPORT_2179577.csv
data_folder =  Path('Y:\_Kaleida_Input\Access')
filename = data_folder / '2459652_467_20220313074723_dmhmreport_EHRSUPPORT_2179577.csv'
create_table_SQL = build_table_create_SQL(filename, 'Daily_Appointments')

print("\n" + create_table_SQL)     

In [None]:
# IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[pbic_1_0].[Daily_Appointments_DI]') AND type in (N'U'))
# DROP TABLE [pbic_1_0].[Daily_Appointments_DI]


drop_SQL = 'DROP TABLE [pbic_1_0].[Daily_Appointments_DI]'
drop_table_SQL(drop_SQL)

In [None]:
execute_SQL(create_table_SQL)
print("\n" + create_table_SQL)

In [None]:
# data_folder = Path('Y:\_Kaleida_Input\Access')
# filename = data_folder / '2459638_97_20220227013752_dmhmreport_EHRSUPPORT_5187581.csv'

data_folder =  Path('Y:\_Kaleida_Input\DailyAppointments')
filename = data_folder / 'July 2022.csv'
 
read_and_clean_file(data_folder,filename) 

In [None]:
 def create_table_headers(input_data_frame):
    global column_inserts
    global column_question_mark
    global create_table_SQL
    global create_real_table_SQL
    global insert_records_SQL
    global Table_Name_Prefix
    global Table_Name_Extension_Daily
    global Table_Name_Extension_Historical
    global create_schema_SQL
    global create_real_schema_SQL
        
    Table_Name = 'Access'
    sample_row = 3
    create_table_SQL = ''
    create_real_table_SQL = ''
    insert_records_SQL = ''
    Table_Name_Daily = Table_Name_Prefix + Table_Name + Table_Name_Extension_Daily
    Table_Name_Historical = Table_Name_Prefix + Table_Name + Table_Name_Extension_Historical
    df_cols = input_data_frame.columns
    df_types = input_data_frame.dtypes
    col_number = 0
    column_creates = ''
    column_values = ''
    column_inserts = ''
    real_column_creates = ''
    column_question_mark = ''
    for column_name in df_cols:
        col_number = col_number + 1


        if df_types[col_number-1] == 'object':
            sql_column_type = 'Varchar(255)'
        elif df_types[col_number-1] == 'float64': 
            sql_column_type = 'Varchar(255)'
        else:
            sql_column_type = 'Varchar(255)'

        if df_types[col_number-1] == 'object':
            real_sql_column_type = 'Varchar(255)'
        elif df_types[col_number-1] == 'float64': 
            real_sql_column_type = 'Varchar(255)'
        else:
            real_sql_column_type = 'Varchar(255)'                
                

        column_name = column_name.title()
        column_name = column_name.replace(' ','_')
        column_name = column_name.replace('#','Number')
        column_inserts = column_inserts + column_name
        column_value = str(input_data_frame.iloc[sample_row,col_number-1])
        column_creates = column_creates + column_name + " "  + sql_column_type
        real_column_creates = real_column_creates + column_name + " "  + real_sql_column_type
        column_values = column_values + "'" + column_value + "'"
        print(col_number, '  ', column_name)
            
    insert_records_SQL = 'INSERT INTO ' + Table_Name_Daily + '  (' + column_inserts + ') + VALUES (' + column_values + '); '
    create_table_SQL = 'CREATE TABLE ' + Table_Name_Daily + '  (' + column_creates + '); '
    create_real_table_SQL = 'CREATE TABLE ' + Table_Name_Historical + '  (' + real_column_creates + '); '
    create_schema_SQL = create_schema_SQL + create_table_SQL
    create_real_schema_SQL = create_real_schema_SQL + create_real_table_SQL
    #logging.debug('Table Create Finished')
     

In [None]:
#filename = 'Y:\_Kaleida_Input\Access\2459638_97_20220227013752_dmhmreport_EHRSUPPORT_5187581.csv'
start_time1 = time.time()

data_folder = Path('Y:\_Kaleida_Input\Access')
filename = data_folder / '2459638_97_20220227013752_dmhmreport_EHRSUPPORT_5187581.csv'

print('Import File =', filename)                 
df_input_csv = pd.read_csv(filename, nrows=10, header=[0,1])
df_input_csv.columns = df_input_csv.columns.map('_'.join)
create_table_headers(df_input_csv) 


#print('\n' + ' column_inserts:  ', column_inserts, '\n') 
#print('\n' + 'column_question_mark:  ', column_question_mark, '\n') 
#print('\n' + 'insert_records_SQL:  ', insert_records_SQL, '\n')
print('\n' + 'create_table_SQL:  ', create_table_SQL, '\n')
print('\n' + 'create_real_table_SQL:  ', create_real_table_SQL, '\n')

# logging.debug('Table Create Finished')
end_time2 = time.time()
print(f'{start_time1-end_time2:.5f}')


In [None]:
df_input_csv.head(2)

In [None]:
dailyappointment_df = pd.read_csv(r'C:\DailyAppointment_A_J_Test\Main 3.1 to 9.1.xlsx', low_memory = False, header = [0,1])
Main 3.1 to 9.1.xlsx

Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots
dailyappointment_df.columns = dailyappointment_df.columns.map('_'.join)
dailyappointment_df = dailyappointment_df.fillna(0)
dailyappointment_df['Unnamed: 3_Appt Length'] = dailyappointment_df['Unnamed: 3_Appt Length'].astype(int)
#...
engine = sqlalchemy.create_engine(
               "mssql+pyodbc://gppc:Elephant-Trunk-06@Kalpwvsqlgppc01/GPPC_DEV?DRIVER={ODBC Driver 17 for SQL Server}",
               echo=False)
# # df = pd.read_sql_query('SELECT * FROM pbic_1_0.Access',conn)
import time 
start_time1 = time.time()
#dailyappointment_df.to_sql('dailyappointment_test', con=engine, if_exists='replace')
end_time2 = time.time()
print(f'{start_time1-end_time2:.5f}')

In [None]:
start_time1 = time.time()
# Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Main 2.28 to 8.28.xlsx' )
# Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Main 3.1 to 9.1.xlsx' )
#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Main 3.2 to 9.2.xlsx' )

#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub II 2.28 to 8.28.xlsx' )
#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub II 3.1 to 9.1.xlsx' )
#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub II 3.2 to 8.2.xlsx' )

# Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub III 2.28 to 8.28.xlsx' )
#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub III 3.1 to 9.1.xlsx' )
#Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Sub III 3.2.xlsx' )

Available_Slots_df = pd.read_excel(r'Z:\GPPC_SOURCE_FILES\Oneday_data_04072022\Oneday_data\Available_Slots\Main 3.1 to 9.1.xlsx' )
Available_Slots_df.rename(columns={'Doctor Name' : 'Doctor_Name','Loc UId' : 'Loc_UId'}, inplace = True)
 
shape = Available_Slots_df.shape
print('\nDataFrame Shape :', shape)
print('\nNumber of rows :', shape[0])
print('\nNumber of columns :', shape[1])

# logging.debug('Table Create Finished')
end_time2 = time.time()
# row_count = Available_Slots_df.shape[1]
file_read_time = end_time2-start_time1
print(' Rows Count:{}'.format(row_count) )

print('Read raw file to Pandas Read Time',f'{file_read_time :.5f}')
print(' Rows per second:',str(row_count/execute_time) )

create_table_headers(Available_Slots_df)
 


In [None]:
# Get a list of all the Subfiles to iterate through 
def list_all_csv_files(path):
    
    extension = 'xlsx'
    os.chdir(path)
    print('CSV Files to Import from Directory:', path)
    csv_file_count = 0
    for file in glob.glob('*.{}'.format(extension)):
        csv_file_count += 1 
        print('File',str(csv_file_count),": ", file)
   
        
list_all_csv_files('Z:/GPPC_SOURCE_FILES/Oneday_data4_1_22/Available_Slots/')   

 

In [None]:
constring = "mssql+pyodbc://gppc:Elephant-Trunk-06@Kalpwvsqlgppc01/GPPC_DEV?DRIVER={ODBC Driver 17 for SQL Server}"  
engine = sqlalchemy.create_engine(constring,fast_executemany=True,echo=False)

start_time1 = time.time()


Available_Slots_df.to_sql('Available_Slots', con=engine, if_exists="append",index=False,chunksize=20000, dtype =  
                             {'datefld': sqlalchemy.DateTime(), 
                             'intfld':  sqlalchemy.types.INTEGER(),
                             'strfld': sqlalchemy.types.NVARCHAR(length=255),
                             'floatfld': sqlalchemy.types.Float(precision=3, asdecimal=True),
                             'booleanfld': sqlalchemy.types.Boolean,
                             'bool' : sqlalchemy.types.Boolean,
                             'float64' : sqlalchemy.types.NVARCHAR(length=255),
                             'int64' : sqlalchemy.types.INTEGER(),
                             'object' : sqlalchemy.types.NVARCHAR(length=50000)})


# shape = Available_Slots_df.shape
# print('\nDataFrame Shape :', shape)
# print('\nNumber of rows :', shape[0])
# print('\nNumber of columns :', shape[1])
 
# logging.debug('Table Create Finished')
end_time2 = time.time()
# row_count = Available_Slots_df.shape[1]
execute_time = end_time2-start_time1
print(' Rows Count:{}'.format(row_count) )

print('SQL Insert Execution Time',f'{execute_time :.5f}')
print(' Rows per second:',str(row_count/execute_time) )



In [None]:
create_table_headers(Available_Slots_df)

In [None]:
CREATE TABLE [pbic_1_0].[Available_Slots](
	[Date] [nvarchar](20) NULL,
	[Day] [nvarchar](50) NULL,
	[Time] [nvarchar](20) NULL,
	[Length] [int] NULL,
	[Dr] [nvarchar](50) NULL,
	[Doctor_Name] [nvarchar](max) NULL,
	[Loc] [nvarchar](50) NULL,
	[Loc_UId] [nvarchar](50) NULL,
	[Type] [nvarchar](50) NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO

In [None]:
Available_Slots_df.info 

In [None]:
constring = "mssql+pyodbc://gppc:Elephant-Trunk-06@Kalpwvsqlgppc01/GPPC_DEV?DRIVER={ODBC Driver 17 for SQL Server}"  
engine = sqlalchemy.create_engine(constring,fast_executemany=True,echo=False)

df.to_sql('Available_Slots', con=engine, if_exists="append",index=False,chunksize=1000, dtype =  
                             {'datefld': sqlalchemy.DateTime(), 
                             'intfld':  sqlalchemy.types.INTEGER(),
                             'strfld': sqlalchemy.types.NVARCHAR(length=255),
                             'floatfld': sqlalchemy.types.Float(precision=3, asdecimal=True),
                             'booleanfld': sqlalchemy.types.Boolean,
                             'bool' : sqlalchemy.types.Boolean,
                             'float64' : sqlalchemy.types.NVARCHAR(length=255),
                             'int64' : sqlalchemy.types.INTEGER(),
                             'object' : sqlalchemy.types.NVARCHAR(length=50000)})

In [None]:
#Insert a row of values 
def insert_row_SQL(insert_row_SQL):
    global server #= 'Kalpwvsqlgppc01' 
    global database #database = 'GPPC_DEV' 
    global username # =  'GPPC'
    global pwd # ='Elephant-Trunk-06'
    cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';Trusted_Connection=No;UID='+username+';PWD='+pwd)
    cursor = cnxn.cursor()
    sql_execute_result = cursor.execute(insert_row_SQL)
    print('After SQL Call','Result Code: ',sql_execute_result)
    cnxn.commit()
    cursor.close()

In [None]:
#Insert a row of values 
def insert_row_SQL(insert_row_SQL):
    global server #= 'Kalpwvsqlgppc01' 
    global database #database = 'GPPC_DEV' 
    global username # =  'GPPC'
    global pwd # ='Elephant-Trunk-06'
    cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';Trusted_Connection=No;UID='+username+';PWD='+pwd)
    cursor = cnxn.cursor()
    sql_execute_result = cursor.execute(insert_row_SQL)
    print('After SQL Call','Result Code: ',sql_execute_result)
    cnxn.commit()
    cursor.close()

In [None]:
drop_table_SQL = 'DROP TABLE [pbic_1_0].[Access_DI]'
drop_table_SQL(drop_table_SQL)

In [None]:
drop_table_SQL = 'DROP TABLE [pbic_1_0].[dailyappointment_test]'
drop_SQL_table(drop_table_SQL)

In [None]:
constring = "mssql+pyodbc://gppc:Elephant-Trunk-06@Kalpwvsqlgppc01/GPPC_DEV?DRIVER={ODBC Driver 17 for SQL Server}"  
engine = sqlalchemy.create_engine(constring,fast_executemany=True,echo=False)

df.to_sql('Hx', con=engine, if_exists="append",index=False,chunksize=1000, dtype = 
{'datefld': sqlalchemy.DateTime(), 
'intfld': sqlalchemy.types.INTEGER(),
'strfld': sqlalchemy.types.NVARCHAR(length=255),
'floatfld': sqlalchemy.types.Float(precision=3, asdecimal=True),
'booleanfld': sqlalchemy.types.Boolean,
'bool' : sqlalchemy.types.Boolean,
'float64' : sqlalchemy.types.NVARCHAR(length=255),
'int64' : sqlalchemy.types.INTEGER(),
'object' : sqlalchemy.types.NVARCHAR(length=50000)})

In [None]:
insert_SQL = create_insert_row(df_input_csv, 3, 'Access_DI')
print('Insert SQL: ', insert_SQL)

In [None]:
create_SQL_table(create_table_SQL)

In [None]:
global Process_Name 

today = date.today()

start_time1 = time.time() 
time.sleep(3)
end_time2 = time.time() 
execute_time = end_time2-start_time1
add_log_event(Process_Name,'Starting Import Process',date.today(),datetime.now(),start_time1,end_time2, execute_time , "Starting Import Process")
start_time1 = time.time() 
time.sleep(2)
end_time2 = time.time() 
execute_time = end_time2-start_time1
add_log_event(Process_Name,'Reading the CSV files',date.today(),datetime.now(),start_time1,end_time2, execute_time , "Reading the CSV filesS")
start_time1 = time.time() 
time.sleep(1)
end_time2 = time.time() 
execute_time = end_time2-start_time1
add_log_event(Process_Name,'Writing to SQL Server',date.today(),datetime.now(),start_time1,end_time2, execute_time , "Writing to SQL Server")
start_time1 = time.time() 
time.sleep(2)
end_time2 = time.time() 
execute_time = end_time2-start_time1
add_log_event(Process_Name,'Import Process END ',date.today(),datetime.now(),start_time1,end_time2, execute_time , "Import Process END")

df_e_log.head() 

In [None]:
insert_SQL = create_insert_row(df_input_csv, 3, 'Access_DI')
print('Insert SQL: ', insert_SQL)

In [None]:
def insert_rows(insert_SQL):
    

In [None]:
#Identify the parent directory for all the data subdirectories 
parent_dir = 'Y:/_Kaleida_Input/' #path to folder that contians the data folders

path = parent_dir
import_file_type = '\*.csv'
create_table_SQL = ''
insert_records_SQL = '' 
create_schema_SQL = '' 
column_inserts  = ''
column_question_mark  = '' 

In [None]:
def iterate_import_files(data_directory_path, import_file_type ):
    all_files = glob.glob(data_directory_path + import_file_type)
    return all_files


In [None]:
insert_SQL = create_insert_row(df_input_csv, 3, 'Access_DI')
   ...: print('Insert SQL: ', insert_SQL)
   ...: insert_row_SQL(insert_SQL)

In [None]:
 create_SQL_table(create_table_SQL)

In [None]:
def create_table_headers(input_data_frame):
    global column_inserts  
    global column_question_mark  
    global create_table_SQL 
    global insert_records_SQL 

    Table_Name = 'Access'
    df_cols = input_data_frame.columns
    df_types = input_data_frame.dtypes 
    col_number = 0 
    column_inserts = '' 
    column_creates = '' 
    column_question_mark = '' 
    for column_name in df_cols:
        col_number = col_number + 1
        if len(column_inserts) > 1:
            column_inserts = column_inserts + ", "
        if len(column_creates) > 1:
            column_creates = column_creates + ", "     
        if df_types[col_number-1] == 'object':
            sql_column_type = 'Varchar(255)'
        elif df_types[col_number-1] == 'float64':  
            sql_column_type = 'Varchar(255)' 
        else:
            sql_column_type = 'Varchar(255)' 
        column_name = column_name.title()
        column_name = column_name.replace(' ','_')
        column_name = column_name.replace('#','Number')
        column_inserts = column_inserts + column_name 
        
        column_creates = column_creates + column_name + " "  + sql_column_type 
        column_question_mark = column_question_mark + "?, "
        print(col_number, '  ', column_name) 
    #print('column_inserts:  ', column_inserts) 
    #print('column_question_mark:  ', column_question_mark) 
    print('column_creates:  ', column_creates)  

    insert_records_SQL = 'INSERT INTO ' + Table_Name + '(' + column_inserts + ') VALUES (' + column_question_mark + ');' 
    create_table_SQL = 'CREATE TABLE ' + Table_Name + '(' + column_creates + ');' 
    


In [None]:
df_input_csv.dtypes

In [None]:
%who str


In [None]:
all_files = iterate_import_files('Y:\_Kaleida_Input\Access','\\*.csv')
# print(all_files[1])
for filename in all_files:
    print(filename)
    df = pd.read_csv(filename, nrows=10)
df.head()


In [None]:
all_files = glob.glob('C:\Data\Behavioral Health'+ '\*.csv')
print(all_files)
print(all_files[1])

In [None]:
#remove all csv files from dir and unzip folder
parent_dir = 'C:/Power BI/' #path to folder
path = parent_dir

#get csv list
for file in os.listdir(path):
    if file.endswith('.csv'):
        os.remove(path+file)    
        
with ZipFile(path+'PowerBiDownload.zip', 'r') as zipObj:
   zipObj.extractall(path)   

#remove files
#remove files not in list
csv_path = r'S:\Data Team\Source Data\python sql\needed tables.csv'
ext = ".csv"
with open(csv_path, 'r') as csvfile:
    good_files = []
    for n in csv.reader(csvfile):
        if len(n) > 0: good_files.append(n[0])
    all_files = os.listdir(path)
    for filename in all_files:
        if filename.endswith(ext) and filename not in good_files:
            full_file_path = os.path.join(path, filename)
            os.remove(full_file_path)

print('Old files removed, new files unzipped')

In [None]:
#Pull in helper tables, covert to csv and delete old helper tables
with open('S:/Data Team/Source Data/python sql/helper tables.csv', newline='', encoding='utf-8-sig') as csvfile:
    linereader = csv.reader(csvfile, delimiter=',')
    for row in linereader:
        name = row[0]
        shutil.copy(name, 'C:\Power BI\\' + os.path.basename(name))
                
print('All Helper Tables Moved')

searchdir = 'C:\Power BI\\'

for xls_file in glob.glob(os.path.join(searchdir,"*.xlsx")):
    data_xls = pd.read_excel(xls_file, index_col = None)
    csv_file = os.path.splitext(xls_file)[0]+".csv"
    data_xls.to_csv(csv_file, encoding = 'utf-8', index = False)
    
print('All Helper Tables Changed to CSV')

df_pipe = pd.read_csv('C:/Power BI/hec daily.txt', delimiter = '|', index_col = None, header = None, on_bad_lines='skip')

df_pipe.to_csv('C:/Power BI/hec daily.csv', sep = ',', header = False, index = False)    
    
print('HeC Daily Converted to CSV')

pathtodelete =r"C:\Power BI"
filenames_xlsx = glob.glob(pathtodelete + "/*.xlsx")
for i in filenames_xlsx:
    os.remove(i)
    
filenames_txt = glob.glob(pathtodelete + "/*.txt")
for k in filenames_txt:
    os.remove(k)    
    
print('Old Helper Tables Removed')

In [None]:
#rename files longer than >=63 char
for filename in os.listdir(path):
    if len(filename) > 63:
        os.rename(path+filename, path+filename[-60:])
        print(filename+' renamed to '+filename[-60:])
            
#get csv list
csv_files = []
for file in os.listdir(path):
    if file.endswith('.csv'):
        csv_files.append(file)
        
data_path = path
#create dataframes
df = {}
for file in csv_files:
    try:
        df[file] = pd.read_csv(data_path+file, low_memory=False, index_col=False)
        
    except UnicodeDecodeError:
        df[file] = pd.read_csv(data_path+file, encoding="cp437", low_memory=False, index_col=False, errors='ignore')
    
    print('Loading ' + file + ' into dataframe')    
print('loading completed')

In [None]:
for k in csv_files:
    
    dataframe = df[k]
    
    clean_tbl_name = k.lower().replace(" ","_").replace("-","").replace(".", "_").replace("(", "").replace(")", "").replace(",", "")\
    .replace("_csv", "").replace("___", "_").replace("__", "_")    
    
    tbl_name = clean_tbl_name
    
    print(k + ' changing to ' + clean_tbl_name)

    #clean column names
    dataframe.columns = [x.lower().replace(" ", "_").replace("-", "").replace("#","num").replace("?", "")\
                     .replace("=","").replace("\n","").replace("\r","").replace("\r\n","").replace("]","_")\
                     .replace("]","_").replace("[","_").replace("\\","_").replace(".","_").replace("$","")\
                     .replace("%","").replace("#","").replace("(","").replace(")","").replace("?","")\
                     .replace(",","").replace("*","").replace(":","").replace("'","").replace("&","")\
                     .replace(";","").replace("__", "_").replace("/", "")
                     for x in dataframe.columns]

     #limit column length to 64 and reading right to left
    dataframe.columns = dataframe.columns.str[-60:] 

     #adding a number if duplicated column name
    def uniquify(dataframe):
        seen = set()

        for item in dataframe:
            fudge = 1
            newitem = item

            while newitem in seen:
                fudge += 1
                newitem = "{}_{}".format(item, fudge)

            yield newitem
            seen.add(newitem)

    dataframe.columns = uniquify(dataframe)

    dataframe.columns = dataframe.columns.str[-60:] 
    
    #db settings and connection
    #get password
    f=open("S:/Data Team Secure/secrets/postgres.txt","r")
    lines=f.readlines()
    password=lines[1]
    f.close()
    
   
    user="Joes_User_Name"
    host = 'Joes_Host_Name'
    dbname = 'postgres'
        
    engine = create_engine('postgresql://'+user+':'+password+'@'+host+'/'+dbname)
   
    #print('opened database successfully')
    
    #create table
    #dataframe.to_sql(k, engine, schema = None, if_exists='append', index=False, dtype = 'text')
    dataframe.to_sql(clean_tbl_name, engine, schema = None, if_exists='append', index=False, dtype =  
                             {'datefld': sqlalchemy.DateTime(), 
                             'intfld':  sqlalchemy.types.INTEGER(),
                             'strfld': sqlalchemy.types.NVARCHAR(length=255),
                             'floatfld': sqlalchemy.types.Float(precision=3, asdecimal=True),
                             'booleanfld': sqlalchemy.types.Boolean,
                             'bool' : sqlalchemy.types.Boolean,
                             'float64' : sqlalchemy.types.NVARCHAR(length=255),
                             'int64' : sqlalchemy.types.INTEGER(),
                             'object' : sqlalchemy.types.NVARCHAR(length=50000)})
    
    print(clean_tbl_name+' uploaded to database')
    
print('All uploads complete')

## for automation, send email to email list when complete
#get email and file list
email_list = pd.read_csv('S:/Data Team/Source Data/python sql/email_db_upload.csv')
emails = email_list['email']

# email loop
for i in range(len(emails)):
    
    email = emails[i]
    
    # Open the Outlook
    outlook = win32.Dispatch('outlook.application')

    # Create the email
    mail = outlook.CreateItem(0)

    # Set the email subject
    mail.Subject = 'AUTOMATED EMAIL: Database Updated '+ datetime.now().strftime('%b %#d %Y %H:%M')

    # Set the receiver email
    mail.To = email

    # Write the email content
    mail.HTMLBody = r"""
    <p>Hello</p>
    <p>The database has had been updated successfully.</p>
    <p>Thanks</p>
    <p>The Data Team</p>
    """

    # Send the email
    mail.Send()
    print('Email sent to ' + email)
print('All Emails Processed')

In [None]:
filepath = 'J:/OPA/GLIN Reporting Data Services Output/GPPC Scorecards/Sample report structure and colors.xlsx'

 
File_Size = os.path.getsize(filepath)
File_Last_Modified =  time.ctime(os.path.getmtime(filepath))
File_Create_Date =  time.ctime(os.path.getctime(filepath))
print('File Size ',File_Size)
print('File Modified: ',File_Last_Modified)
print('File Created: ',File_Create_Date)

