### Import Statements:

In [44]:
import os
import datetime
from datetime import datetime
import csv
from csv import reader, writer, DictWriter
import shutil
from shutil import copyfile
import glob

### Establishing local directory path to Winter Turf SD Card folders:

In [45]:
# Enter local directory path to base of working folder, and folder of SD card info inside repo, here:
# (Both values are absolute paths for one specific machine; edit them before
# running this notebook anywhere else.)

base_of_folder = r'/Users/michaelfelzan/Desktop/WTSB6'  # root of the working folder

SD_folder = r'/Users/michaelfelzan/Desktop/WTSB6/2021-2022-SD-Cards'  # holds one folder per SD card

# Make the SD-card folder the working directory; the trailing expression
# echoes the new working directory as the cell output.
os.chdir(SD_folder)
os.getcwd()

'/Users/michaelfelzan/Desktop/WTSB6/2021-2022-SD-Cards'

##### (printing names of all SD card folders at base of directory):

In [46]:
# Collect the name of every SD-card ("node") folder directly inside SD_folder.
# SD_folder is listed explicitly (instead of the current working directory) so
# the cell gives the same answer even when re-run after a later os.chdir().
node_folders = [
    nodefoldername
    for nodefoldername in os.listdir(SD_folder)
    if nodefoldername != '.DS_Store'   # skip the macOS Finder metadata file
]

for folder in node_folders:
    print(folder)

Winter_Turf_Type_B_-_6
Winter_Turf_Type_B_-_1
Winter_Turf_Type_A_-_7
Winter_Turf_Type_A_-_8
Winter_Turf_Type_A_-_10
Winter_Turf_Type_A_-_28
Winter_Turf_Type_A_-_17
Winter_Turf_Type_A_-_21
Winter_Turf_Type_A_-_19
Winter_Turf_Type_A_-_26
Winter_Turf_Type_A_-_18
Winter_Turf_Type_A_-_27
Winter_Turf_Type_A_-_20
Winter_Turf_Type_A_-_16
Winter_Turf_Type_A_-_34
Winter_Turf_Type_B_-_12
Winter_Turf_Type_A_-_35
Winter_Turf_Type_B_-_5
Winter_Turf_Type_A_-_3
Winter_Turf_Type_A_-_2
Winter_Turf_Type_A_-_5
Winter_Turf_Type_B_-_4
Winter_Turf_Type_A_-_14
Winter_Turf_Type_A_-_22
Winter_Turf_Type_A_-_23
Winter_Turf_Type_A_-_24
Winter_Turf_Type_A_-_12
Winter_Turf_Type_A_-_15
Winter_Turf_Type_B_-_11
Winter_Turf_Type_A_-_31
Winter_Turf_Type_A_-_36


### Defining 'stand-alone' functions:

In [47]:
def ListDirNoDSstore(path):
    """Return the entries of a directory, omitting the '.DS_Store' file.

    Parameters
    ------------
    path: 'raw' str (path to directory)

    Returns
    ------------
    list of str: the names returned by os.listdir(path), in the same
    order, minus any entry named exactly '.DS_Store'.
    """
    return [entry for entry in os.listdir(path) if entry != '.DS_Store']



def LogInfoGetter(txtlog):
    """Parse one .txt log and separate its header rows from its data rows.

    This function is utilized by the 'SubNodeFolder' class. A line is
    classified as a header when it contains no comma, or when its first
    comma-separated field cannot be converted to a float; every other line
    is treated as data.

    Column names are taken from the header line whose number of fields
    equals the number of fields in the first data row. If no header fits,
    generic names are generated, numbered from zero (Col_0, Col_1, ...).
    If two *different* header lines fit, an error message is printed and
    the first fitting header is kept.

    Parameters
    ------------
    txtlog: 'raw' str (path to .txt file log). The path is expected to look
        like '<SD_folder>/<device>/GEMS/<node id>/Logs/<year>/Log<date>.txt';
        the device name, node id and date are cut out of it using the
        literal markers '/GEMS/', '/Logs/', '/Log' and '.txt' and the
        module-level 'SD_folder' value.

    Returns
    ------------
    The str 'error: no data in .txt' when the file holds no data rows,
    otherwise a dict with:
    'ColNames': ['DEVICE_NAME', 'NODE_ID', 'OG_FILENAME'] followed by the
                log's column names
    'Data':  one list per data row: device name, node id, log date, then
             the row's comma-separated fields (all str)
    'AllLines': every line of the file (headers and data) without '\\n'
    'HeaderIndicies': index into 'AllLines' of every header line
    'LogDate': date part of the file name (eg. 2021-09-17)
    """
    with open(txtlog, "r") as file_object:
        cleanlines = [rawline.split('\n')[0] for rawline in file_object.readlines()]

    header_indicies = []
    row_entries = []  # data, corresponding to col. names

    # enumerate() supplies the true position of each line. Looking the line
    # up with list.index() would return the FIRST matching line, which gives
    # wrong/duplicated indices for repeated lines and is quadratic.
    for lineindex, line in enumerate(cleanlines):
        splitbycomma = line.split(',')
        is_header = len(splitbycomma) < 2  # no comma -> header
        if not is_header:
            try:
                float(splitbycomma[0])
            except ValueError:
                # column names are assumed to contain letter characters
                is_header = True
        if is_header:
            header_indicies.append(lineindex)
        else:
            row_entries.append(splitbycomma)

    if not row_entries:
        return 'error: no data in .txt'  # blank file, or headers only

    # Warn (once) when the data rows do not all have the same field count.
    len_first_entry = len(row_entries[0])
    for entry in row_entries:
        if len(entry) != len_first_entry:
            print("All elements are not equal")
            break

    column_names = []
    header_fit_detected = False
    for headerindex in header_indicies:
        header_fields = cleanlines[headerindex].split(',')
        if len(header_fields) != len_first_entry:
            continue
        if not header_fit_detected:
            header_fit_detected = True
            column_names = header_fields
        elif header_fields != column_names:
            # A second, different header also fits: ambiguous. An exact
            # repeat of the chosen header is not ambiguous and is ignored.
            print(f"ERROR -- two possible matches for headers detected for {txtlog}")
            break

    if not header_fit_detected:
        # No usable header line: fall back to zero-based generic names.
        column_names = [f'Col_{i}' for i in range(len_first_entry)]

    # (parsing .txt dir path for just the 'date' (eg. 2021-07-16))
    logdate = txtlog.split('/Logs/')[1].split('/Log')[1].split('.txt')[0]
    # device folder name: the part between '<SD_folder>/' and '/GEMS/'
    devicename = txtlog.split('/GEMS/')[0].split(f"{SD_folder}" + r"/")[1]
    # node id: the part between '/GEMS/' and '/Logs/'
    subfoldname = txtlog.split('/GEMS/')[1].split('/Logs/')[0]

    # Prefix every data row with its provenance so rows stay identifiable
    # once several logs are combined into one .CSV
    data_w_nodeID_logdate_appended = [
        [devicename, subfoldname, logdate] + list(datapiece)
        for datapiece in row_entries
    ]
    colnames_w_nodeID_logdate_appended = (
        ['DEVICE_NAME', 'NODE_ID', 'OG_FILENAME'] + list(column_names)
    )

    return {
        'ColNames': colnames_w_nodeID_logdate_appended,
        'Data':  data_w_nodeID_logdate_appended,
        'AllLines': cleanlines,
        'HeaderIndicies': header_indicies,
        'LogDate': logdate
    }

In [48]:
class NodeFolder:
    """Describes one 'Node Folder' (eg. 'Winter_Turf_Type_A_-_2') found
    inside the SD-card directory, so that the information tied to that
    folder can be read from the created object.

    Parameters
    ------------
    - nodename (str): Name of the 'Node'
            eg. Winter_Turf_Type_A_-_2

    Attributes
    ------------
    - self.nodename : the name passed in
    - self.gems_folderpath : path to the node's /GEMS/ dir
            (SD_folder/<nodename>/GEMS)
    - self.node_subfolders : list of the names found inside /GEMS/,
            '.DS_Store' excluded (eg. e00fce68816c2bc59976cdf2)
    - self.no_of_subfolders : how many names are in that list
            (most nodes have 1; though some have 2.)
    """
    def __init__(self, nodename):
        self.nodename = nodename
        self.gems_folderpath = os.path.join(SD_folder, nodename, 'GEMS')
        self.node_subfolders = ListDirNoDSstore(self.gems_folderpath)
        self.no_of_subfolders = len(self.node_subfolders)
        
class SubNodeFolder:
    """Describes one 'Sub-Node Folder' (eg. 'e00fce68816c2bc59976cdf2')
    inside a node's /GEMS/ directory, and converts the .txt logs stored
    below it into .csv files.

    Parameters
    ------------
    - subfoldername (str) : eg. e00fce68816c2bc59976cdf2
    - nodename (str) : eg. Winter_Turf_Type_A_-_2
    - gemsfolderpath (raw str) eg. r'/Users/mf/Desktop/
            SD_Cards/Winter_Turf_Type_A_-_23/GEMS'

    Attributes
    ------------
    - self.subfoldername, self.nodename, self.gemsfolderpath : as passed in
    - self.subfolder_path : path to subfolder on local dir
    - self.subfolder_contents : list of items (folders
            and files) within subfold
    - self.path_to_logs : path to /Logs/ dir within subfold
    - self.logs_contents : list of items within /Logs/
    - self.years_as_ints : sorted list of the folders named by years
        within /Logs/ eg. [2000, 2020, 2021]
    - self.year_folders : the same year folders as strings

    Methods
    ------------
    - LogTextPathsRetriever(self) : ordered list of .txt log paths
    - Log_CSV(self) : writes the .csv file(s) for this subfolder
    """
    def __init__(self, subfoldername, nodename, gemsfolderpath):
        self.subfoldername = subfoldername
        self.nodename = nodename
        self.gemsfolderpath = gemsfolderpath

        self.subfolder_path = os.path.join(gemsfolderpath, subfoldername)
        self.subfolder_contents = ListDirNoDSstore(self.subfolder_path)
        self.path_to_logs = os.path.join(self.subfolder_path, 'Logs')
        self.logs_contents = ListDirNoDSstore(self.path_to_logs)

        # (may need to be updated if different 'non-year' folder items
        # are encountered)
        non_year_items = ['UnsentLogs.dat', 'Header.dat', 'UnsentLogs.txt']

        # Everything else in /Logs/ is taken to be a year folder, except
        # .csv files (this script may already have written .csv logs there).
        # Names are converted to int so they sort chronologically.
        self.years_as_ints = sorted(
            int(content)
            for content in self.logs_contents
            if content not in non_year_items and '.csv' not in content
        )
        self.year_folders = [str(intyear) for intyear in self.years_as_ints]

    def LogTextPathsRetriever(self):
        """Return the paths of this subfolder's 'Log<date>.txt' files.

        Logs whose <date> is not a valid ISO date (eg. 2000-00-01.txt;
        2021-11-17.1.txt) come first, in the order they are found while
        walking the year folders. They are followed by the logs with valid
        dates, sorted chronologically.

        Every returned path is the real path of a file that was listed, so
        a log stored in a year folder that does not match its own date is
        still returned correctly. Entries that do not look like
        'Log....txt' are skipped.
        """
        invalid_date_paths = []
        dated_paths = []  # (datetime, path) pairs

        for yearfold in self.year_folders:
            year_dir = os.path.join(self.path_to_logs, yearfold)
            for textfilename in ListDirNoDSstore(year_dir):
                if not (textfilename.startswith('Log')
                        and textfilename.endswith('.txt')):
                    continue
                # same date text LogInfoGetter reports as 'LogDate'
                datetext = textfilename[len('Log'):].split('.txt')[0]
                logpath = os.path.join(year_dir, textfilename)
                try:
                    parsed = datetime.fromisoformat(datetext)
                except ValueError:
                    invalid_date_paths.append(logpath)
                else:
                    dated_paths.append((parsed, logpath))

        # sort on the date only; the sort is stable, so equal dates keep
        # their discovery order
        dated_paths.sort(key=lambda pair: pair[0])
        return invalid_date_paths + [logpath for _, logpath in dated_paths]

    @staticmethod
    def _write_log_rows(csv_path, loginfo, write_header):
        """Write one parsed log's rows to csv_path.

        write_header=True starts a new file (mode 'w') with a header row;
        otherwise the rows are appended. Fields beyond the column count are
        dropped, and a short row is padded with empty strings by DictWriter
        rather than raising.
        """
        colnames = loginfo['ColNames']
        with open(csv_path, 'w' if write_header else 'a', newline='') as csvfile:
            rowwriter = csv.DictWriter(csvfile, fieldnames=colnames)
            if write_header:
                rowwriter.writeheader()
            for datarow in loginfo['Data']:
                rowwriter.writerow(dict(zip(colnames, datarow)))

    def Log_CSV(self):
        """Combine this subfolder's .txt logs into .csv file(s) in /Logs/.

        Logs are processed in the order given by LogTextPathsRetriever().
        Consecutive logs with identical column names are appended to one
        file, named 'Logs_<first>.csv' and renamed to
        'Logs_<first>_to_<last>.csv' as more logs are appended. When a log's
        column names differ from the current file's, a new file is started
        and a message is printed. Logs for which LogInfoGetter() reports
        'error: no data in .txt' are skipped. Returns None.
        """
        # The directory tree is listed once; the list does not change while
        # the CSVs are being written.
        logpaths = self.LogTextPathsRetriever()
        if not logpaths:
            print(f"No .txt logs in {self.subfoldername} of {self.nodename}.")
            return

        firstdayincycle = 'null'
        lastdayincycle = 'null'
        CurrentCSVtoAppendto = 'null'
        CurrentColNames = 'null'

        for position, logpath in enumerate(logpaths):
            loginfo = LogInfoGetter(logpath)
            if loginfo == 'error: no data in .txt':
                continue

            if loginfo['ColNames'] == CurrentColNames:
                # Same layout as the file being built: append and extend
                # the date range in the file name.
                self._write_log_rows(CurrentCSVtoAppendto, loginfo,
                                     write_header=False)
                lastdayincycle = loginfo['LogDate']
                newCSVname = os.path.join(
                    self.path_to_logs,
                    f'Logs_{firstdayincycle}_to_{lastdayincycle}.csv'
                )
                os.rename(CurrentCSVtoAppendto, newCSVname)
                CurrentCSVtoAppendto = newCSVname
                continue

            # Different layout (or nothing written yet): start a new file.
            # pastCSV_date is the last date covered by the file just left.
            pastCSV_date = (lastdayincycle if lastdayincycle != 'null'
                            else firstdayincycle)
            CurrentColNames = loginfo['ColNames']
            firstdayincycle = loginfo['LogDate']
            lastdayincycle = 'null'
            CurrentCSVtoAppendto = os.path.join(self.path_to_logs,
                                                f'Logs_{firstdayincycle}.csv')
            self._write_log_rows(CurrentCSVtoAppendto, loginfo,
                                 write_header=True)
            if position == 0:
                print(f"<<{self.nodename}, {self.subfoldername}>> Created ~Logs_{firstdayincycle}.csv~ at base of /Logs/")
            else:
                print(f"<<{self.nodename}, {self.subfoldername}>> Inconsistent log field names/amount between {pastCSV_date} and {firstdayincycle};"+
                      f" Starting new CSV file (which following logs will be appended to) at base of /Logs/")

### Running functions on every SD card folder:

In [49]:
# Write the combined .csv log(s) for every sub-node folder of every node.
for nodefld in node_folders:
    iternodefld = NodeFolder(nodefld)
    gems_path = iternodefld.gems_folderpath
    for subfldr in iternodefld.node_subfolders:
        itersubfld = SubNodeFolder(subfldr, iternodefld.nodename, gems_path)
        itersubfld.Log_CSV()

<<Winter_Turf_Type_B_-_6, e00fce685b02f35fe13a0a2d>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Inconsistent log field names/amount between 2000-00-01 and 2021-11-30.1; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Inconsistent log field names/amount between 2021-11-30.1 and 2020-11-17; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_7, e00fce682a79a64999b7b409>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_7, e00fce682a79a64999b7b409>> Inconsistent log field names/amount between 2021-10-28 and 2021-12-13; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_8, e00fce6829163099f836ae78>> Inconsistent log fi

<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Created ~Logs_2000-00-21.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2000-00-01 and 2000-00-00; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2165-25-45 and 2020-11-14; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2020-12-25 and 2021-12-11; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_15, e00fce68ccb7400e1fb8aa98>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_15, e00fce68ccb7400e1fb8aa98>> Inconsistent log field names/amount between 2021-03-24 and 2021-12-10; Starting new CSV file (which following logs wil

### Creating new folder structure for outputted CSV logs:

In [50]:
# Output root that will receive a copy of every /Logs/ folder.
CSV_Logs_Folder = os.path.join(
    base_of_folder,
    'WT_Outputted_CSV_Logs'
)

try:
    os.mkdir(CSV_Logs_Folder)
except FileExistsError:
    print("Error: couldn't create CSV_Logs_Folder. Path already exists")
except OSError as mkdir_error:
    # Any other failure (missing parent, permissions, ...) is reported with
    # its real cause instead of being mislabelled as "already exists".
    print(f"Error: couldn't create CSV_Logs_Folder. {mkdir_error}")
else:
    print(f"Created {CSV_Logs_Folder}")



Created /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs


In [51]:
# Recreate the <node>/<sub-node> folder layout inside CSV_Logs_Folder.
for nodefld in node_folders:
    iternodefld = NodeFolder(nodefld)
    for subfldr in iternodefld.node_subfolders:
        # exist_ok=True keeps the cell re-runnable: folders left over from
        # an earlier run no longer raise FileExistsError.
        os.makedirs(os.path.join(CSV_Logs_Folder,
                                 nodefld,
                                 subfldr),
                    exist_ok=True)
print("Successfully created folder structure inside 'WT_Outputted_CSV_Logs'")

Successfully created folder structure inside 'WT_Outputted_CSV_Logs'


In [52]:
# Copy each sub-node's whole /Logs/ directory into the matching folder of
# the output tree (shutil.copytree creates the destination 'Logs' folder).
for nodefld in node_folders:
    iternodefld = NodeFolder(nodefld)
    gems_path = iternodefld.gems_folderpath
    for subfldr in iternodefld.node_subfolders:
        itersubfld = SubNodeFolder(subfldr, iternodefld.nodename, gems_path)
        itersubfold_NEWpathtologs = os.path.join(
            CSV_Logs_Folder, nodefld, subfldr, 'Logs')
        shutil.copytree(itersubfld.path_to_logs, itersubfold_NEWpathtologs)

print(f"Successfully copied contents of Logs folders from {SD_folder} to {CSV_Logs_Folder}")

Successfully copied contents of Logs folders from /Users/michaelfelzan/Desktop/WTSB6/2021-2022-SD-Cards to /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs


In [53]:
# Changing working directory to CSV Logs folder
# (the trailing expression echoes the new working directory as the cell output):

os.chdir(CSV_Logs_Folder)
os.getcwd()

'/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs'

In [54]:
# Move every .csv out of the copied <sub-node>/Logs/ folder, up to the base
# of that sub-node folder in the output tree.
# Only the sub-node names are needed here, so no SubNodeFolder object is
# built (constructing one lists several SD-card directories for nothing).
for nodefld in node_folders:
    iternodefld = NodeFolder(nodefld)
    for subfldr in iternodefld.node_subfolders:
        itersubfold_NEWpathtologs = os.path.join(CSV_Logs_Folder,
                                                 nodefld,
                                                 subfldr,
                                                 'Logs')
        for logfolditem in os.listdir(itersubfold_NEWpathtologs):
            if ".csv" in logfolditem:
                shutil.move(
                    os.path.join(itersubfold_NEWpathtologs, logfolditem),
                    os.path.join(CSV_Logs_Folder, nodefld, subfldr, logfolditem)
                )
print("Succesfully moved all .CSV logs to base of subnode folder.")
            

Succesfully moved all .CSV logs to base of subnode folder.


In [55]:
# Re-list the node folders, this time from the output tree.
# CSV_Logs_Folder is listed explicitly (instead of the current working
# directory) so the result does not depend on an earlier os.chdir().
node_folders = [
    nodefoldername
    for nodefoldername in os.listdir(CSV_Logs_Folder)
    if nodefoldername != '.DS_Store'   # skip the macOS Finder metadata file
]

for folder in node_folders:
    print(folder)

Winter_Turf_Type_B_-_6
Winter_Turf_Type_B_-_1
Winter_Turf_Type_A_-_7
Winter_Turf_Type_A_-_8
Winter_Turf_Type_A_-_10
Winter_Turf_Type_A_-_28
Winter_Turf_Type_A_-_17
Winter_Turf_Type_A_-_21
Winter_Turf_Type_A_-_19
Winter_Turf_Type_A_-_26
Winter_Turf_Type_A_-_18
Winter_Turf_Type_A_-_27
Winter_Turf_Type_A_-_20
Winter_Turf_Type_A_-_16
Winter_Turf_Type_A_-_34
Winter_Turf_Type_B_-_12
Winter_Turf_Type_A_-_35
Winter_Turf_Type_B_-_5
Winter_Turf_Type_A_-_3
Winter_Turf_Type_A_-_2
Winter_Turf_Type_A_-_5
Winter_Turf_Type_B_-_4
Winter_Turf_Type_A_-_14
Winter_Turf_Type_A_-_22
Winter_Turf_Type_A_-_23
Winter_Turf_Type_A_-_24
Winter_Turf_Type_A_-_12
Winter_Turf_Type_A_-_15
Winter_Turf_Type_B_-_11
Winter_Turf_Type_A_-_31
Winter_Turf_Type_A_-_36


In [56]:
# (sub-node folder, node folder) pairs for every sub-node in the output tree.
# Each node folder is addressed through CSV_Logs_Folder rather than by a
# relative name, so the cell does not depend on the current working directory.
sub_nodes = [
    (subfold, nodefold)
    for nodefold in node_folders
    for subfold in os.listdir(os.path.join(CSV_Logs_Folder, nodefold))
    if subfold != ".DS_Store"
]

sub_nodes

[('e00fce685b02f35fe13a0a2d', 'Winter_Turf_Type_B_-_6'),
 ('e00fce6838cebdf42fc24391', 'Winter_Turf_Type_B_-_1'),
 ('e00fce682a79a64999b7b409', 'Winter_Turf_Type_A_-_7'),
 ('e00fce6829163099f836ae78', 'Winter_Turf_Type_A_-_8'),
 ('e00fce68206506b1159c8936', 'Winter_Turf_Type_A_-_10'),
 ('e00fce683d1ce7e541f9698f', 'Winter_Turf_Type_A_-_28'),
 ('e00fce686565dea3e22b623c', 'Winter_Turf_Type_A_-_17'),
 ('e00fce68af529fe2d2d7c809', 'Winter_Turf_Type_A_-_21'),
 ('e00fce682d4d9ca0f9a3b12d', 'Winter_Turf_Type_A_-_19'),
 ('e00fce684b4112eb5390b0a0', 'Winter_Turf_Type_A_-_26'),
 ('e00fce68e485873e6b6e983b', 'Winter_Turf_Type_A_-_18'),
 ('e00fce68e5bd8b129dc5e774', 'Winter_Turf_Type_A_-_27'),
 ('e00fce6867d3a0cda48e32a2', 'Winter_Turf_Type_A_-_20'),
 ('e00fce682699a15d165801b1', 'Winter_Turf_Type_A_-_16'),
 ('e00fce687038a71d446ef776', 'Winter_Turf_Type_A_-_34'),
 ('e00fce68c2bc8e9d3b033655', 'Winter_Turf_Type_A_-_34'),
 ('e00fce6891afbb6bddd89915', 'Winter_Turf_Type_B_-_12'),
 ('e00fce6857f0346

In [57]:
# For every sub-node folder: find CSVs that share an identical header row and
# move each such group into its own 'merge_dir_<k>' folder (k = order in which
# the shared header was first seen a second time).
for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    itercsvs = [
        os.path.join(path_to_subn, entry)
        for entry in os.listdir(path_to_subn)
        if ".csv" in entry
    ]

    # Read each CSV's header exactly once; the file is closed again before
    # anything is copied or deleted. An empty CSV has no header and is left
    # where it is.
    header_by_csv = {}
    for csv_f in itercsvs:
        with open(csv_f) as csv_file:
            first_row = next(csv.reader(csv_file, delimiter=','), None)
        if first_row is not None:
            header_by_csv[csv_f] = tuple(first_row)

    # Headers used by more than one CSV, in order of their first repeat.
    seen = set()
    dupes = []
    for header in header_by_csv.values():
        if header in seen:
            if header not in dupes:
                dupes.append(header)
        else:
            seen.add(header)

    if not dupes:
        continue

    for mergeindx in range(len(dupes)):
        iter_mergefold_name = os.path.join(path_to_subn,
                                           f"merge_dir_{mergeindx}")
        if not os.path.exists(iter_mergefold_name):
            os.mkdir(iter_mergefold_name)

    for csv_f, itrheader in header_by_csv.items():
        if itrheader not in dupes:
            continue
        iter_merge_dir = f"merge_dir_{dupes.index(itrheader)}"
        iter_copyfile_path = os.path.join(path_to_subn,
                                          iter_merge_dir,
                                          os.path.basename(csv_f))

        # if file isnt already in merge dir, copy it to there
        if not os.path.exists(iter_copyfile_path):
            shutil.copyfile(csv_f, iter_copyfile_path)
            print(f"Copied {iter_copyfile_path} to {iter_merge_dir}")

        # Delete the original only when the merge-dir copy is byte-for-byte
        # identical. Comparing just the header line would also delete an
        # original whose data differs from a copy left by an earlier run.
        with open(iter_copyfile_path, 'rb') as indirfile, \
                open(csv_f, 'rb') as ogplacement:
            same_contents = indirfile.read() == ogplacement.read()

        if same_contents:
            os.remove(csv_f)
            print(f"removed {csv_f}, as it exists in merge dir")
        else:
            print(f"Did not remove \n    {csv_f} \nfrom base of subnode folder, as the"
                  + f" contents do not match that of \n    {iter_copyfile_path}!")

Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/merge_dir_0/Logs_2000-00-01.csv to merge_dir_0
removed /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/Logs_2000-00-01.csv, as it exists in merge dir
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/merge_dir_0/Logs_2020-11-17_to_2021-11-29.csv to merge_dir_0
removed /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/Logs_2020-11-17_to_2021-11-29.csv, as it exists in merge dir
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/merge_dir_0/Logs_2000-00-00.csv to merge_dir_0
removed /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/Logs_2000-00-00.csv, as it exists in merge dir
Copied /Users/m

In [58]:
# For every merge directory inside every subnode folder, stitch its CSV files
# into a single 'concatCSV_<n>.csv' (header kept from the first file only).
for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if "merge_dir_" not in file:
            continue

        path_to_merge_dir = os.path.join(path_to_subn, file)
        listofmergedir_files = os.listdir(path_to_merge_dir)
        merge_dir_no = path_to_merge_dir.split("merge_dir_")[1]
        concatCSV = os.path.join(path_to_merge_dir,
                                 f"concatCSV_{merge_dir_no}.csv")

        # An existing concatenated file means this directory was already merged.
        if os.path.exists(concatCSV):
            continue

        # The glob runs before the output file is created, so the output
        # never appears among its own inputs.
        allFiles = sorted(glob.glob(path_to_merge_dir + "/*.csv"))
        with open(concatCSV, 'wb') as outfile:
            for i, fname in enumerate(allFiles):
                with open(fname, 'rb') as infile:
                    if i > 0:
                        # Drop the header line of every file except the first.
                        infile.readline()
                    # Raw byte copy of whatever remains; nothing is parsed.
                    shutil.copyfileobj(infile, outfile)

        trimmednames = [
            csvfil.split(f"{path_to_merge_dir}/")[1] for csvfil in allFiles
        ]
        banner = "     *~*~~~~~**~~~~~~~~~~~~~~**~*~*"
        print(f"{banner}\n"
              f"{trimmednames}"
              f"\n in folder {n}, {s}, merge_dir_{merge_dir_no}"
              f"\n have been successfully merged together"
              f"\n{banner}")

     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['Logs_2000-00-01.csv', 'Logs_2020-11-17_to_2021-11-29.csv']
 in folder Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391, merge_dir_0
 have been successfully merged together
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['Logs_2001-01-01.1.csv', 'Logs_2021-12-10_to_2022-02-26.csv']
 in folder Winter_Turf_Type_A_-_8, e00fce6829163099f836ae78, merge_dir_1
 have been successfully merged together
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['Logs_2000-00-00.csv', 'Logs_2165-25-45_to_2020-11-14.csv']
 in folder Winter_Turf_Type_A_-_8, e00fce6829163099f836ae78, merge_dir_0
 have been successfully merged together
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['Logs_2000-00-00.csv', 'Logs_2021-11-11_to_2021-12-01.csv']
 in folder Winter_Turf_Type_B_-_12, e00fce6891afbb6bddd89915, merge_dir_0
 have been successfully merged together
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
     *~*~~~~~

In [59]:
# Lift every 'concatCSV' file out of its merge directory and place it at the
# base of the owning subnode folder.
for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if "merge_dir_" not in file:
            continue

        path_to_merge_dir = os.path.join(path_to_subn, file)
        listofmergedir_files = os.listdir(path_to_merge_dir)

        for mergedirfile in listofmergedir_files:
            if "concatCSV" not in mergedirfile:
                continue

            # Same file name, one directory level up.
            og_dest_concatCSV = os.path.join(path_to_merge_dir, mergedirfile)
            new_dest_concatCSV = os.path.join(path_to_subn, mergedirfile)
            shutil.move(og_dest_concatCSV, new_dest_concatCSV)

            print(f"{mergedirfile} was moved to base of subnode folder"
                  f"\n {new_dest_concatCSV}")

concatCSV_0.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/concatCSV_0.csv
concatCSV_1.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/concatCSV_1.csv
concatCSV_0.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/concatCSV_0.csv
concatCSV_0.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_12/e00fce6891afbb6bddd89915/concatCSV_0.csv
concatCSV_0.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_5/e00fce68c014249653ebc049/concatCSV_0.csv
concatCSV_1.csv was moved to base of subnode folder
 /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_

In [60]:
# Remove every merge directory (and everything still inside it) from each
# subnode folder.
for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if "merge_dir_" not in file:
            continue
        path_to_merge_dir = os.path.join(path_to_subn, file)
        # Recursive delete: any file left in the directory goes with it.
        shutil.rmtree(path_to_merge_dir)
        print(f"Successfully deleted {path_to_merge_dir}")

Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/merge_dir_0
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/merge_dir_1
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/merge_dir_0
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_12/e00fce6891afbb6bddd89915/merge_dir_0
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_5/e00fce68c014249653ebc049/merge_dir_0
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_4/e00fce682c26b88b84ab26f2/merge_dir_1
Successfully deleted /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_4/e00fce682c26b88b84ab26f2/merge_dir_0
Successfully deleted /Users/micha

In [61]:
# Rename every CSV in each subnode folder after the date range spanned by the
# integer values in column index 3 of its data rows (interpreted as Unix
# timestamps in the local time zone via datetime.fromtimestamp).
file_names = set()   # base names already handed out during this run
naming_counter = 0   # running suffix used to disambiguate clashing names

for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if ".csv" not in file:
            continue
        iterfullcsvpath = os.path.join(path_to_subn, file)

        # Stream the rows and keep only the running extremes, instead of
        # materialising every timestamp in a list first.
        csvdata_MIN_date = None
        csvdata_MAX_date = None
        with open(iterfullcsvpath, 'r') as read_obj:
            read_obj.readline()  # skip the header row
            for row in reader(read_obj):
                if not row:
                    # csv.reader yields [] for a blank line; row[3] would fail.
                    continue
                stamp = int(row[3])
                if csvdata_MIN_date is None or stamp < csvdata_MIN_date:
                    csvdata_MIN_date = stamp
                if csvdata_MAX_date is None or stamp > csvdata_MAX_date:
                    csvdata_MAX_date = stamp

        # A header-only (or empty) file has no range to name it after; leave it
        # untouched rather than crashing on min()/max() of nothing.
        if csvdata_MIN_date is None:
            print(f"{n} {s}; \n      Skipped '{file}': no data rows to derive a date range from")
            continue

        formatted_MIN_date = datetime.fromtimestamp(csvdata_MIN_date).strftime('%Y_%m_%d')
        formatted_MAX_date = datetime.fromtimestamp(csvdata_MAX_date).strftime('%Y_%m_%d')

        if formatted_MIN_date == formatted_MAX_date:
            base_name = formatted_MIN_date
        else:
            base_name = f"{formatted_MIN_date}_to_{formatted_MAX_date}"

        # Pick a name that neither repeats one issued earlier in this run nor
        # lands on a *different* file already on disk (os.rename would silently
        # replace that file on POSIX). Renaming a file onto itself is allowed.
        newCSVname = base_name
        newCSVname_fullpath = os.path.join(path_to_subn, newCSVname) + ".csv"
        while newCSVname in file_names or (
            os.path.exists(newCSVname_fullpath)
            and not os.path.samefile(newCSVname_fullpath, iterfullcsvpath)
        ):
            naming_counter = naming_counter + 1
            newCSVname = base_name + f"_({naming_counter})"
            newCSVname_fullpath = os.path.join(path_to_subn, newCSVname) + ".csv"
        file_names.add(newCSVname)

        os.rename(iterfullcsvpath, newCSVname_fullpath)
        print(f"{n} {s}; \n      Renamed '{file}' to '{newCSVname}.csv'")

Winter_Turf_Type_B_-_6 e00fce685b02f35fe13a0a2d; 
      Renamed 'Logs_2000-00-01_to_2021-04-13.csv' to '2020_11_16_to_2021_04_13.csv'
Winter_Turf_Type_B_-_1 e00fce6838cebdf42fc24391; 
      Renamed 'Logs_2021-11-30.1.csv' to '1999_11_29_to_2031_01_09.csv'
Winter_Turf_Type_B_-_1 e00fce6838cebdf42fc24391; 
      Renamed 'concatCSV_0.csv' to '2020_11_16_to_2021_11_30.csv'
Winter_Turf_Type_A_-_7 e00fce682a79a64999b7b409; 
      Renamed 'Logs_2000-00-01_to_2021-10-28.csv' to '2020_11_13_to_2021_10_28.csv'
Winter_Turf_Type_A_-_7 e00fce682a79a64999b7b409; 
      Renamed 'Logs_2021-12-13.csv' to '2021_12_09_to_2022_04_08.csv'
Winter_Turf_Type_A_-_8 e00fce6829163099f836ae78; 
      Renamed 'concatCSV_1.csv' to '2000_12_31_to_2031_01_09.csv'
Winter_Turf_Type_A_-_8 e00fce6829163099f836ae78; 
      Renamed 'concatCSV_0.csv' to '2000_11_29_to_2020_11_17.csv'
Winter_Turf_Type_A_-_10 e00fce68206506b1159c8936; 
      Renamed 'Logs_2000-00-01_to_2020-12-28.csv' to '2020_11_14_to_2021_03_30.csv'
Winter_

In [62]:
# Print the full path of every CSV currently sitting in a subnode folder.
for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if ".csv" not in file:
            continue
        iterfullcsvpath = os.path.join(path_to_subn, file)
        print(iterfullcsvpath)

/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_6/e00fce685b02f35fe13a0a2d/2020_11_16_to_2021_04_13.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/2020_11_16_to_2021_11_30.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_B_-_1/e00fce6838cebdf42fc24391/1999_11_29_to_2031_01_09.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_7/e00fce682a79a64999b7b409/2021_12_09_to_2022_04_08.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_7/e00fce682a79a64999b7b409/2020_11_13_to_2021_10_28.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/2000_12_31_to_2031_01_09.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winter_Turf_Type_A_-_8/e00fce6829163099f836ae78/2000_11_29_to_2020_11_17.csv
/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/Winte

In [63]:
# Work out which CSV headers occur in more than one file and derive one
# 'merge_dir_<index>' path (under CSV_Logs_Folder) per such header.
csvs = []                     # full paths of every CSV found under the subnode folders
allheaders = []               # header row of each readable CSV, as a tuple
seen_3 = set()                # headers encountered at least once
dupes = []                    # headers encountered more than once, in first-repeat order
mergefolderpaths = []         # one merge-folder path per duplicated header
mergefolder_header_dict = {}


for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if ".csv" in file:
            csvs.append(os.path.join(path_to_subn, file))


for csv_f in csvs:
    with open(csv_f) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Default of None: a completely empty file has no header and would
        # otherwise abort the cell with StopIteration.
        iter_header = next(csv_reader, None)
    if iter_header is None:
        continue
    allheaders.append(tuple(iter_header))


for header in allheaders:
    if header in seen_3:
        if header not in dupes:
            dupes.append(header)
    else:
        seen_3.add(header)

# Always (re)build the mapping, even when there are no duplicates, so the loop
# below never hits a NameError or silently reuses a dictionary left over from
# an earlier run. Keys are the string form of the header tuple; values are the
# merge-directory index. Dedicated comprehension names keep the outer loop
# variable `s` intact.
merge_dir_dictionary = {f"{hdr}": idx for idx, hdr in enumerate(dupes)}


for header in merge_dir_dictionary:
    iter_mergefold_name = os.path.join(
        CSV_Logs_Folder,
        f"merge_dir_{merge_dir_dictionary[header]}"
    )
    mergefolderpaths.append(iter_mergefold_name)

print(mergefolderpaths)


['/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_0', '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_1', '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_2', '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_3', '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_4']


In [64]:
# Group CSVs that share a header: create one 'merge_dir_<index>' folder per
# duplicated header under CSV_Logs_Folder, copy each matching CSV into its
# folder, and make sure the 'final_merged' output folder exists.
csvs = []                     # full paths of every CSV found under the subnode folders
allheaders = []               # header row of each readable CSV, as a tuple
seen_3 = set()                # headers encountered at least once
dupes = []                    # headers encountered more than once, in first-repeat order

mergefoldernames = []
mergefolderpaths = []
mergefolder_header_dict = {}

naming_counter = 0            # running suffix for clashing destination file names
filenames = set()             # destination file names already used in this run


for s, n in sub_nodes:
    path_to_subn = os.path.join(CSV_Logs_Folder, n, s)
    for file in os.listdir(path_to_subn):
        if ".csv" in file:
            csvs.append(os.path.join(path_to_subn, file))


for csv_f in csvs:
    with open(csv_f) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Default of None: a completely empty file has no header and would
        # otherwise abort the cell with StopIteration.
        iter_header = next(csv_reader, None)
    if iter_header is None:
        continue
    allheaders.append(tuple(iter_header))


for header in allheaders:
    if header in seen_3:
        if header not in dupes:
            dupes.append(header)
    else:
        seen_3.add(header)

# Always (re)build the mapping, even when there are no duplicates, so later
# loops never hit a NameError or reuse a dictionary left over from an earlier
# run. Keys are the string form of the header tuple; values are the
# merge-directory index. Dedicated comprehension names keep the outer loop
# variable `s` intact.
merge_dir_dictionary = {f"{hdr}": idx for idx, hdr in enumerate(dupes)}


for header in merge_dir_dictionary:
    iter_mergefold_name = os.path.join(
        CSV_Logs_Folder,
        f"merge_dir_{merge_dir_dictionary[header]}"
    )
    mergefolderpaths.append(iter_mergefold_name)
    mergefoldernames.append(f"merge_dir_{merge_dir_dictionary[header]}")


if len(mergefolderpaths) != len(dupes):
    print("Error...number of 'merge folders' created does not match list of duplicate headers")
else:
    for mergefold in mergefolderpaths:
        if not os.path.exists(mergefold):
            os.mkdir(mergefold)

for csv_f in csvs:
    # Read just the header, then close the file before copying it.
    with open(csv_f) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        first_row = next(csv_reader, None)
    if first_row is None:
        continue

    itrheader = str(tuple(first_row))
    # Direct membership test instead of scanning every key for an equal one.
    if itrheader not in merge_dir_dictionary:
        continue

    iter_merge_dir = f"merge_dir_{merge_dir_dictionary[itrheader]}"
    iterfilename = csv_f
    splitfilename = os.path.basename(iterfilename)
    if splitfilename in filenames:
        # Same file name already used for another source: add a numeric suffix.
        naming_counter = naming_counter + 1
        splitfilename = splitfilename.split('.csv')[0] + f"_({naming_counter}).csv"
    filenames.add(splitfilename)

    iter_copyfile_path = os.path.join(
        CSV_Logs_Folder,
        iter_merge_dir,
        splitfilename)

    # Copy only when the destination is not already present.
    if not os.path.exists(iter_copyfile_path):
        with open(iter_copyfile_path, 'wb') as outfile:
            with open(csv_f, 'rb') as infile:
                shutil.copyfileobj(infile, outfile)
        print(f"Copied {iter_copyfile_path} to {iter_merge_dir}")


final_merged_folderpath = os.path.join(CSV_Logs_Folder, 'final_merged')
if not os.path.exists(final_merged_folderpath):
    os.mkdir(final_merged_folderpath)


  

Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_0/2020_11_16_to_2021_04_13.csv to merge_dir_0
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_0/2020_11_16_to_2021_11_30.csv to merge_dir_0
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_4/1999_11_29_to_2031_01_09.csv to merge_dir_4
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_1/2021_12_09_to_2022_04_08.csv to merge_dir_1
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_2/2020_11_13_to_2021_10_28.csv to merge_dir_2
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_1/2000_12_31_to_2031_01_09.csv to merge_dir_1
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_2/2000_11_29_to_2020_11_17.csv to merge_dir_2
Copied /Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_2/2020_11_14_to_2021_03_30.csv to merge_dir_2
Copied /Users/michaelfelzan/Desktop/WTSB

In [65]:
# Bare expression: the notebook displays the current contents of
# `mergefolderpaths` as this cell's output.
mergefolderpaths

['/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_0',
 '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_1',
 '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_2',
 '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_3',
 '/Users/michaelfelzan/Desktop/WTSB6/WT_Outputted_CSV_Logs/merge_dir_4']

In [76]:
# For each merge folder, stitch its CSV files into one 'concatCSV_<n>.csv'
# (header kept from the first file only).
for mergedir in mergefolderpaths:
    merge_dir_no = mergedir.split("merge_dir_")[1]
    concatCSV = os.path.join(mergedir,
                             f"concatCSV_{merge_dir_no}.csv")

    # An existing concatenated file means this folder was already merged.
    if os.path.exists(concatCSV):
        continue

    # The glob runs before the output file is created, so the output never
    # appears among its own inputs.
    allFiles = sorted(glob.glob(mergedir + "/*.csv"))
    if not allFiles:
        # Nothing to merge: do not create an empty output file or report a
        # merge that never happened.
        print(f"No CSV files found in merge_dir_{merge_dir_no}; nothing to merge")
        continue

    with open(concatCSV, 'wb') as outfile:
        for i, fname in enumerate(allFiles):
            with open(fname, 'rb') as infile:
                if i != 0:
                    infile.readline()  # Throw away header on all but first file
                shutil.copyfileobj(infile, outfile)  # Block copy rest of file without parsing

    # Built once, after the output file has been written and closed.
    trimmednames = [os.path.basename(csvfil) for csvfil in allFiles]
    print("     *~*~~~~~**~~~~~~~~~~~~~~**~*~*\n"
          f"{trimmednames}"
          f"\n in folder merge_dir_{merge_dir_no}"
          f"\n have been successfully merged together"
          "\n     *~*~~~~~**~~~~~~~~~~~~~~**~*~*")

     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['2020_11_16_to_2021_04_13.csv', '2020_11_16_to_2021_04_14.csv', '2020_11_16_to_2021_11_01.csv', '2020_11_16_to_2021_11_11.csv', '2020_11_16_to_2021_11_16.csv', '2020_11_16_to_2021_11_30.csv']
 in folder merge_dir_0
 have been successfully merged together
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
     *~*~~~~~**~~~~~~~~~~~~~~**~*~*
['1999_11_29_to_2022_04_11.csv', '2000_01_01_to_2022_05_30.csv', '2000_01_30_to_2022_07_22.csv', '2000_12_31_to_2022_04_01.csv', '2000_12_31_to_2031_01_09.csv', '2002_07_24_to_2022_04_06.csv', '2021_11_30_to_2023_02_03.csv', '2021_12_08_to_2021_12_13.csv', '2021_12_09_to_2022_03_16.csv', '2021_12_09_to_2022_03_18.csv', '2021_12_09_to_2022_03_21.csv', '2021_12_09_to_2022_03_23.csv', '2021_12_09_to_2022_04_01.csv', '2021_12_09_to_2022_04_08.csv', '2021_12_09_to_2022_04_11.csv', '2021_12_09_to_2022_04_12.csv', '2021_12_09_to_2022_04_13.csv', '2021_12_09_to_2022_04_29.csv', '2021_12_09_to_2022_05_02.csv', '2021_12_09_to_2022_05_10.c

In [77]:
# Move each merge folder's 'concatCSV' file into the 'final_merged' folder,
# never replacing a file that is already there.
for mergedir in mergefolderpaths:
    listofmergedir_files = os.listdir(mergedir)

    for file in listofmergedir_files:
        if "concatCSV" not in file:
            continue

        og_placement = os.path.join(mergedir, file)
        new_dest = os.path.join(final_merged_folderpath, file)

        if os.path.exists(new_dest):
            # Report the skip instead of claiming a move that did not happen.
            print(f"{file} already exists in 'final_merged' folder; left {og_placement} in place")
            continue

        shutil.move(og_placement, new_dest)
        print(f"{file} was moved to base of 'final_merged' folder")

concatCSV_0.csv was moved to base of 'final_merged' folder
concatCSV_1.csv was moved to base of 'final_merged' folder
concatCSV_2.csv was moved to base of 'final_merged' folder
concatCSV_3.csv was moved to base of 'final_merged' folder
concatCSV_4.csv was moved to base of 'final_merged' folder
