### Import Statements:

In [793]:
import os
from datetime import datetime
import csv
from csv import reader, writer, DictWriter

### Establishing local directory path to Winter Turf SD Card folders:

In [876]:
# Enter local directory path to SD cards here:
workindir = r'/Users/michaelfelzan/Desktop/DEMO_2021-2022_SD_Cards'
os.chdir(workindir)

os.getcwd()

'/Users/michaelfelzan/Desktop/DEMO_2021-2022_SD_Cards'

##### (printing names of all SD card folders at base of directory):

In [875]:
node_folders = []

for nodefoldername in os.listdir():
    if nodefoldername != '.DS_Store':
        node_folders.append(nodefoldername)
        
for folder in node_folders:
    print(folder)

Winter_Turf_Type_B_-_6
Winter_Turf_Type_B_-_1
Winter_Turf_Type_A_-_7
Winter_Turf_Type_A_-_8
Winter_Turf_Type_A_-_10
Winter_Turf_Type_A_-_28
Winter_Turf_Type_A_-_17
Winter_Turf_Type_A_-_21
Winter_Turf_Type_A_-_19
Winter_Turf_Type_A_-_26
Winter_Turf_Type_A_-_18
Winter_Turf_Type_A_-_27
Winter_Turf_Type_A_-_20
Winter_Turf_Type_A_-_16
Winter_Turf_Type_A_-_34
Winter_Turf_Type_B_-_12
Winter_Turf_Type_A_-_35
Winter_Turf_Type_B_-_5
Winter_Turf_Type_A_-_3
Winter_Turf_Type_A_-_2
Winter_Turf_Type_A_-_5
Winter_Turf_Type_B_-_4
Winter_Turf_Type_A_-_14
Winter_Turf_Type_A_-_22
Winter_Turf_Type_A_-_23
Winter_Turf_Type_A_-_24
Winter_Turf_Type_A_-_12
Winter_Turf_Type_A_-_15
Winter_Turf_Type_B_-_11
Winter_Turf_Type_A_-_31
Winter_Turf_Type_A_-_36


### Defining 'stand-alone' functions:

In [870]:
def ListDirNoDSstore(path):
    """This function simply lists the contents of a
    directory path, though leaves out the '.DS_Store' file.
    
    Parameters
    ------------
    path: 'raw' str (path to directory)
    """
    rawcontents = os.listdir(path)
    contents = []
    for item in rawcontents:
        if item != '.DS_Store':
            contents.append(item)
    
    return contents



def LogInfoGetter(txtlog):
    """This function is utilized by some attributes/sub-functions
    of the 'SubFolder()' class. The function parses the text file (log)
    which is inputted, determines which rows are headers (column names)
    vs. data, and returns a dict containing info about the .txt log.
    
    Info in the returned dict include:
    'ColNames': a list, with all column names for the log in sequential order
               (if the log has no column names, generic names Col_1, Col_2, etc.
               are created for the log)
    'Data':  The rows of the .txt which represent data (not headers); list
    'AllLines': All rows of the data (headers and data) in list form
    'HeaderIndicies': indicies which represent headers in the 'All Lines' list
    'LogDate': date of the log (eg. 2021-09-17)
    
    Parameters
    ------------
    txtlog: 'raw' str (path to .txt file log)
    """
    
    with open(txtlog, "r") as file_object:
        Lines = file_object.readlines()
    
        cleanlines = [] # all lines, sans '\n'
        header_indicies = [] 
        column_names = []
        row_entries = [] # data, corresponding to col. names
    
        for item in Lines:
            cleanlines.append(item.split('\n')[0])
        
        for line in cleanlines:
            lineindex = cleanlines.index(line)
            splitbycomma = line.split(',') #splitting each line in txt by commas
        
            # If a line in the .txt doesnt have a comma, it is classified
            # as a header
            try:
                splitbycomma[1]
            except:
                header_indicies.append(lineindex)
            
            # If the first entry of a line can't be converted to a 'float'
            # (column names all assummed to contain letter characters), then
            # it is classified as a header.
            try:
                float(splitbycomma[0])
            except:
                if lineindex not in header_indicies:
                    header_indicies.append(lineindex)

        for line in cleanlines:
            lineindex = cleanlines.index(line)   # getting index no.'s of all lines
            if lineindex not in header_indicies: # if line is not a header:
                linechunks = line.split(',')     # split by comma, and
                row_entries.append(linechunks)   # append each chunk into 'row entries'
        
        try:
            row_entries[0]
        except:
            return('error: no data in .txt') # this is returned if .txt is blank
        
        # If .txt actually has data:
        else:
            len_first_entry = len(row_entries[0]) # getting length of col's in first row,
            for entry in row_entries:             # to confirm all other rows have the
                if len_first_entry != len(entry): # same length as the first row.
                    result = False
                    print("All elements are not equal")
                    break
    
            # A Trigger variable for a 'detected header fit' is established here;
            # the variable is effictively switched back to 'False' every time a 
            # new .txt file is opened.
            header_fit_detected = False 
            for headerindex in header_indicies:
                headeritem_w_commasplit = (((cleanlines[headerindex]).split(',')))
                len_of_header = len(headeritem_w_commasplit)
                # if the no. columns of header contender is same as len of first entry,
                # AND the trigger isn't already set to 'True'....
                # ...then the contents of header are assigned as the column names,
                # and trigger is set to 'True' (errors out if there are 2 poss. matches)
                if len_of_header == len_first_entry:
                    if header_fit_detected == True:
                        print(f"ERROR -- two possible matches for headers detected for {txtlog}")
                        break
                    else:
                        header_fit_detected = True
                        column_names = headeritem_w_commasplit
            
            # After all of that, if the header_fit_detected is still set to 'False',
            # then .txt log is determined to have no header which represents col. names,
            # and generic column names are created for the dataset
            if header_fit_detected == False:
                #print("no header detected for x; columns named numerically")
                for i in range(len_first_entry):
                    column_names.append(f'Col_{i}') #Col_1, Col_2, Col_3, etc.

            # (parsing .txt dir path for just the 'date' (eg. 2021-07-16))
            firstnamesplit = txtlog.split('/Logs/')[1]
            secondnamesplit = firstnamesplit.split('/Log')[1]
            thirdnamesplit = secondnamesplit.split('.txt')[0]
        
            # Two new empty lists are created here, which will store the data collected
            # by this function, PLUS the NODE_ID (eg. e00fce68816c2bc59976cdf2) and the
            # log date (2000-01-01), so this may be stored on .CSV
            data_w_nodeID_logdate_appended = []
            colnames_w_nodeID_logdate_appended = []
        
            for datapiece in row_entries:
                iterdatapiece = []
                subfoldname_split1 = txtlog.split('/GEMS/')[1]
                subfoldname = subfoldname_split1.split('/Logs/')[0]
                iterdatapiece.append(subfoldname)    # appending node ID,
                iterdatapiece.append(thirdnamesplit) # then log date,
                for number in datapiece:             # then every data number 
                    iterdatapiece.append(number)     # within row 
                data_w_nodeID_logdate_appended.append(iterdatapiece)
            
            colnames_w_nodeID_logdate_appended.append('NODE_ID') # appending column names
            colnames_w_nodeID_logdate_appended.append('log_date')# for this data as well.
            for colname in column_names:
                colnames_w_nodeID_logdate_appended.append(colname)
    
            return {
                'ColNames': colnames_w_nodeID_logdate_appended,
                'Data':  data_w_nodeID_logdate_appended,
                'AllLines': cleanlines,
                'HeaderIndicies': header_indicies,
                'LogDate': thirdnamesplit
                   }

In [871]:
class NodeFolder:
    """This class gathers the properties of a 'Node Folder'
    eg. 'Winter_Turf_Type_A_-_2', so that the various info
    associated with one of these folders may be accessed
    by calling the modules associated with the created 
    class object.
    
    Attributes
    ------------
    - nodename (str): Name of the 'Node' 
            eg. Winter_Turf_Type_A_-_2

    Modules
    ------------
    - self.gems_folderpath : retrieves path to /GEMS/ dir
    - self.node_subfolders : provides list containing 
            names of 'subfolders' associated with each
            Node. (eg. e00fce68816c2bc59976cdf2)
    - self.no_of_subfolders : lists number of subfolders
            within each node folder (most have 1; though 
            some have 2.)
    """
    def __init__(self, nodename):
        self.nodename = nodename
        
        gemsfolderpath = os.path.join(workindir,nodename,'GEMS')
        nodesubfolders = ListDirNoDSstore(gemsfolderpath)
            
        self.gems_folderpath = gemsfolderpath
        self.node_subfolders = nodesubfolders
        self.no_of_subfolders = len(nodesubfolders) 
        
class SubNodeFolder:
    """This class gathers the properties of a 'Sub-Node 
    Folder' eg. 'e00fce68816c2bc59976cdf2', so that the 
    info associated with one of these subfolders may be 
    accessed by calling the modules associated with the 
    class object. 
    Additionally, this class includes two
    functions -- one which gathers all log .txt paths 
    within a subfolder, and another which incorporates
    the standalone 'LogInfoGetter()' function to create
    a .csv file(s) associated with all .txt logs within
    a subfolder.
    
    Attributes
    ------------
    - subfoldername (str) : eg. e00fce68816c2bc59976cdf2
    - nodename (str) : eg. Winter_Turf_Type_A_-_2
    - gemsfolderpath (raw str) eg. r'/Users/mf/Desktop/
            SD_Cards/Winter_Turf_Type_A_-_23/GEMS'

    Modules
    ------------
    - self.subfolder_path : path to subfolder on local dir
    - self.subfolder_contents : list of items (folders 
            and files) within subfold
    - self.path_to_logs : path to /Logs/ dir within subfold
    - self.logs_contents : list of items within /Logs/
    - self.years_as_ints : list of folders named by years
        within /Logs/ eg. [2000, 2020, 2021]
    - self.year_folders : year folders as strings
    
    Functions (descriptions for functions below)
    ------------
    - LogTextPathsRetriever(self)
    - Log_CSV(self)
    
    """
    def __init__(self, subfoldername, nodename, gemsfolderpath):
        self.subfoldername = subfoldername
        self.nodename = nodename
        self.gemsfolderpath = gemsfolderpath
        
        subfolderpath = os.path.join(gemsfolderpath, subfoldername)
        subfoldercontents = ListDirNoDSstore(subfolderpath)
        
        path_to_logs = os.path.join(subfolderpath, 'Logs')
        logs_contents = ListDirNoDSstore(path_to_logs)
        
        self.subfolder_path = subfolderpath  
        self.subfolder_contents = subfoldercontents
        self.path_to_logs = path_to_logs
        self.logs_contents = logs_contents
        
        
        non_year_items = ['UnsentLogs.dat', 'Header.dat', 'UnsentLogs.txt']
        year_folders = []
        years_as_ints = []
        
        for content in self.logs_contents:
            if content not in non_year_items:
                if '.csv' not in content:
                    years_as_ints.append(int(content))
                    
        years_as_ints.sort()
        
        for intyear in years_as_ints:
            year_folders.append(str(intyear))
            
        self.years_as_ints = years_as_ints
        self.year_folders = year_folders
 

    def LogTextPathsRetriever(self):
        
        textlogpaths = []
        chronological_dates = []
        nonreal_dates = []
        chronological_textlogpaths = []
        
        for yearfold in self.year_folders:
            iterloglist = ListDirNoDSstore(os.path.join(
                self.path_to_logs,
                yearfold))
            for textfilename in iterloglist:
                textlogpaths.append(os.path.join(
                    self.path_to_logs,
                    yearfold,
                    textfilename))
        
        for path in textlogpaths:
            firstnamesplit = path.split('/Logs/')[1]
            secondnamesplit = firstnamesplit.split('/Log')[1]
            thirdnamesplit = secondnamesplit.split('.txt')[0]
            try:
                date_time_obj = datetime.fromisoformat(thirdnamesplit)
            except:
                nonreal_dates.append(thirdnamesplit)
            else:
                chronological_dates.append(date_time_obj)
            
        chronological_dates.sort()
        
        for baddate in nonreal_dates:
            splitbaddate = baddate.split('-')
            pathtobaddatetxt = os.path.join(self.path_to_logs,
                                           splitbaddate[0],
                                           f'Log{baddate}.txt')
            chronological_textlogpaths.append(pathtobaddatetxt)
        
        for timeobj in chronological_dates:
            itr_year = int(timeobj.year)
            itr_month = int(timeobj.month)
            itr_day = int(timeobj.day)
            if itr_month < 10:
                itr_month = '0'+str(itr_month)
            if itr_day < 10:
                itr_day = '0'+str(itr_day)
            newiterpath = os.path.join(self.path_to_logs,
                                      str(itr_year),
                                      f'Log{itr_year}-{itr_month}-{itr_day}.txt')
            chronological_textlogpaths.append(newiterpath)
        
        return chronological_textlogpaths
    
    
    def Log_CSV(self):
        
        firstdayincycle = 'null'
        lastdayincycle = 'null'
        
        CurrentCSVtoAppendto = 'null'
        CurrentColNames = 'null'
        CurrentHeaderLength = 'null'
        
        if not self.LogTextPathsRetriever():
            print(f"No .txt logs in {self.subfoldername} of {self.nodename}.")
            pass
        else:
            if LogInfoGetter((self.LogTextPathsRetriever())[0]) == 'error: no data in .txt':
                pass
            else:
                firstloginfo = LogInfoGetter((self.LogTextPathsRetriever())[0])
                firstdayincycle = firstloginfo['LogDate']
                CurrentCSVtoAppendto = os.path.join(self.path_to_logs,
                                               f'Logs_{firstdayincycle}.csv')
                CurrentColNames = firstloginfo['ColNames']
                CurrentHeaderLength  = len(CurrentColNames)
        
                with open(CurrentCSVtoAppendto,'w',newline='') as csvfile:
                    fieldnames = CurrentColNames
                    thewriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
                    thewriter.writeheader()
                    for D in range(len(firstloginfo['Data'])):
                        iter_dict = {}
                        for i in range(len((firstloginfo)['ColNames'])):
                            iter_dict[firstloginfo['ColNames'][i]] = firstloginfo['Data'][D][i]
                        thewriter.writerow(iter_dict)
                    print(f"<<{self.nodename}, {self.subfoldername}>> Created ~Logs_{firstdayincycle}.csv~ at base of /Logs/")
                    csvfile.close()
                
            textfile_iteration_counter = 0
            while textfile_iteration_counter < len(self.LogTextPathsRetriever())-1: 
                for i in range(len(self.LogTextPathsRetriever())):
                    if i != 0:
                        iterloginfo = LogInfoGetter((self.LogTextPathsRetriever())[i])
                        if iterloginfo  == 'error: no data in .txt':
                            textfile_iteration_counter+=1
                            pass
                        else:
                            iterlogColNames = iterloginfo['ColNames']
                            #iterlogHeaderLength = len(iterlogColNames)
                
                            if iterlogColNames == CurrentColNames:
                                with open(CurrentCSVtoAppendto,'a',newline='') as csvfile:
                                    dictwriter_object = DictWriter(csvfile,
                                                               fieldnames=CurrentColNames)
                                    for D in range(len(iterloginfo['Data'])):
                                        iter_dict = {}
                                        for i in range(len((iterloginfo)['ColNames'])):
                                            iter_dict[iterloginfo['ColNames'][i]] = iterloginfo['Data'][D][i]
                                        dictwriter_object.writerow(iter_dict)
                                    csvfile.close()
                                lastdayincycle = iterloginfo['LogDate']
                                newCSVname = os.path.join(
                                    self.path_to_logs,
                                    f'Logs_{firstdayincycle}_to_{lastdayincycle}.csv'
                                )
                                os.rename(CurrentCSVtoAppendto,
                                         newCSVname)
                                CurrentCSVtoAppendto = newCSVname
                                textfile_iteration_counter += 1
                                if textfile_iteration_counter > len(self.LogTextPathsRetriever()):
                                    break
                            
                            else:
                                pastCSV_date_presplit = CurrentCSVtoAppendto.split('.csv')[0]
                                pastCSV_date = pastCSV_date_presplit.split('_')[-1]
                                CurrentCSVtoAppendto = os.path.join(self.path_to_logs,
                                                                    'Logs_'+iterloginfo["LogDate"]+'.csv')
                                CurrentColNames = iterlogColNames
                                firstdayincycle = iterloginfo['LogDate']
                                lastdayincycle = 'null'
                                with open(CurrentCSVtoAppendto,'w',newline='') as csvfile:
                                    fieldnames = CurrentColNames
                                    thewriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
                                    thewriter.writeheader()
                                    for D in range(len(iterloginfo['Data'])):
                                        iter_dict = {}
                                        for i in range(len((iterloginfo)['ColNames'])):
                                            iter_dict[iterloginfo['ColNames'][i]] = iterloginfo['Data'][D][i]
                                        thewriter.writerow(iter_dict)
                                    print(f"<<{self.nodename}, {self.subfoldername}>> Inconsistent log field names/amount between {pastCSV_date} and {firstdayincycle};"+
                                          f" Starting new CSV file (which following logs will be appended to) at base of /Logs/")
                                    csvfile.close()
                                textfile_iteration_counter += 1
                                if textfile_iteration_counter > len(self.LogTextPathsRetriever()):
                                    break

### Running functions on every SD card folder:

In [872]:
for nodefld in node_folders:
    iternodefld = NodeFolder(nodefld)
    for subfldr in iternodefld.node_subfolders:
        itersubfld = SubNodeFolder(subfldr,
                                   iternodefld.nodename,
                                   iternodefld.gems_folderpath)
        itersubfld.Log_CSV() 

<<Winter_Turf_Type_B_-_6, e00fce685b02f35fe13a0a2d>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Inconsistent log field names/amount between 2000-00-01 and 2021-11-30.1; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_B_-_1, e00fce6838cebdf42fc24391>> Inconsistent log field names/amount between 2021-11-30.1 and 2020-11-17; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_7, e00fce682a79a64999b7b409>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_7, e00fce682a79a64999b7b409>> Inconsistent log field names/amount between 2021-10-28 and 2021-12-13; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_8, e00fce6829163099f836ae78>> Inconsistent log fi

<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Created ~Logs_2000-00-21.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2000-00-01 and 2000-00-00; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2165-25-45 and 2020-11-14; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_12, e00fce68b6dc19eefd628b4e>> Inconsistent log field names/amount between 2020-12-25 and 2021-12-11; Starting new CSV file (which following logs will be appended to) at base of /Logs/
<<Winter_Turf_Type_A_-_15, e00fce68ccb7400e1fb8aa98>> Created ~Logs_2000-00-01.csv~ at base of /Logs/
<<Winter_Turf_Type_A_-_15, e00fce68ccb7400e1fb8aa98>> Inconsistent log field names/amount between 2021-03-24 and 2021-12-10; Starting new CSV file (which following logs wil