In [1]:
#Imports
from os import listdir
from os.path import isfile, join
from collections import Counter
import re
import pandas as pd
import matplotlib.pyplot as plt
import time

In [2]:
def fileFilter(path):
    allFiles = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith('.csv')]
    nrFilesInitialy = len(allFiles)
    print ("Number of traces: ", nrFilesInitialy)

    patternsWhiteList =['BF1', 'BF2', 'BF3', 'BF4', 'BF5', 'BF6']
    patternsBlackList = ['TesRel']
    patternCounter = [0] * len(patternsWhiteList)
    
    wrongFiles = []
    for file in allFiles:
        for idx, pattern in enumerate(patternsWhiteList):    
            match = re.search(pattern,file)
            if match:
                patternCounter[idx] +=1
                if pattern in patternsBlackList:
                    wrongFiles.append(file)
                break
            if (not match and pattern==patternsWhiteList[-1]):
                wrongFiles.append(file)
    
    print ("TRACES COUNT:")
    for count, pat in zip(patternCounter, patternsWhiteList):
        print (pat, ": ",count )
    
    patternCounter.append(len(allFiles)-sum(patternCounter))
    print ("Others: ", patternCounter[-1])
    print ("Number of files to discard: ", len(wrongFiles))
    allFiles = [x for x in allFiles if x not in wrongFiles]
    return allFiles

In [3]:
def processAndTransform(path, allFiles):
    if 'dfResult' in locals():
        del(dfResult)
    for currentFileId in range(len(allFiles)):
    #for currentFileId in range(10):
    # 2. Fuentes de datos
        file = join(path, allFiles[currentFileId])
        dfStatic = pd.read_csv(file, nrows=5)
        dfStatic = dfStatic.set_index(dfStatic.columns[0]).T 
        dfStatic = dfStatic.reset_index()
    #    print(dfStatic)
    #    print("Data Shape: ", dfStatic.shape, "\n")
    #    print(dfStatic.dtypes)
    
        dfGraphic = pd.read_csv(file, skiprows=6)
    #    print (dfGraphic.head())
    #    print("Data Shape: ", dfStatic.shape, "\n")
    #    print(dfStatic.dtypes)
    
    # 3.1 Ajuste de los atributos
        if (10 == len(dfGraphic.columns)):
            dfGraphic.columns = ['Time', 'Speed1', 'Speed2', 'AbsolutePosition', 'Torque1', 'BrakeTorque', 'BrakeCMDForce', 'Position1', 'Position2', 'SystemLinearSpeed']
        else:
            print ("Graph contains unexpected number of columns in file: ", allFiles[currentFileId])
        if (6 == len(dfStatic.columns)):
            dfStatic.columns = ['ID', 'Date', 'Time', 'Name', 'MotorTemperature', 'BrakeTemperature']
        else:
            print ("Static info contains unexpected number of columns in file: ", allFiles[currentFileId])
    
        dfStatic["ID"] = pd.to_numeric(dfStatic["ID"])
        dfStatic[["MotorTemperature", "BrakeTemperature"]] = dfStatic[["MotorTemperature", "BrakeTemperature"]].apply(pd.to_numeric)
        dfStatic["Date"] = pd.to_datetime(dfStatic["Date"], infer_datetime_format=True)
        dfStatic["Time"] = pd.to_timedelta(dfStatic["Time"])
    #    print(dfStatic.dtypes)
    
        dfGraphic[["Torque1", "BrakeTorque"]] = dfGraphic[["Torque1", "BrakeTorque"]].apply(pd.to_numeric, downcast="float")
        dfGraphic["BrakeCMDForce"] = dfGraphic["BrakeCMDForce"].apply(pd.to_numeric)
    #    print(dfGraphic.dtypes)
    
    # 3.1.2 Eliminamos atributos no necesarios
        dfGraphic.drop(columns=['Speed2', 'Position2'], inplace=True)
        dfGraphic.drop(columns=['SystemLinearSpeed', 'AbsolutePosition', 'BrakeTorque'], inplace=True)
    
    # 3.2 Missing values
        if (0 != sum(dfGraphic.isnull().sum()) and 0 == sum(dfStatic.isnull().sum())):
            print ("Missing values found")
            print ("Graph: ", dfGraphic.isnull().sum())
            print ("Static Info: ", dfStatic.isnull().sum())
            print ("\nFollowing Graph is discarded: ", allFiles[currentFileId])
            del allFiles[currentFileId]
            print ("\n")
    
    # 4. Transformaciones
    # 4.1 Datos estáticos
        # Split the 'Name' column into 'Config' and 'Process'
        dfStatic['Config'] = dfStatic['Name'].str.split('_', 1, expand=True)[1]
        dfStatic['Process'] = dfStatic['Name'].str.split('_').str[-1]
        # Drop the 'Name' column
        dfStatic.drop(columns=['Name'], inplace=True)
        #Apply factor
        dfStatic['MotorTemperature'] = dfStatic['MotorTemperature'].div(100).round(2)
        dfStatic['BrakeTemperature'] = dfStatic['BrakeTemperature'].div(100).round(2)
    #    print(dfStatic)
    
    # 4.2 Datos del gráfico
        dfGraphic['Torque1'] = dfGraphic['Torque1'].div(100).round(2)
    #    print(dfGraphic)
    
        process = dfStatic.iloc[0]['Process'].lower()
        if (process == 'bf1'):
            accEnd = 150
            topEnd = 414
        elif (process == 'bf2'):
            accEnd = 150
            topEnd = 756
        elif (process == 'bf3'):
            accEnd = 150
            topEnd = 570
        elif (process == 'bf4'):
            accEnd = 150
            topEnd = 337
        elif (process == 'bf5'):
            accEnd = 150
            topEnd = 756
        elif (process == 'bf6'):
            accEnd = 150
            topEnd = 324
        else:
            accEnd = 150
            topEnd = len(dfGraphic.index)-200
         
        df_acc = dfGraphic.iloc[:accEnd,:]
        df_top = dfGraphic.iloc[accEnd:topEnd,:]
        df_decc = dfGraphic.iloc[topEnd:,:]
    #    print("Shape of new dataframes - {} , {} , {}".format(df_acc.shape, df_top.shape, df_decc.shape))
    
    # 4.2.1 Aceleración
        col=['CurveProcess', 'TimeLength',\
             'PositionInit','PositionLength',\
             'Speed1Mean','Speed1Std','Speed1Min','Speed1Q1', 'Speed1Q2', 'Speed1Q3','Speed1Max',\
             'Torque1Mean', 'Torque1Std', 'Torque1Min', 'Torque1Q1', 'Torque1Q2', 'Torque1Q3', 'Torque1Max',\
             'BrakeCMDForceMean', 'BrakeCMDForceStd', 'BrakeCMDForceMin', 'BrakeCMDForceQ1', 'BrakeCMDForceQ2', 'BrakeCMDForceQ3', 'BrakeCMDForceMax']
        dfGraphicTrans = pd.DataFrame(columns=col, dtype=int)
        
        df_1 = pd.DataFrame(df_acc['Speed1'].describe()).T
        df_2 = pd.DataFrame(df_acc['Torque1'].describe()).T
        df_3 = pd.DataFrame(df_acc['BrakeCMDForce'].describe()).T
        
        dfGraphicTrans.loc[0] =  'acceleration', df_acc.shape[0], \
                        df_acc['Position1'].iloc[0],df_acc['Position1'].iloc[-1]-df_acc['Position1'].iloc[0],\
                        df_1.iloc[0][1],df_1.iloc[0][2],df_1.iloc[0][3],df_1.iloc[0][4],df_1.iloc[0][5],df_1.iloc[0][6],df_1.iloc[0][7],\
                        df_2.iloc[0][1],df_2.iloc[0][2],df_2.iloc[0][3],df_2.iloc[0][4],df_2.iloc[0][5],df_2.iloc[0][6],df_2.iloc[0][7],\
                        df_3.iloc[0][1],df_3.iloc[0][2],df_3.iloc[0][3],df_3.iloc[0][4],df_3.iloc[0][5],df_3.iloc[0][6],df_3.iloc[0][7]
    #    dfGraphicTrans
    
    # 4.2.2 Velocidad punta
        df_1 = pd.DataFrame(df_top['Speed1'].describe()).T
        df_2 = pd.DataFrame(df_top['Torque1'].describe()).T
        df_3 = pd.DataFrame(df_top['BrakeCMDForce'].describe()).T
        dfGraphicTrans.loc[1] =  'top', df_top.shape[0], \
                    df_top['Position1'].iloc[0],df_top['Position1'].iloc[-1]-df_top['Position1'].iloc[0],\
                    df_1.iloc[0][1],df_1.iloc[0][2],df_1.iloc[0][3],df_1.iloc[0][4],df_1.iloc[0][5],df_1.iloc[0][6],df_1.iloc[0][7],\
                    df_2.iloc[0][1],df_2.iloc[0][2],df_2.iloc[0][3],df_2.iloc[0][4],df_2.iloc[0][5],df_2.iloc[0][6],df_2.iloc[0][7],\
                    df_3.iloc[0][1],df_3.iloc[0][2],df_3.iloc[0][3],df_3.iloc[0][4],df_3.iloc[0][5],df_3.iloc[0][6],df_3.iloc[0][7]
    #    print(dfGraphicTrans)
    
    # 4.2.3 Deceleración
        df_1 = pd.DataFrame(df_top['Speed1'].describe()).T
        df_2 = pd.DataFrame(df_top['Torque1'].describe()).T
        df_3 = pd.DataFrame(df_top['BrakeCMDForce'].describe()).T
        dfGraphicTrans.loc[2] =  'decceleration', df_decc.shape[0], \
                    df_decc['Position1'].iloc[0],df_decc['Position1'].iloc[-1]-df_decc['Position1'].iloc[0],\
                    df_1.iloc[0][1],df_1.iloc[0][2],df_1.iloc[0][3],df_1.iloc[0][4],df_1.iloc[0][5],df_1.iloc[0][6],df_1.iloc[0][7],\
                    df_2.iloc[0][1],df_2.iloc[0][2],df_2.iloc[0][3],df_2.iloc[0][4],df_2.iloc[0][5],df_2.iloc[0][6],df_2.iloc[0][7],\
                    df_3.iloc[0][1],df_3.iloc[0][2],df_3.iloc[0][3],df_3.iloc[0][4],df_3.iloc[0][5],df_3.iloc[0][6],df_3.iloc[0][7]
    #    print(dfGraphicTrans)
    
    # 4.2.4. Instancias finales del gráfico
        dfStatic.loc[1] = dfStatic.loc[0]
        dfStatic.loc[2] = dfStatic.loc[0]
        
        if 'dfResult' not in locals():
            dfResult = pd.concat([dfStatic, dfGraphicTrans], axis=1)
        else:
            #dfResult = dfResult.append(pd.concat([dfStatic, dfGraphicTrans], axis=1))
            dfResult = pd.concat([dfResult, pd.concat([dfStatic, dfGraphicTrans], axis=1)], axis=0, ignore_index=True)
    #    print (dfResult)
    
    dfResult = dfResult.set_index(dfResult.columns[0]).reset_index()
    dfResult.head()
    return dfResult
    #dfResult.to_csv(join(basePath, 'TransformedData.csv'), sep='\t', encoding='utf-8')

#### Configure the following list with the folder names

In [4]:
basePath=".."
folderNames = ["20230317", "20230319", "20230320", "20230321", "20230322", "20230326", "20230327", "20230328", "20230329", "20230330", "20230331",\
               "20230401", "20230402", "20230403", "20230404", "20230405", "20230406", "20230407", "20230408", "20230409", "202304010",
               "20230411", "20230412", "20230413"]

In [5]:
for folderName in folderNames:
    pathFolder = join(basePath,folderName)
    path = join (pathFolder, folderName+"Traces")
    print("\n\033[1m" + path + "\033[0m")
    allFiles = fileFilter (path)
    
    start_time = time.time()
    dfResult = processAndTransform(path, allFiles)
    end_time = time.time()
    elapsed_time = (end_time - start_time)/60
    # Print the elapsed time
    print(f"Time taken to perform processAndTransform: {elapsed_time:.2f} minutes")
    
    dfResult.to_csv(join(pathFolder, 'TransformedData.csv'), sep='\t', encoding='utf-8')


[1m..\20230317\20230317Traces[0m
Number of traces:  17997
TRACES COUNT:
BF1 :  2997
BF2 :  3000
BF3 :  3000
BF4 :  3000
BF5 :  3000
BF6 :  3000
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 14.52 minutes

[1m..\20230319\20230319Traces[0m
Number of traces:  196373
TRACES COUNT:
BF1 :  32701
BF2 :  32733
BF3 :  32731
BF4 :  32736
BF5 :  32735
BF6 :  32737
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 388.14 minutes

[1m..\20230320\20230320Traces[0m
Number of traces:  48066
TRACES COUNT:
BF1 :  8004
BF2 :  8012
BF3 :  8012
BF4 :  8013
BF5 :  8012
BF6 :  8013
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 67.13 minutes

[1m..\20230321\20230321Traces[0m
Number of traces:  8510
TRACES COUNT:
BF1 :  1416
BF2 :  1418
BF3 :  1420
BF4 :  1421
BF5 :  1419
BF6 :  1416
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 10.90 minutes

[1m..\2

UnboundLocalError: local variable 'dfResult' referenced before assignment

In [6]:
basePath=".."
folderNames = ["20230330", "20230331",\
               "20230401", "20230402", "20230403", "20230404", "20230405", "20230406", "20230407", "20230408", "20230409", "202304010",
               "20230411", "20230412", "20230413"]

In [7]:
for folderName in folderNames:
    pathFolder = join(basePath,folderName)
    path = join (pathFolder, folderName+"Traces")
    print("\n\033[1m" + path + "\033[0m")
    allFiles = fileFilter (path)
    
    start_time = time.time()
    result = processAndTransform(path, allFiles)
    end_time = time.time()
    elapsed_time = (end_time - start_time)/60
    # Print the elapsed time
    print(f"Time taken to perform processAndTransform: {elapsed_time:.2f} minutes")
    
    result.to_csv(join(pathFolder, 'TransformedData.csv'), sep='\t', encoding='utf-8')


[1m..\20230330\20230330Traces[0m
Number of traces:  15943
TRACES COUNT:
BF1 :  2656
BF2 :  2659
BF3 :  2660
BF4 :  2657
BF5 :  2654
BF6 :  2657
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 17.14 minutes

[1m..\20230331\20230331Traces[0m
Number of traces:  35993
TRACES COUNT:
BF1 :  5994
BF2 :  5999
BF3 :  6000
BF4 :  6000
BF5 :  6000
BF6 :  6000
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 40.11 minutes

[1m..\20230401\20230401Traces[0m
Number of traces:  71958
TRACES COUNT:
BF1 :  11987
BF2 :  11995
BF3 :  11995
BF4 :  11991
BF5 :  11994
BF6 :  11996
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 89.38 minutes

[1m..\20230402\20230402Traces[0m
Number of traces:  71960
TRACES COUNT:
BF1 :  11984
BF2 :  11997
BF3 :  11995
BF4 :  11993
BF5 :  11997
BF6 :  11994
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 88.98 minutes

[1

FileNotFoundError: [WinError 3] The system cannot find the path specified: '..\\202304010\\202304010Traces'

In [11]:
basePath=".."
folderNames = ["20230410", "20230411", "20230412", "20230413"]

In [12]:
for folderName in folderNames:
    pathFolder = join(basePath,folderName)
    path = join (pathFolder, folderName+"Traces")
    print("\n\033[1m" + path + "\033[0m")
    allFiles = fileFilter (path)
    
    start_time = time.time()
    result = processAndTransform(path, allFiles)
    end_time = time.time()
    elapsed_time = (end_time - start_time)/60
    # Print the elapsed time
    print(f"Time taken to perform processAndTransform: {elapsed_time:.2f} minutes")
    
    result.to_csv(join(pathFolder, 'TransformedData.csv'), sep='\t', encoding='utf-8')


[1m..\20230410\20230410Traces[0m
Number of traces:  71641
TRACES COUNT:
BF1 :  11923
BF2 :  11943
BF3 :  11941
BF4 :  11943
BF5 :  11947
BF6 :  11944
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 85.96 minutes

[1m..\20230411\20230411Traces[0m
Number of traces:  71667
TRACES COUNT:
BF1 :  11936
BF2 :  11940
BF3 :  11949
BF4 :  11949
BF5 :  11932
BF6 :  11961
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 85.90 minutes

[1m..\20230412\20230412Traces[0m
Number of traces:  41806
TRACES COUNT:
BF1 :  6965
BF2 :  6975
BF3 :  6956
BF4 :  6971
BF5 :  6971
BF6 :  6968
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 44.28 minutes

[1m..\20230413\20230413Traces[0m
Number of traces:  53804
TRACES COUNT:
BF1 :  8966
BF2 :  8966
BF3 :  8968
BF4 :  8968
BF5 :  8971
BF6 :  8965
Others:  0
Number of files to discard:  0
Time taken to perform processAndTransform: 60.02 minutes
