In [1]:
from IPython.core.display import display, HTML 
display(HTML("<style>.container { width:95% !important; }div.output_wrapper .output { padding-left: 14px; }</style>"))

In [2]:
import pickle
import os
from pathlib import Path
import re
#import mdfreader
import time

import pandas as pd
import json

In [3]:
def convert_to_UNC(path):
    '''Converts the path to a different format -- this solves the
    issue of the paths being too long to be processed (>255chars)'''
    path = Path(path).absolute().as_posix()
    if ':' in path:
        path = path.replace(':', '$')
        return Path('//?/UNC/localhost/' + path)
    else:
        path = path.replace('//', '')
        return Path('//?/UNC/' + path)

In [256]:
df_htoe = pd.read_csv('htoe_raw-channels.txt', header=None, names=['signal_names'])

In [192]:
df_htoe.head()

Unnamed: 0,signal_names
0,$ActiveCalibrationPage
1,$ActiveCalibrationPage_1_
2,$ActiveCalibrationPage_2_
3,$ActiveCalibrationPage_3_
4,$ActiveCalibrationPage_4_


In [260]:
def process_data(x):
    
    def get_suffixes(x):
        fmtre = re.search("(\_([0-9])+)+$", x)
        if fmtre is None:
            return "None"
        else:
            return fmtre.group()
        
    def signal_column_check(data):
        if True in data:
            return True
        else:
            return False
        
    data = x
        
    # initial processing
    data.signal_names = data.signal_names.str.lower()
    data.signal_names = data.signal_names.apply(lambda x: re.sub('\W+','', x))
    data.signal_names = data.signal_names.apply(lambda x: re.sub(' ','', x))

    data["ld"] = pd.notna(data.signal_names.str.extract("(_ld$)"))
    data["dut"] = pd.notna(data.signal_names.str.extract("(_dut$)"))
    data.signal_names = data.signal_names.apply(lambda x: re.sub('(_dut$|_ld$)', '', x))
    
    data["pure"] = data.apply(lambda x: (x['dut'] == False | x['ld'] == False), axis=1)
    
    # signal suffixes
    data.signal_names = data.signal_names.apply(lambda x: re.sub('\_$', '', x))
    data["signal_suffix"] = data.signal_names.apply(lambda x: get_suffixes(x))
    data.signal_suffix = data.signal_suffix.apply(lambda x: re.sub("^\_","", x))
    data.signal_suffix = data.signal_suffix.apply(lambda x: re.sub("\_$","", x))
    data.signal_names = data.signal_names.apply(lambda x: re.sub('(\_([0-9])+)+$','', x))
    
    data[["dut_suffix", "ld_suffix", "pure_suffix"]] = "None"
    data.loc[data['dut'] == True, 'dut_suffix'] = data["signal_suffix"]
    data.loc[data['ld'] == True, 'ld_suffix'] = data["signal_suffix"]
    data.loc[data['pure'] == True, 'pure_suffix'] = data["signal_suffix"]
    
    columns = data.columns
    
    data = pd.DataFrame(data.groupby(by = ['signal_names'], as_index = False)
                     [columns[1:]].agg(["unique"]))
    data = data.reset_index()
    data = data.rename(columns = {"unique" : ""})
    data.columns = columns
        
    data.dut = data.dut.apply(lambda x:  signal_column_check(x))
    data.ld = data.ld.apply(lambda x:  signal_column_check(x))
    data.pure = data.pure.apply(lambda x:  signal_column_check(x))


    return data

In [255]:
import inspect as i
import sys
sys.stdout.write(i.getsource(get_suffixes))

def get_suffixes(x):
    fmtre = re.search("(\_([0-9])+)+$", x)
    if fmtre is None:
        return "None"
    else:
        return fmtre.group()


In [258]:
df_htoe.head(10)

Unnamed: 0,signal_names
0,$ActiveCalibrationPage
1,$ActiveCalibrationPage_1_
2,$ActiveCalibrationPage_2_
3,$ActiveCalibrationPage_3_
4,$ActiveCalibrationPage_4_
5,$ActiveCalibrationPage_4_4
6,$ActiveCalibrationPage_6_
7,$CalibrationLog
8,$CalibrationLog_1_
9,$CalibrationLog_2_


In [261]:
df_htoe_processed = process_data(df_htoe)

In [262]:
df_htoe_processed.head(20)

Unnamed: 0,signal_names,ld,dut,pure,signal_suffix,dut_suffix,ld_suffix,pure_suffix
0,activecalibrationpage,False,False,True,"[None, 1, 2, 3, 4, 4_4, 6]",[None],[None],"[None, 1, 2, 3, 4, 4_4, 6]"
1,actual_rpm,False,False,True,[None],[None],[None],[None]
2,actual_torque,False,False,True,[None],[None],[None],[None]
3,actualvaluevolumeflowcoolantetractsys1,False,False,True,[None],[None],[None],[None]
4,aliv_avl_dt_mot_trct,False,False,True,[None],[None],[None],[None]
5,alpha,False,False,True,[None],[None],[None],[None]
6,avl_dirrt_mot_trct,False,False,True,[None],[None],[None],[None]
7,avl_opmo_mot,False,False,True,[1],[None],[None],[1]
8,avl_opmo_mot_trct,False,False,True,[None],[None],[None],[None]
9,avl_rpm_mot_trct,False,False,True,[None],[None],[None],[None]


In [264]:
df_htoe_processed.to_csv(r'htoe_processed.csv', index = False)

In [266]:
df_mech = pd.read_csv('mech_raw-channels.txt', header=None, names=['signal_names'])
df_mech_processed = process_data(df_mech)
df_mech_processed.head()

Unnamed: 0,signal_names,ld,dut,pure,signal_suffix,dut_suffix,ld_suffix,pure_suffix
0,activecalibrationpage,False,False,True,"[None, 4, 9]",[None],[None],"[None, 4, 9]"
1,actualvaluevolumeflowcoolantetractsys1,True,True,False,[None],[None],[None],[None]
2,aliv_combinedchargerunit100msno1,True,True,False,[None],[None],[None],[None]
3,aliv_combinedchargerunit10msno1,True,True,False,[None],[None],[None],[None]
4,aliv_tractionemachineelectronic1time10msno1,True,True,False,[None],[None],[None],[None]


In [269]:
df_mech_processed.to_csv(r'mech_processed.csv', index = False)

In [268]:
df_ptce = pd.read_csv('ptce_raw-channels.txt', header=None, names=['signal_names'])
df_ptce_processed = process_data(df_ptce)
df_ptce_processed.head(10)

Unnamed: 0,signal_names,ld,dut,pure,signal_suffix,dut_suffix,ld_suffix,pure_suffix
0,21dcurrentdirectcurrentheat1actualvalue,True,True,False,[None],[None],[None],[None]
1,activecalibrationpage,False,False,True,"[None, 4, 4_4, 6]",[None],[None],"[None, 4, 4_4, 6]"
2,actualvaluevolumeflowcoolantetractsys1,True,True,False,[None],[None],[None],[None]
3,actualvaluevolumeflowcoolantetractsys2,True,True,False,[None],[None],[None],[None]
4,air_saturation_value,False,False,True,[None],[None],[None],[None]
5,aliv_combinedchargerunit100msno1,True,True,False,[None],[None],[None],[None]
6,aliv_combinedchargerunit10msno1,True,True,False,[None],[None],[None],[None]
7,aliv_tractionemachineelectronic1time10msno1,True,True,False,[None],[None],[None],[None]
8,alpha,False,False,True,[None],[None],[None],[None]
9,avl_cutil_pcu,True,True,False,[None],[None],[None],[None]


In [270]:
df_ptce_processed.to_csv(r'ptce_processed.csv', index = False)