Imports

In [1]:
# Imports
import os
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display

Variables

In [20]:
# Paths to the directories holding the csv files.
# directory_origin   = directory with the original (raw) data
# directory_filtered = directory where the filtered data is saved
# directory_merged   = directory where the merged data is saved
# Raw strings are used so that backslashes in the Windows paths are taken
# literally (the previous "\\\D" contained an invalid "\D" escape sequence
# and produced a doubled backslash in the resulting path).
directory_origin = r"C:\Users\katri\Documents\Studium\Master\2. Semester\Projekt\1_Daten\0_Beispieldaten\Rohdaten"
directory_filtered = r"C:\Users\katri\Documents\Studium\Master\2. Semester\Projekt\1_Daten\0_Beispieldaten\Rohdaten_gefiltert"
directory_merged = r"C:\Users\katri\Documents\Studium\Master\2. Semester\Projekt\1_Daten\0_Beispieldaten\Rohdaten_gemerged"

Functions

In [10]:
# extract the timestamp token from a filename
def get_timestamp_from_filename(filename):
    """Return the second underscore-separated field of ``filename``.

    The filename is split on "_" and the element at index 1 is returned.
    An IndexError is raised when the name contains no underscore.
    """
    return filename.split("_")[1]

In [11]:
# check whether a filename belongs to a given file type (e.g. "EML")
def is_file_type(name, file_type):
    """Return True if ``file_type`` occurs anywhere in ``name``, else False.

    This is a plain substring test; it is case-sensitive and does not look
    at the file extension specifically.
    """
    return file_type in name

In [12]:
# calculate the full timestamp from its lower and higher part
def calculate_ts_real(low, high):
    """Combine the low and high part of a split timestamp into one value.

    Assumes ``low`` and ``high`` are the lower and upper 32-bit words of a
    single counter (TODO confirm against the data format description).
    Each increment of ``high`` is therefore worth 2**32 ticks of ``low``.
    The previous factor 4294967295 (2**32 - 1) was off by one per high-word
    increment.

    Works element-wise on pandas Series as well as on plain integers.
    """
    return low + high * (1 << 32)

In [13]:
# calculate relative timestamp by shifting timestamps so the minimum becomes 0
# (the original note states the result is in seconds; the unit cannot be
# verified from this code - TODO confirm)
def calculate_ts_relative(ts_real, minimum):
    """Return ``ts_real`` with ``minimum`` subtracted.

    Works for scalars and, element-wise, for pandas Series.
    """
    offset = ts_real - minimum
    return offset

In [14]:
def get_filename_by_timestamp(ts, filename_list):
    """Find the filename that contains the timestamp string ``ts``.

    Every name in ``filename_list`` is checked with a substring test. If
    several names match, the last one in the list wins; if none matches,
    an empty string is returned.
    """
    hits = [candidate for candidate in filename_list if ts in candidate]
    return hits[-1] if hits else ""

In [15]:
# read and process csv file
# file_type 0 = fre
# file_type 1 = eml
def process_file(file_path, file_type):
    """Read one csv file and return it with a relative timestamp column.

    Parameters
    ----------
    file_path : str
        Name of the file, relative to ``directory_origin``.
    file_type : int
        0 selects the FRe column set, 1 selects the EML column set.

    Returns
    -------
    pandas.DataFrame
        The selected signal columns plus ``ts_relative``. The two raw
        timestamp columns are removed, and rows whose ``ts_relative`` equals
        that of the directly preceding row are dropped.

    Raises
    ------
    ValueError
        If ``file_type`` is neither 0 nor 1.
    """
    ts_low = 'DeTimestamp.DeTimestampZGT.DeTimestampLow'
    ts_high = 'DeTimestamp.DeTimestampZGT.DeTimestampHigh'

    if file_type == 0:
        signal_columns = ['DeFA_Hoehenwert_VL_00', 'DeFA_Hoehenwert_VR_00',
                          'DeFA_Hoehenwert_HL_00', 'DeFA_Hoehenwert_HR_00']
    elif file_type == 1:
        signal_columns = ['DeAccX', 'DeAccY', 'DeAccZ', 'DeCurvature', 'DeDrivingDirectionPRange',
                          'DePitchAngle2Gravity', 'DePitchRate', 'DeRollAngle2Gravity', 'DeRollRate',
                          'DeVelocityX', 'DeVelocityY', 'DeYawAngle', 'DeYawRate',
                          'DePositionX', 'DePositionY', 'DePositionXmm', 'DePositionYmm']
    else:
        # previously an unknown type failed later with an UnboundLocalError
        raise ValueError("file_type must be 0 (fre) or 1 (eml), got {!r}".format(file_type))

    # read selected columns from csv file to DataFrame;
    # skiprows=[1] skips the line directly below the header
    # (presumably a units/description row - TODO confirm)
    path = os.path.join(directory_origin, file_path)
    data = pd.read_csv(path, sep=';', skiprows=[1], usecols=[ts_low, ts_high] + signal_columns)

    # calculate new timestamp, relative to the smallest timestamp in the file
    ts_real = calculate_ts_real(data[ts_low], data[ts_high])
    data = data.assign(ts_relative=calculate_ts_relative(ts_real, ts_real.min()))

    # keyword form of drop(): the positional axis argument was removed in pandas 2.0
    data = data.drop(columns=[ts_low, ts_high])

    # filter duplicated lines: keep a row only if its timestamp differs from
    # the previous row's (the first row compares against NaN and is kept)
    is_new_timestamp = data['ts_relative'].ne(data['ts_relative'].shift())
    return data.loc[is_new_timestamp]

Code

In [16]:
# list every entry of the directory with the original data
filenames = os.listdir(directory_origin)

# partition the names into EML and FRe files via substring match
eml_files = [entry for entry in filenames if is_file_type(entry, "EML")]
fre_files = [entry for entry in filenames if is_file_type(entry, "FRe")]

# report how many files of each kind were found
print("Number of eml files: ", len(eml_files))
print("Number of fre file: ", len(fre_files))


Number of eml files:  6
Number of fre file:  6


In [17]:
# progress indicator for the processing loop: starts at 0 and has one
# step per EML file
progress_bar = widgets.IntProgress(
    description='Loading:',
    orientation='horizontal',
    bar_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    min=0,
    max=len(eml_files),
    value=0,
    step=1,
)

In [18]:
# show the progress bar widget as this cell's output; it is updated later
# by incrementing progress_bar.value
display(progress_bar)

IntProgress(value=0, description='Loading:', max=6)

In [21]:
# make sure both output directories exist, otherwise to_csv() fails
os.makedirs(directory_filtered, exist_ok=True)
os.makedirs(directory_merged, exist_ok=True)

# start from an empty bar so that re-running this cell shows correct progress
progress_bar.value = 0

# iterate over list with eml_filenames
for filename in eml_files:
    progress_bar.value += 1

    # get timestamp from filename
    timestamp = get_timestamp_from_filename(filename)

    # find fre_filename by timestamp
    fre_filename = get_filename_by_timestamp(timestamp, fre_files)

    # skip this eml file if there is no fre partner (empty string);
    # checked before reading anything so no file is parsed in vain
    if fre_filename == "":
        print("No matching csv file found!")
        continue

    # read and filter both files
    df_eml = process_file(filename, 1)
    df_fre = process_file(fre_filename, 0)

    # save both filtered files
    df_fre.to_csv(os.path.join(directory_filtered, 'fre_'+timestamp+'.csv'), sep=';', index=False)
    df_eml.to_csv(os.path.join(directory_filtered, 'eml_'+timestamp+'.csv'), sep=';', index=False)

    # merge both dataframes to one; the outer join keeps timestamps that
    # occur in only one of the files, sorted by relative timestamp
    data_merge = pd.merge(df_fre, df_eml, on='ts_relative', how='outer').sort_values(by=['ts_relative'])

    # fill empty cells with previous value
    # (DataFrame.ffill() replaces the deprecated fillna(method='ffill'))
    data_fillna = data_merge.ffill()

    # save merged and filled data
    merged_filename = timestamp + "_merged.csv"
    data_fillna.to_csv(os.path.join(directory_merged, merged_filename), sep=';', index=False)