In [18]:
import numpy as np
import os
import pandas as pd
import pprint
import csv

#path constants
train_path = '../../data/train/'
test_path = '../../data/test/'

output_path = '../../data_all/'

#type constants
vehicle_types = ['ZVe44', 'ZV573', 'ZV63d', 'ZVfd4', 'ZVa9c', 'ZVa78', 'ZV252']

#two label dataframes
train_label_df = pd.read_csv(train_path + 'train_label.csv', delimiter = ',', encoding = 'utf-8')
test_label_df = pd.read_csv(test_path + 'test_label.csv', delimiter = ',', encoding = 'utf-8')

for vehicle_type in vehicle_types:
    if not os.path.exists(output_path+vehicle_type):
        os.makedirs(output_path+vehicle_type)

In [19]:
def getLabel(filename, label_df):
    idx = label_df.loc[label_df['sample_file_name'] == filename]
    return idx.iloc[0]['label']

def TraverseFiles(path, vehicle_type, label_df, output_path):

    #vehicle_type: one string element under vehicle_types = ['ZVe44', 'ZV573', 'ZV63d', 'ZVfd4', 'ZVa9c', 'ZVa78', 'ZV252']
    path = path + vehicle_type
    #these are variables to calculate traversing progress (DO NOT CHANGE)
    counts_per_percent = int(len(os.listdir(path)) / 100)
    percentage_completion = 0
    counter = 0

    output_list = []
    
    for file in os.listdir(path):
        sample_df = pd.read_csv(path + '/' + file, delimiter = ',', encoding = 'utf-8')
        
        
        #exclude idling records:
        
        #engine rpm > 2000
        new_df = sample_df[sample_df.iloc[:, 1] >= 2000]
        #oil pump rpm > 2000
        new_df = new_df[new_df.iloc[:,2] >= 2000]
        #set pressure > 20
        new_df = new_df[new_df.iloc[:,6] >= 20]
        
        if new_df.shape[0] == 0:
            continue
        
        label_dict = dict()
        label_dict['sample_file_name'] = file
        label_dict['label'] = getLabel(file, label_df)
        output_list.append(label_dict)
        
        new_df.to_csv(output_path + vehicle_type + '/'+file, index = False)
        
        
        #belows are to show traversing progress (DO NOT CHANGE)
        counter += 1
        if counter == counts_per_percent:
            counter = 0
            percentage_completion += 1
            print('traversing files under', path, ':', percentage_completion, "%", end="\r", flush=True)
    
    #export label into csv:
    new_label_df = pd.DataFrame(output_list)
    file_name = output_path+vehicle_type+'_label.csv'
    #if file exists, append the content to the end
    if os.path.exists(file_name):
        df = pd.read_csv(file_name, delimiter = ',', encoding = 'utf-8')
        df = df.append(new_label_df, ignore_index=True)
        df.to_csv(file_name, index = False)
    #otherwise create a file and save
    else:
        new_label_df.to_csv(file_name, index = False)

In [20]:
for vehicle_type in vehicle_types:
    TraverseFiles(train_path, vehicle_type, train_label_df, output_path)
    TraverseFiles(test_path, vehicle_type, test_label_df, output_path)
    

traversing files under ../../data/test/ZV252 : 153 %16 % % ../../data/test/ZV63d : 39 % %: 84 % ../../data/train/ZVfd4 : 68 %14 %% ../../data/train/ZV252 : 21 % ../../data/train/ZV252 : 30 % ../../data/train/ZV252 : 48 % % 14 % ../../data/test/ZV252 : 29 % ../../data/test/ZV252 : 41 % 62 % ../../data/test/ZV252 : 68 % ../../data/test/ZV252 : 121 %