In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import gc

import time
from datetime import datetime
from datetime import date

from pathlib import Path

In [2]:
# Get all files from all complete_results folders
def get_file_list(dirName):
    # create a list of file and sub directories
    # names in the given directory
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        # Create full path
        fullPath = os.path.join(dirName, entry)
        # If entry is a directory then get the list of files in this directory
        if os.path.isdir(fullPath):
            allFiles = allFiles + get_file_list(fullPath)
        else:
            allFiles.append(fullPath)

    return allFiles

In [3]:
data_path = 'E:/source/repos/robot_dataset'
dirName = data_path + '/complete_results'

total_samples = 2045
normal_samples = 0
extra_assembly_samples = 0
damaged_screw_samples = 0
missing_screw_samples = 0
damaged_thread_hole_samples = 0

# Get the list of all files in directory tree at given path
files_list = get_file_list(dirName)

# Print the files
for elem in files_list:
    print(elem)

E:/source/repos/robot_dataset/complete_results\horizontal_normal\1_2020-11-10_13-01-52.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\1_2020-11-11_14-44-31.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\1_2020-11-13_13-59-46.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\2_2020-11-11_14-57-07.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\2_2020-11-13_14-03-46.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\3_2020-11-12_15-21-13.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\3_2020-11-13_14-10-38.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\4_2020-11-12_16-05-57.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\4_2020-11-13_14-22-59.csv
E:/source/repos/robot_dataset/complete_results\spiral_damaged_screw_thread\5_2020-11-13_11-06-26.csv


In [4]:
def label_samples(complete_data, mode):
    # Change the unscrewing samples(odd numbers) to normal or anomaly
    if mode == 'partial':
        complete_data.loc[complete_data['sample_nr'] % 2 != 0, ['label']]  = 0
    elif mode == 'relabel_full':
        complete_data['label_shifted'] = complete_data['label'].shift(-1)
        complete_data['label'] = np.where(complete_data['label'] < complete_data['label_shifted'],
                                          complete_data['label_shifted'],
                                          complete_data['label'])
        complete_data = complete_data.drop(['label_shifted'], axis=1)
        complete_data.fillna(0)
    elif mode == 'full':
        complete_data['sample_shifted'] = complete_data['sample_nr'] - 1
        complete_data['sample_nr'] = np.where(complete_data['sample_nr'] % 2 == 0,
                                          complete_data['sample_shifted'],
                                          complete_data['sample_nr'])
        complete_data = complete_data.drop(['sample_shifted'], axis=1)

        # Renumber samples
        complete_data['sample_nr'] = (complete_data['sample_nr'] !=
                                      complete_data['sample_nr'].shift(1)).astype(int).cumsum()

    return complete_data

In [5]:
def smooth_samples(complete_data):
    # Smooth all the labels in a sample
    complete_data.groupby(['sample_nr'])['label'].transform(
        lambda x: np.round(x.mean(), decimals=0)
    )

    return complete_data

In [6]:
def load_data():
    dataframe_list = []

    for file in files_list:
        # Reading the file content to create a DataFrame
        dfn = pd.read_csv(file, sep=',')
        dataframe_list.append(dfn)

    # Concat those dataframes
    complete_data = pd.concat(dataframe_list, ignore_index=True)
    del dataframe_list
    gc.collect()

    return complete_data

In [7]:
def transform_data(complete_data, mode='full'):
    # Number samples
    complete_data['sample_nr'] = (complete_data['output_int_register_25'] !=
                                  complete_data['output_int_register_25'].shift(1)).astype(int).cumsum()

    complete_data = smooth_samples(complete_data=complete_data)
    complete_data = label_samples(complete_data=complete_data,
                                  mode=mode)

    return complete_data

In [10]:
# Read all files into pandas
complete_data = load_data()
print(complete_data.dtypes)

timestamp                 float64
target_q_0                float64
target_q_1                float64
target_q_2                float64
target_q_3                float64
                           ...   
output_bit_register_70       bool
output_bit_register_71       bool
output_bit_register_72       bool
label                       int64
sample_nr                   int64
Length: 134, dtype: object


In [12]:
# Change dtypes to smaller
for column in list(complete_data):
    if complete_data[column].dtype == 'int64' or complete_data[column].dtype == 'int32':
        complete_data[column] = complete_data[column].astype('int8')
    if complete_data[column].dtype == 'float64':
        complete_data[column] = complete_data[column].astype('float32')

print(complete_data.dtypes)

timestamp
float32
target_q_0
float64
target_q_1
float64
target_q_2
float64
target_q_3
float64
target_q_4
float64
target_q_5
float64
target_qd_0
float64
target_qd_1
float64
target_qd_2
float64
target_qd_3
float64
target_qd_4
float64
target_qd_5
float64
target_qdd_0
float64
target_qdd_1
float64
target_qdd_2
float64
target_qdd_3
float64
target_qdd_4
float64
target_qdd_5
float64
target_current_0
float64
target_current_1
float64
target_current_2
float64
target_current_3
float64
target_current_4
float64
target_current_5
float64
target_moment_0
float64
target_moment_1
float64
target_moment_2
float64
target_moment_3
float64
target_moment_4
float64
target_moment_5
float64
actual_q_0
float64
actual_q_1
float64
actual_q_2
float64
actual_q_3
float64
actual_q_4
float64
actual_q_5
float64
actual_qd_0
float64
actual_qd_1
float64
actual_qd_2
float64
actual_qd_3
float64
actual_qd_4
float64
actual_qd_5
float64
actual_current_0
float64
actual_current_1
float64
actual_current_2
float64
actual_current_3
fl

In [13]:
complete_data = transform_data(complete_data=complete_data,
                               mode='full')

In [14]:
# Data statistics
# Number of total samples
print('There are {n_samples} samples in total.'.format(n_samples=complete_data['sample_nr'].max()/2))

# Count the different types of labels
grouped_df = complete_data.groupby(['sample_nr']).mean()
grouped_df['label'] = np.round(grouped_df['label'], decimals=0)

(unique, counts) = np.unique(grouped_df['label'], return_counts=True)
count_dict = {A: B for A, B in zip(unique, np.round((counts / len(grouped_df.index)) * 100, decimals=2))}
# count_dict = {A: B for A, B in zip(unique, counts)}

print('The types and counts of different labels as percentage of total data: \n {count_dict}'.format(count_dict=count_dict))

There are 1022.5 samples in total.
The types and counts of different labels as percentage of total data: 
 {0.0: 67.73, 1.0: 13.55, 2.0: 8.61, 3.0: 10.07, 4.0: 0.05}


In [15]:
%matplotlib qt

plot_data = complete_data[:500000]

plt.plot(plot_data['label'] * 10)
plt.plot(plot_data['sample_nr'])
plt.plot(plot_data['output_int_register_25'])

plt.show()