In [1]:
import os
import sys
import pandas as pd
import numpy as np
from src.datafolders import *

In [2]:
def normalize(a, min_a = None, max_a = None):
	"""Min-max scale ``a`` column-wise.

	Args:
		a: Array of samples; statistics are taken along axis 0.
		min_a: Per-column minimum to scale with. Computed from ``a`` when None.
		max_a: Per-column maximum to scale with. Computed from ``a`` when None.

	Returns:
		Tuple ``(scaled, min_a, max_a)`` where
		``scaled = (a - min_a) / (max_a - min_a + 0.0001)`` and the two bounds
		are the ones actually used, so they can be passed back in to scale
		another array with the same statistics.
	"""
	# Resolve each bound on its own: supplying only one of them must neither
	# be silently ignored (max_a only) nor crash on ``None - array`` (min_a only).
	if min_a is None: min_a = np.min(a, axis = 0)
	if max_a is None: max_a = np.max(a, axis = 0)
	# The 0.0001 term keeps the denominator non-zero for constant columns.
	return (a - min_a) / (max_a - min_a + 0.0001), min_a, max_a


In [3]:
def load_data(dataset, idx):
	"""Preprocess one EPS partition and save it as train/test/labels ``.npy`` files.

	Reads ``data/EPS/eps{idx}.csv``, separates the ``ANOMALY`` column from the
	remaining columns, splits the rows in file order 70/30 into train/test,
	scales both splits with ``normalize`` using the train split's min/max, and
	expands the test-split labels to the same shape as the test array.

	Args:
		dataset: Sub-folder of ``output_folder`` that the results are written to.
		idx: Partition number; selects the input CSV and the
			``partition_{idx}`` output sub-folder.

	Returns:
		``(train, test, labels)`` as DataFrames. Every column of a ``labels``
		row is 1 where that row's ``ANOMALY`` value equals 1, otherwise 0.

	Side effects:
		Creates the output folder if needed, writes ``train.npy``,
		``test.npy`` and ``labels.npy`` into it, and prints the test-split
		label counts and the array shapes.
	"""
	folder = os.path.join(output_folder, dataset, f'partition_{idx}')
	os.makedirs(folder, exist_ok=True)

	# NOTE(review): the input folder is fixed to data/EPS and does not depend
	# on `dataset` -- confirm this is intended before calling with another name.
	dataset_folder = 'data/EPS'
	csv_path = os.path.join(dataset_folder, f'eps{idx}.csv')
	df = pd.read_csv(csv_path, header=0)

	lbl = df['ANOMALY']
	df = df.drop(['ANOMALY'], axis=1)

	split = int(len(df) * 0.7) # 70% train, 30% test

	df_train = df.iloc[:split]
	df_test = df.iloc[split:]
	# Only the test split keeps its labels; the train labels are discarded.
	lbl = lbl.iloc[split:]

	print(lbl.value_counts())
	# 0 - Normal, 1 - Anomaly

	# Scale the test split with the train split's statistics.
	train, min_a, max_a = normalize(df_train.values)
	test, _, _ = normalize(df_test.values, min_a, max_a)

	# Broadcast the single label column across every column of `test`:
	# rows flagged as anomalous become all ones, the rest stay all zeros.
	labels = np.zeros_like(test)
	labels[(lbl == 1).to_numpy(), :] = 1

	# First COLUMN is the Speed, Second COLUMN is the Angle, Third COLUMN is the Torque
	print("Train Shape: " + str(train.shape), "Test Shape: " + str(test.shape), "Labels Shape: " + str(labels.shape))

	# Save as NPY files for faster loading (np.save appends the .npy suffix).
	for name, array in (('train', train), ('test', test), ('labels', labels)):
		np.save(os.path.join(folder, name), array)

	return pd.DataFrame(train), pd.DataFrame(test), pd.DataFrame(labels)


In [4]:
# Preprocess partitions 1-4 in order; every call prints its own label counts and shapes.
(
	(train1, test1, labels1),
	(train2, test2, labels2),
	(train3, test3, labels3),
	(train4, test4, labels4),
) = [load_data('EPS', part) for part in (1, 2, 3, 4)]

0    37655
1      857
Name: ANOMALY, dtype: int64
Train Shape: (89861, 3) Test Shape: (38512, 3) Labels Shape: (38512, 3)
0    32532
1     1389
Name: ANOMALY, dtype: int64
Train Shape: (79148, 3) Test Shape: (33921, 3) Labels Shape: (33921, 3)
0    36322
1      973
Name: ANOMALY, dtype: int64
Train Shape: (87019, 3) Test Shape: (37295, 3) Labels Shape: (37295, 3)
0    27492
1     1428
Name: ANOMALY, dtype: int64
Train Shape: (67480, 3) Test Shape: (28920, 3) Labels Shape: (28920, 3)


In [5]:
# Stack the four partitions row-wise, producing one frame per split
train, test, labels = (
	pd.concat(parts)
	for parts in (
		[train1, train2, train3, train4],
		[test1, test2, test3, test4],
		[labels1, labels2, labels3, labels4],
	)
)

# Write each combined split into the EPS output folder as <name>.npy
eps_dir = os.path.join(output_folder, 'EPS')
for split_name, split_frame in (('train', train), ('test', test), ('labels', labels)):
	np.save(os.path.join(eps_dir, split_name), split_frame)

In [6]:
# Display the (rows, columns) shape of each combined split
tuple(split.shape for split in (train, test, labels))

((323508, 3), (138648, 3), (138648, 3))

In [7]:
# Read the four raw partition CSVs, taking the first row of each as the header
df1, df2, df3, df4 = (
	pd.read_csv(csv_path, header=0)
	for csv_path in (
		'data/EPS/eps1.csv',
		'data/EPS/eps2.csv',
		'data/EPS/eps3.csv',
		'data/EPS/eps4.csv',
	)
)

# Append them in partition order and write one combined CSV without the index column
df = pd.concat([df1, df2, df3, df4])
df.to_csv('data/EPS/eps.csv', index=False)