In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import threading

### Parameters: modify these before running (`flattened_size` must equal `window_size` × the number of feature columns)
---


In [None]:

# Output CSVs for the flattened windows (one window per row).
anomaly_feature_path = './data/features_anom_scaled.csv'
normal_feature_path = './data/features_norm_scaled.csv'

# Sliding-window geometry. Each window of `window_size` rows is reshaped to a
# single row of `flattened_size` values after the 'timestamp' and
# 'machine_status' columns are dropped, so the two must be changed together.
flattened_size = 777
window_size = 21


---

In [None]:
# Full cleaned/scaled sensor table, indexed by the original row number.
data_final = pd.read_csv('sensor_scaled_cleaned.csv', index_col='index')

# Keep only the rows recorded while the machine was healthy; these feed the
# "normal" feature windows.
data_final_norm = data_final[data_final['machine_status'] == 'NORMAL']

# Number of normal rows, and the number of rows each of the 8 worker threads
# handles. Floor division keeps `thread_size` an int: it is used as a range()
# bound, and range() rejects floats.
l = data_final_norm.shape[0]
thread_size = l // 8

# supplemental function to check if indices in a window are consecutive
def checkConsecutive(seq):
    """Tell whether `seq` increases by exactly 1 at every step.

    Sequences with zero or one element have no steps and count as consecutive.
    """
    steps = np.diff(seq)
    unit_steps = np.sum(steps == 1)
    # A sequence of k items has k - 1 steps; all of them must be unit steps.
    return unit_steps >= len(seq) - 1

# One empty (0, flattened_size) accumulator per worker thread; each thread
# only ever writes to its own array.
(
    normal_sample_0,
    normal_sample_1,
    normal_sample_2,
    normal_sample_3,
    normal_sample_4,
    normal_sample_5,
    normal_sample_6,
    normal_sample_7,
) = (np.empty((0, flattened_size)) for _ in range(8))

In [None]:
# Worker functions, one per thread. Worker k builds the sliding windows whose
# first row lies in the k-th eighth of `data_final_norm` and appends them to
# its own `normal_sample_k` array.
def _normal_chunk_windows(chunk, is_last=False, show_progress=False):
    """Build the flattened sliding windows that start inside one chunk.

    Parameters
    ----------
    chunk : int
        Zero-based chunk number; the chunk starts at row ``chunk * thread_size``.
    is_last : bool
        When True the chunk runs to the end of `data_final_norm` instead of
        stopping after `thread_size` rows, so leftover rows are not skipped.
    show_progress : bool
        Wrap the iteration in a tqdm progress bar.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_windows, flattened_size), one row per window whose
        index labels are consecutive.

    Raises
    ------
    ValueError
        From ``reshape`` if a full window does not contain exactly
        `flattened_size` values.
    """
    total_rows = data_final_norm.shape[0]
    # `thread_size` may be a float if it came from true division; range()
    # only accepts integers.
    chunk_rows = int(thread_size)

    # Chunks are contiguous half-open ranges [start, stop) so that no starting
    # row between two chunks is left out.
    start = chunk * chunk_rows
    stop = total_rows if is_last else start + chunk_rows
    # A window starting past this row would be shorter than `window_size`
    # and could not be reshaped to `flattened_size`.
    stop = min(stop, total_rows - window_size + 1)

    # Loop-invariant work hoisted out of the window loop.
    labels = data_final_norm.index.to_numpy()
    features = data_final_norm.drop(['timestamp', 'machine_status'], axis=1)

    starts = range(start, stop)
    if show_progress:
        starts = tqdm(starts)

    # Collect rows in a list and join once; np.append in the loop would copy
    # the whole accumulated array on every iteration.
    rows = []
    for idx in starts:
        if checkConsecutive(labels[idx: idx + window_size]):
            window = features.iloc[idx: idx + window_size].to_numpy()
            rows.append(window.reshape(1, flattened_size))

    if not rows:
        return np.empty((0, flattened_size))
    return np.concatenate(rows, axis=0)


def construct_features_0():
    """Append the windows of chunk 0 to `normal_sample_0`."""
    global normal_sample_0
    normal_sample_0 = np.append(normal_sample_0, _normal_chunk_windows(0), axis=0)


def construct_features_1():
    """Append the windows of chunk 1 to `normal_sample_1` (shows a progress bar)."""
    global normal_sample_1
    normal_sample_1 = np.append(normal_sample_1, _normal_chunk_windows(1, show_progress=True), axis=0)


def construct_features_2():
    """Append the windows of chunk 2 to `normal_sample_2`."""
    global normal_sample_2
    normal_sample_2 = np.append(normal_sample_2, _normal_chunk_windows(2), axis=0)


def construct_features_3():
    """Append the windows of chunk 3 to `normal_sample_3`."""
    global normal_sample_3
    normal_sample_3 = np.append(normal_sample_3, _normal_chunk_windows(3), axis=0)


def construct_features_4():
    """Append the windows of chunk 4 to `normal_sample_4`."""
    global normal_sample_4
    normal_sample_4 = np.append(normal_sample_4, _normal_chunk_windows(4), axis=0)


def construct_features_5():
    """Append the windows of chunk 5 to `normal_sample_5`."""
    global normal_sample_5
    normal_sample_5 = np.append(normal_sample_5, _normal_chunk_windows(5), axis=0)


def construct_features_6():
    """Append the windows of chunk 6 to `normal_sample_6`."""
    global normal_sample_6
    normal_sample_6 = np.append(normal_sample_6, _normal_chunk_windows(6), axis=0)


def construct_features_7():
    """Append the windows of the final chunk (through the last row) to `normal_sample_7`."""
    global normal_sample_7
    normal_sample_7 = np.append(normal_sample_7, _normal_chunk_windows(7, is_last=True), axis=0)
        


In [None]:
# multithreading to the moon
import threading

# One worker thread per chunk builder, kept in launch order.
chunk_builders = (
    construct_features_0,
    construct_features_1,
    construct_features_2,
    construct_features_3,
    construct_features_4,
    construct_features_5,
    construct_features_6,
    construct_features_7,
)
threads = [threading.Thread(target=builder) for builder in chunk_builders]

# Individual handles, numbered from 1 as before.
t1, t2, t3, t4, t5, t6, t7, t8 = threads

# Launch every worker first, then block until each one has finished.
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

In [None]:
# Dump the per-thread arrays to one CSV, in chunk order. The file is opened in
# binary append mode, so rows are added after whatever it already contains.
with open(normal_feature_path, 'ab') as f:
    for chunk_samples in (
        normal_sample_0,
        normal_sample_1,
        normal_sample_2,
        normal_sample_3,
        normal_sample_4,
        normal_sample_5,
        normal_sample_6,
        normal_sample_7,
    ):
        np.savetxt(f, chunk_samples, delimiter=',', newline='\n')

In [None]:
# Index labels of the rows where the machine status is broken, one per
# failure event.
broken_idx = [17155, 24510, 69318, 77790, 128040, 141131, 166440]

# One empty (0, flattened_size) accumulator per failure event.
(
    anom_sample_0,
    anom_sample_1,
    anom_sample_2,
    anom_sample_3,
    anom_sample_4,
    anom_sample_5,
    anom_sample_6,
) = (np.empty((0, flattened_size)) for _ in range(7))

In [None]:
# Anomaly windows are built serially, one failure event at a time; no threads
# are needed here.
def _anomaly_windows(failure_label):
    """Build the flattened windows covering one failure event.

    The first window is taken from the rows of `data_final` whose index label
    is at least ``failure_label - window_size + 1``. The window then slides
    forward one row at a time for as long as its last row's 'machine_status'
    is not 'NORMAL'.

    Parameters
    ----------
    failure_label : int
        Index label (not position) of the row where the failure is recorded.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_windows, flattened_size), one row per window whose
        index labels are consecutive. Empty if no full window is available.

    Raises
    ------
    ValueError
        From ``reshape`` if a full window does not contain exactly
        `flattened_size` values.
    """
    tail = data_final.loc[data_final.index >= failure_label - window_size + 1]

    # Loop-invariant lookups hoisted out of the sliding loop.
    status = tail['machine_status'].to_numpy()
    labels = tail.index.to_numpy()
    features = tail.drop(['timestamp', 'machine_status'], axis=1)

    rows = []
    idx = 0
    # The bounds check stops cleanly if the data ends before the status
    # returns to 'NORMAL', instead of indexing past the last row.
    while idx + window_size <= len(status) and status[idx + window_size - 1] != 'NORMAL':
        if checkConsecutive(labels[idx: idx + window_size]):
            window = features.iloc[idx: idx + window_size].to_numpy()
            rows.append(window.reshape(1, flattened_size))
        idx += 1

    # Join once at the end; np.append in the loop would copy the whole
    # accumulated array on every iteration.
    if not rows:
        return np.empty((0, flattened_size))
    return np.concatenate(rows, axis=0)


def construct_features_anom_0():
    """Append the windows for failure event 0 to `anom_sample_0`."""
    global anom_sample_0
    anom_sample_0 = np.append(anom_sample_0, _anomaly_windows(broken_idx[0]), axis=0)


def construct_features_anom_1():
    """Append the windows for failure event 1 to `anom_sample_1`."""
    global anom_sample_1
    anom_sample_1 = np.append(anom_sample_1, _anomaly_windows(broken_idx[1]), axis=0)


def construct_features_anom_2():
    """Append the windows for failure event 2 to `anom_sample_2`."""
    global anom_sample_2
    anom_sample_2 = np.append(anom_sample_2, _anomaly_windows(broken_idx[2]), axis=0)


def construct_features_anom_3():
    """Append the windows for failure event 3 to `anom_sample_3`."""
    global anom_sample_3
    anom_sample_3 = np.append(anom_sample_3, _anomaly_windows(broken_idx[3]), axis=0)


def construct_features_anom_4():
    """Append the windows for failure event 4 to `anom_sample_4`."""
    global anom_sample_4
    anom_sample_4 = np.append(anom_sample_4, _anomaly_windows(broken_idx[4]), axis=0)


def construct_features_anom_5():
    """Append the windows for failure event 5 to `anom_sample_5`."""
    global anom_sample_5
    anom_sample_5 = np.append(anom_sample_5, _anomaly_windows(broken_idx[5]), axis=0)


def construct_features_anom_6():
    """Append the windows for failure event 6 to `anom_sample_6`."""
    global anom_sample_6
    anom_sample_6 = np.append(anom_sample_6, _anomaly_windows(broken_idx[6]), axis=0)
    
    

In [None]:
# Build the anomaly windows for every failure event, in event order.
for build_anomaly_windows in (
    construct_features_anom_0,
    construct_features_anom_1,
    construct_features_anom_2,
    construct_features_anom_3,
    construct_features_anom_4,
    construct_features_anom_5,
    construct_features_anom_6,
):
    build_anomaly_windows()

In [None]:
# Dump the per-event arrays to one CSV, in event order. The file is opened in
# binary append mode, so rows are added after whatever it already contains.
with open(anomaly_feature_path, 'ab') as f:
    for event_samples in (
        anom_sample_0,
        anom_sample_1,
        anom_sample_2,
        anom_sample_3,
        anom_sample_4,
        anom_sample_5,
        anom_sample_6,
    ):
        np.savetxt(f, event_samples, delimiter=',', newline='\n')