In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import numpy as np
import pywt
import pandas as pd
import pickle as pkl
from matplotlib import pyplot as plt

변수설정

In [4]:
DATA_POINTS_PER_FILE = 2560
TIME_PER_REC = 0.1
SAMPLING_FREQ = 25600 # 25.6 KHz
SAMPLING_PERIOD = 1.0/SAMPLING_FREQ

WIN_SIZE = 20
WAVELET_TYPE = 'morl'

VAL_SPLIT = 0.2

SEQ_LEN = 10 # sequence length

In [5]:
def load_df(pkz_file):  
    with open(pkz_file, 'rb') as f:
        df=pkl.load(f)
    return df

In [6]:
def df_row_ind_to_data_range(ind):  
    return (DATA_POINTS_PER_FILE*ind, DATA_POINTS_PER_FILE*(ind+1))

이미지 특징 추출

In [7]:
def extract_feature_image(ind, feature_name='horiz accel'):   
    data_range = df_row_ind_to_data_range(ind)
    data = df[feature_name].values[data_range[0]:data_range[1]]

    data = np.array([np.mean(data[i:i+WIN_SIZE]) for i in range(0, DATA_POINTS_PER_FILE, WIN_SIZE)])

    coef, _ = pywt.cwt(data, np.linspace(1,128,128), WAVELET_TYPE)

    coef = np.log2(coef**2+0.001)

    coef = (coef - coef.min())/(coef.max() - coef.min()) 
    return coef

#pkz파일 메인 디렉토리 설정

In [8]:
main_dir = '../data/Learning_set/'

#Bearing1_1

In [9]:
pkz_file = main_dir + 'Bearing1_1_noise.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,9,39,39,65664.0,0.625779,-0.185953
1,9,39,39,65703.0,0.458504,-0.538453
2,9,39,39,65742.0,0.049784,0.262267
3,9,39,39,65781.0,-0.312906,0.071737
4,9,39,39,65820.0,-0.686911,0.32344


In [12]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

6545920 2557


In [13]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

2557 (2557, 2, 128, 128) (2557,)


#pkz파일로 저장

In [14]:
out_file = main_dir+'bearing1_1_noise_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

#Bearing1_2

In [15]:
pkz_file = main_dir + 'Bearing1_2_noise.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,8,47,5,196910.0,-0.008994,-0.479514
1,8,47,5,196950.0,0.161053,-0.331896
2,8,47,5,196990.0,0.357321,0.492729
3,8,47,5,197030.0,0.214035,0.084905
4,8,47,5,197070.0,0.443329,0.095946


In [16]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

2229760 871


In [17]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

871 (871, 2, 128, 128) (871,)


In [18]:
out_file = main_dir+'bearing1_2_noise_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

# 간편화

In [21]:
main_dir = '../data/Full_Test_Set/'

In [22]:
def process_and_save_data(main_dir, pkz_file):
    df = load_df(pkz_file)
    
    no_of_rows = df.shape[0]
    no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)  # Define DATA_POINTS_PER_FILE
    print(no_of_rows, no_of_files)
    
    data = {'x': [], 'y': []}
    for i in range(0, no_of_files):
        coef_h = extract_feature_image(i, feature_name='horiz accel')
        coef_v = extract_feature_image(i, feature_name='vert accel')
        x_ = np.array([coef_h, coef_v])
        y_ = i / (no_of_files - 1)
        data['x'].append(x_)
        data['y'].append(y_)
    data['x'] = np.array(data['x'])
    data['y'] = np.array(data['y'])
    
    assert data['x'].shape == (no_of_files, 2, 128, 128)
    print(no_of_files, data['x'].shape, data['y'].shape)
    
    original_file_name = os.path.basename(pkz_file)
    out_file = os.path.join(main_dir, original_file_name+ '_all_data.pkz')
    with open(out_file, 'wb') as f:
        pkl.dump(data, f)
    print('Data processed and saved to', out_file)

In [23]:
process_and_save_data(main_dir, os.path.join(main_dir, 'Bearing1_3_noise.pkz'))
process_and_save_data(main_dir, os.path.join(main_dir, 'Bearing1_4_noise.pkz'))
process_and_save_data(main_dir, os.path.join(main_dir, 'Bearing1_5_noise.pkz'))
process_and_save_data(main_dir, os.path.join(main_dir, 'Bearing1_6_noise.pkz'))
process_and_save_data(main_dir, os.path.join(main_dir, 'Bearing1_7_noise.pkz'))

6080000 2375


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2375 (2375, 2, 128, 128) (2375,)
Data processed and saved to ../data/Full_Test_Set/Bearing1_3_noise.pkz_all_data.pkz
3655680 1428


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


1428 (1428, 2, 128, 128) (1428,)
Data processed and saved to ../data/Full_Test_Set/Bearing1_4_noise.pkz_all_data.pkz
6305280 2463


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2463 (2463, 2, 128, 128) (2463,)
Data processed and saved to ../data/Full_Test_Set/Bearing1_5_noise.pkz_all_data.pkz
6266880 2448


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2448 (2448, 2, 128, 128) (2448,)
Data processed and saved to ../data/Full_Test_Set/Bearing1_6_noise.pkz_all_data.pkz
5783040 2259


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2259 (2259, 2, 128, 128) (2259,)
Data processed and saved to ../data/Full_Test_Set/Bearing1_7_noise.pkz_all_data.pkz


#Bearing1_1noise

In [None]:
pkz_file = main_dir + 'bearing1_2.pkz'
df = load_df(pkz_file)
df.head()
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)
out_file = main_dir+'bearing1_2_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

#Bearing2_1

In [18]:
pkz_file = main_dir + 'bearing2_1.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,8,14,15,884410.0,-0.391,0.011
1,8,14,15,884450.0,0.292,0.133
2,8,14,15,884490.0,0.596,0.024
3,8,14,15,884530.0,0.23,0.272
4,8,14,15,884570.0,-0.225,0.272


In [19]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

2332160 911


In [20]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

911 (911, 2, 128, 128) (911,)


In [21]:
out_file = main_dir+'bearing2_1_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

#Bearing2_2

In [22]:
pkz_file = main_dir + 'bearing2_2.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,7,40,33,540660.0,0.038,0.29
1,7,40,33,540700.0,0.125,-0.104
2,7,40,33,540740.0,0.035,-0.314
3,7,40,33,540780.0,-0.092,0.2
4,7,40,33,540820.0,0.033,0.211


In [23]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

2040320 797


In [24]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

797 (797, 2, 128, 128) (797,)


In [25]:
out_file = main_dir+'bearing2_2_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

#Bearing3_1

In [26]:
pkz_file = main_dir + 'bearing3_1.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,9,10,39,118790.0,0.338,-0.263
1,9,10,39,118830.0,0.278,0.285
2,9,10,39,118870.0,0.143,0.59
3,9,10,39,118910.0,0.09,-0.193
4,9,10,39,118940.0,0.035,-0.109


In [27]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

1318400 515


In [28]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

515 (515, 2, 128, 128) (515,)


In [29]:
out_file = main_dir+'bearing3_1_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)

#Bearing3_2

In [30]:
pkz_file = main_dir + 'bearing3_2.pkz'
df = load_df(pkz_file)
df.head()

Unnamed: 0,hour,minute,second,microsecond,horiz accel,vert accel
0,8,34,41,978160.0,-0.291,0.181
1,8,34,41,978200.0,0.146,0.185
2,8,34,41,978240.0,0.404,-0.159
3,8,34,41,978280.0,0.191,-0.179
4,8,34,41,978320.0,-0.18,0.072


In [31]:
no_of_rows = df.shape[0]
no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
print(no_of_rows, no_of_files)

4190720 1637


In [32]:
data = {'x': [], 'y': []}
for i in range(0, no_of_files):
    coef_h = extract_feature_image(i, feature_name='horiz accel')
    coef_v = extract_feature_image(i, feature_name='vert accel')
    x_ = np.array([coef_h, coef_v])
    y_ = i/(no_of_files-1)
    data['x'].append(x_)
    data['y'].append(y_)
data['x']=np.array(data['x'])
data['y']=np.array(data['y'])

assert data['x'].shape==(no_of_files, 2, 128, 128)
print(no_of_files, data['x'].shape, data['y'].shape)

1637 (1637, 2, 128, 128) (1637,)


In [33]:
out_file = main_dir+'bearing3_2_all_data.pkz'
with open(out_file, 'wb') as f:
    pkl.dump(data, f)