In [106]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [1]:
import os
import datetime
import numpy as np
import pywt
import pandas as pd
import pickle as pkl
from matplotlib import pyplot as plt
import platform
import glob
import re

#변수 설정

In [2]:
DATA_POINTS_PER_FILE = 2560
TIME_PER_REC = 0.1
SAMPLING_FREQ = 25600 # 25.6 KHz
SAMPLING_PERIOD = 1.0/SAMPLING_FREQ

WIN_SIZE = 20
WAVELET_TYPE = 'morl'

#CWT 변환
    (1). pkz파일 불러오기
    (2). 데이터 길이 맞춰주기
    (3). 연속 웨이블릿 변환(CWT) 실행

In [3]:

def load_df(pkz_file):
    with open(pkz_file, 'rb') as f:
        df=pkl.load(f)
    return df

In [4]:
def df_row_ind_to_data_range(ind):
    return (DATA_POINTS_PER_FILE*ind, DATA_POINTS_PER_FILE*(ind+1))

In [5]:

def extract_feature_image(ind, feature_name='horiz accel'):
    data_range = df_row_ind_to_data_range(ind)
    data = df[feature_name].values[data_range[0]:data_range[1]]

    data = np.array([np.mean(data[i:i+WIN_SIZE]) for i in range(0, DATA_POINTS_PER_FILE, WIN_SIZE)])  

    coef, _ = pywt.cwt(data, np.linspace(1,128,128), WAVELET_TYPE)  

    coef = np.log2(coef**2+0.001) 

    coef = (coef - coef.min())/(coef.max() - coef.min()) 
    return coef

## 메인 디렉토리 경로 설정

In [23]:
## root project에서 model 경로에서 실행한다고 가정되어 있음 

def get_root_project_dir():
    """Get the root project directory."""
    if platform.system() == 'Windows':
        return os.path.normpath(os.path.join(os.getcwd(), '..')) 
    else:
        return os.path.normpath(os.path.join(os.getcwd(), '..'))

main_dir = get_root_project_dir()
setting = '/data/Learning_set/' ## /Test_set/ /Learning_set/  중 하나 사용


In [24]:
dir = main_dir + setting
dir

'/home/i4624/vscode/gitclone/org/learning_infer/data/Learning_set/'

## Bearing Loading 

In [8]:
def bearing_load(dir, pkz_file):
    path = dir+'/'+pkz_file
    df=load_df(path)
    df.head()
    no_of_rows = df.shape[0]
    no_of_files = int(no_of_rows / DATA_POINTS_PER_FILE)
    print(no_of_rows, no_of_files)
    return df, no_of_rows, no_of_files

In [19]:
def get_file_list(pattern, directory):
    file_list = glob.glob(f'{directory}/{pattern}')
    filtered_file_list = [os.path.basename(file) for file in file_list if re.search(r'Bearing\d+_\d+_noise\.pkz$', file)]
    #### 위의 패턴과 동일하게 적어야 함 
    return filtered_file_list

In [25]:
## 패턴은 피클즈 파일 이름 앞 부분이 서로 일치하게 되어야 합니다.
pattern = 'Bearing*_*_*.pkz'
file_list = get_file_list(pattern, dir)
file_list = sorted(file_list)
print(file_list)

['Bearing1_1_noise.pkz', 'Bearing1_2_noise.pkz']


In [21]:
dataframes_info = {}  # Dictionary to store dataframe information

for file in file_list:
    df, no_of_rows, no_of_files = bearing_load(dir, file)
    filename = os.path.basename(file)  # Extract filename without path
    dataframes_info[filename] = (df, no_of_rows, no_of_files)
    print(f"Processed {filename}: Rows={no_of_rows}, Files={no_of_files}")


6080000 2375
Processed Bearing1_3_noise.pkz: Rows=6080000, Files=2375
3655680 1428
Processed Bearing1_4_noise.pkz: Rows=3655680, Files=1428
6305280 2463
Processed Bearing1_5_noise.pkz: Rows=6305280, Files=2463
6266880 2448
Processed Bearing1_6_noise.pkz: Rows=6266880, Files=2448
5783040 2259
Processed Bearing1_7_noise.pkz: Rows=5783040, Files=2259


## Plotting 1D vibration signals(그래프로 그리기) 

In [53]:
def plot_acceleration(df, filename, no_of_rows):
    # Create a plot for horizontal acceleration
    plt.plot(range(no_of_rows), df['horiz accel'])
    plt.title(f'Horizontal Acceleration - {filename}')
    plt.xlabel('Time')
    plt.ylabel('Acceleration')
    plt.show()

    # Create a plot for vertical acceleration
    plt.plot(range(no_of_rows), df['vert accel'], 'r')
    plt.title(f'Vertical Acceleration - {filename}')
    plt.xlabel('Time')
    plt.ylabel('Acceleration')
    plt.show()

In [None]:
# Now you can access dataframe information using dataframes_info dictionary
for filename, (df, no_of_rows, no_of_files) in dataframes_info.items():
    print(f"Filename: {filename}, Rows: {no_of_rows}, Files: {no_of_files}")
    print(df.head())
    # Call the plot function for the current dataframe
    plot_acceleration(df, filename, no_of_rows)

**(1)신호처리 - 시간, 주파수 영역 특성 이미지 추출**

In [42]:
def plot_feature_images(ind, no_of_samples=5, no_of_files=None):
    fig, ax = plt.subplots(2, no_of_samples, figsize=[20, 8])
    ax[0, 0].set_ylabel('horiz accel features image')
    ax[1, 0].set_ylabel('vert accel features image')

    for i, p in enumerate(np.linspace(0, 1, no_of_samples)):
        current_ind = int((no_of_files - 1) * p)

        for j, feature_name in enumerate(['horiz accel', 'vert accel']):
            coef = extract_feature_image(ind[current_ind], feature_name=feature_name)
            ax[j, i].set_title('{0:.2f}'.format(p))
            im = ax[j, i].imshow(coef, cmap='coolwarm')
            fig.colorbar(im, ax=ax[j, i], fraction=0.046, pad=0.04)

    plt.tight_layout()
    plt.show()



In [None]:
# Loop through the dataframes_info dictionary
for filename, (df, no_of_rows, no_of_files) in dataframes_info.items():
    print(f"Filename: {filename}, Rows: {no_of_rows}, Files: {no_of_files}")
    print(df.head())
    
    # Create an array of indices
    indices = np.arange(no_of_files)
    
    # Call the feature function for the current dataframe
    plot_feature_images(indices, no_of_samples=5, no_of_files=no_of_files)


##timestamp 저장

In [26]:
for filename in file_list:  # Assuming file_list contains your list of filenames
    # Load the dataframe from the file
    df = load_df(os.path.join(dir, filename))
    no_of_files = int(df.shape[0] / DATA_POINTS_PER_FILE)
    
    data = {'timestamps': [], 'x': []}  # Initialize data inside the loop

    for i in range(0, no_of_files):
        coef_h = extract_feature_image(i,  feature_name='horiz accel')
        coef_v = extract_feature_image(i,  feature_name='vert accel')
        x_ = np.array([coef_h, coef_v])
        data['x'].append(x_)

        # Create a datetime object with only time information
        idx = i * DATA_POINTS_PER_FILE
        timestamp = datetime.datetime.min.time().replace(hour=df.iloc[idx, 0], minute=df.iloc[idx, 1], second=df.iloc[idx, 2])
        data['timestamps'].append(timestamp)

    data['x'] = np.array(data['x'])

    assert data['x'].shape == (no_of_files, 2, 128, 128)
    print(f"{filename}: {no_of_files} files processed, x_ shape: {data['x'].shape}")

    # Generate the output filename with "_timestamp" suffix
    base_filename = os.path.splitext(filename)[0]
    out_filename = base_filename + '_test_data_timestamp.pkz'
    out_path = os.path.join(dir, out_filename)

    # Save the processed data with timestamp
    with open(out_path, 'wb') as f:
        pkl.dump(data, f)

    print(f"Processed data saved to {out_path}")


Bearing1_1_noise.pkz: 2557 files processed, x_ shape: (2557, 2, 128, 128)
Processed data saved to /home/i4624/vscode/gitclone/org/learning_infer/data/Learning_set/Bearing1_1_noise_test_data_timestamp.pkz
Bearing1_2_noise.pkz: 871 files processed, x_ shape: (871, 2, 128, 128)
Processed data saved to /home/i4624/vscode/gitclone/org/learning_infer/data/Learning_set/Bearing1_2_noise_test_data_timestamp.pkz


In [None]:
dir

'/home/i4624/vscode/gitclone/org/learning_infer/data/Test_set/'