In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.signal import savgol_filter
from sklearn.preprocessing import StandardScaler

In [2]:
# Sensor channels s1..s21; these are the columns sliced into model input windows.
feature_columns = [f's{k}' for k in range(1, 22)]

# Full column layout assigned to the raw data files:
# engine id, cycle counter, three settings, then the 21 sensor channels.
columns = ['id', 'cycle'] + [f'setting{k}' for k in range(1, 4)] + feature_columns

# Engine ids that the windowing loops skip.
deleted_engine = [1000]

# Number of consecutive rows in one sliding window.
sequence_length = 30

# FD002

### 读取数据、计算RUL、标准化

In [3]:
# Read train_FD002: a whitespace-delimited text file without a header row,
# so the column names are assigned from `columns` afterwards.
# The path is built with pathlib so it resolves on every OS (a literal
# backslash separator only works on Windows), and the separator regex is a
# raw string so that "\s" is not parsed as an invalid string escape.
data = pd.read_csv(Path("C-MAPSS-Data") / "train_FD002.txt", sep=r"\s+", header=None)
data.columns = columns
# Largest engine id in the training set; the windowing loops iterate 1..engine_size.
engine_size = int(data['id'].max())

In [4]:
# Compute the RUL label for every training row:
# RUL = (largest cycle recorded for that engine) - (cycle of the row).
rul = data.groupby('id', as_index=False)['cycle'].max()
rul.columns = ['id', 'max']
print(rul)
data = (
    data.merge(rul, on=['id'], how='left')
    .assign(RUL=lambda frame: frame['max'] - frame['cycle'])
    .drop(columns=['max'])  # the per-engine maximum was only a helper column
)

      id  max
0      1  149
1      2  269
2      3  206
3      4  235
4      5  154
..   ...  ...
255  256  163
256  257  309
257  258  143
258  259  205
259  260  316

[260 rows x 2 columns]


In [5]:
# Standardise every column except 'id', 'cycle' and 'RUL' with a StandardScaler
# fitted on the training data.
std = StandardScaler()
# Copy the cycle counter so a scaled version exists next to the raw 'cycle'.
data['cycle_norm'] = data['cycle']
cols_normalize = data.columns.difference(['id', 'cycle', 'RUL'])
scaled_values = std.fit_transform(data[cols_normalize])
norm_data = pd.DataFrame(scaled_values, index=data.index, columns=cols_normalize)
# Unscaled columns joined with the scaled ones, then put back in the original column order.
join_data = data.loc[:, data.columns.difference(cols_normalize)].join(norm_data)
train_data = join_data.loc[:, data.columns]

In [6]:
# Read the FD002 test set and its ground-truth file RUL_FD002.txt.
# Both are whitespace-delimited text files without a header row.
# Paths are built with pathlib so they resolve on every OS (a literal backslash
# separator only works on Windows), and the separator regex is a raw string so
# that "\s" is not parsed as an invalid string escape.
data_dir = Path("C-MAPSS-Data")
test_data = pd.read_csv(data_dir / "test_FD002.txt", sep=r"\s+", header=None)
test_data.columns = columns
truth_data = pd.read_csv(data_dir / "RUL_FD002.txt", sep=r"\s+", header=None)
truth_data.columns = ['truth']
# The truth file carries no engine id: row k (0-based) is paired with engine id k + 1.
truth_data['id'] = truth_data.index + 1

In [7]:
# One row per test engine:
#   'elapsed' - largest cycle present in the test file for that engine
#   'truth'   - value read from the RUL file for that engine
#   'max'     - elapsed + truth
test_rul = (
    test_data.groupby('id', as_index=False)['cycle'].max()
    .rename(columns={'cycle': 'elapsed'})
    .merge(truth_data, on=['id'], how='left')
    .assign(max=lambda frame: frame['elapsed'] + frame['truth'])
)

In [8]:
# Attach the per-engine totals to every test row and derive the row-level RUL
# as ('max' of the engine) - (cycle of the row).
# Only 'max' is removed afterwards; 'elapsed' and 'truth' stay in test_data.
test_data = (
    test_data.merge(test_rul, on=['id'], how='left')
    .assign(RUL=lambda frame: frame['max'] - frame['cycle'])
    .drop(columns=['max'])
)

In [9]:
# Scale the test set with the scaler already fitted on the training data
# (transform only, no re-fit), over the same columns as the training set.
test_data['cycle_norm'] = test_data['cycle']
scaled_test_values = std.transform(test_data[cols_normalize])
norm_test_data = pd.DataFrame(scaled_test_values, index=test_data.index, columns=cols_normalize)
# Unscaled columns joined with the scaled ones, then put back in the original column order.
join_test_data = test_data.loc[:, test_data.columns.difference(cols_normalize)].join(norm_test_data)
test_data = join_test_data.loc[:, test_data.columns]

### 数据切片

In [10]:
# Cut every training engine's sensor history into overlapping windows of
# `sequence_length` consecutive rows (stride 1). Engines listed in
# `deleted_engine` are skipped; engines with fewer than `sequence_length`
# rows yield no window.
train_feature_list = []
for engine_id in range(1, engine_size + 1):
    if engine_id in deleted_engine:
        continue
    engine_features = train_data.loc[train_data['id'] == engine_id, feature_columns].to_numpy()
    window_count = engine_features.shape[0] - sequence_length + 1
    train_feature_list.extend(
        engine_features[start:start + sequence_length] for start in range(window_count)
    )
# Resulting layout: (number of windows, sequence_length, number of feature columns).
train_feature_array = np.asarray(train_feature_list, dtype=np.float32)
train_feature_array.shape

(46219, 30, 21)

In [11]:
# Build one label per training window: the RUL of the window's last row.
# The engine order and skip rule mirror the feature-window loop so that
# labels line up with the windows one to one.
train_label_list = []
for engine_id in range(1, engine_size + 1):
    if engine_id in deleted_engine:
        continue
    engine_rul = train_data.loc[train_data['id'] == engine_id, 'RUL'].to_numpy()
    # Window j ends at row j + sequence_length - 1, so the labels are simply
    # every entry from index sequence_length - 1 onwards (empty for short engines).
    train_label_list.extend(engine_rul[sequence_length - 1:])
train_lable_array = np.asarray(train_label_list, dtype=np.float32).reshape(-1, 1)
train_lable_array.shape

(46219, 1)

In [12]:
# Cut every test engine's sensor history into overlapping windows of
# `sequence_length` consecutive rows (stride 1). Engines listed in
# `deleted_engine` are skipped; engines with fewer than `sequence_length`
# rows yield no window.
# NOTE(review): `engine_size` is the largest engine id of the *training* set;
# test engines with a larger id would never be visited - confirm this is intended.
test_feature_list = []
for engine_id in range(1, engine_size + 1):
    if engine_id in deleted_engine:
        continue
    engine_features = test_data.loc[test_data['id'] == engine_id, feature_columns].to_numpy()
    window_count = engine_features.shape[0] - sequence_length + 1
    test_feature_list.extend(
        engine_features[start:start + sequence_length] for start in range(window_count)
    )
# Resulting layout: (number of windows, sequence_length, number of feature columns).
test_feature_array = np.asarray(test_feature_list, dtype=np.float32)
test_feature_array.shape

(26505, 30, 21)

In [13]:
# Build one label per test window: the RUL of the window's last row.
# The engine order and skip rule mirror the test feature-window loop so that
# labels line up with the windows one to one.
# NOTE(review): `engine_size` is the largest engine id of the *training* set;
# test engines with a larger id would never be visited - confirm this is intended.
test_label_list = []
for engine_id in range(1, engine_size + 1):
    if engine_id in deleted_engine:
        continue
    engine_rul = test_data.loc[test_data['id'] == engine_id, 'RUL'].to_numpy()
    # Window j ends at row j + sequence_length - 1, so the labels are simply
    # every entry from index sequence_length - 1 onwards (empty for short engines).
    test_label_list.extend(engine_rul[sequence_length - 1:])
test_lable_array = np.asarray(test_label_list, dtype=np.float32).reshape(-1, 1)
test_lable_array.shape

(26505, 1)

### 数据保存

In [14]:
# Write the windowed feature/label arrays to ./data_processed as .npy files.
# np.save does not create missing directories, so the target folder is
# created first (a no-op when it already exists).
output_dir = Path('./data_processed')
output_dir.mkdir(parents=True, exist_ok=True)
np.save(output_dir / 'train_feature.npy', train_feature_array)
np.save(output_dir / 'train_label.npy', train_lable_array)
np.save(output_dir / 'test_feature.npy', test_feature_array)
np.save(output_dir / 'test_label.npy', test_lable_array)

# 画图

In [None]:
# 2x2 overview of selected standardised sensor columns plotted against cycle.
# Each inner list holds the (column, legend label) pairs drawn in one panel,
# in drawing order; the n-th line of a panel uses the n-th colour below.
panel_sensors = [
    [('s2', 'Sensor_2'), ('s3', 'Sensor_3')],
    [('s7', 'Sensor_7'), ('s21', 'Sensor_21')],
    [('s6', 'Sensor_6')],
    [('s1', 'Sensor_1'), ('s5', 'Sensor_5')],
]
line_colors = ['#0099DD', '#FF9933']

plt.figure(figsize=(11, 7))
for panel_idx, sensors in enumerate(panel_sensors, start=1):
    plt.subplot(2, 2, panel_idx)
    for (column, legend_label), line_color in zip(sensors, line_colors):
        sns.lineplot(x="cycle", y=column, data=train_data, color=line_color, label=legend_label)
    plt.xlabel('Cycle')
    plt.ylabel('Feature Normalized Value')
    plt.legend(loc='upper right')
    plt.grid()
plt.tight_layout()
# plt.savefig("D:/workspace/paper/1/figs/features.png",dpi=600)

In [None]:
# Plot the capped RUL target and the sensor-7 trace of ONE engine on twin y-axes.
#
# Rows are selected by engine id rather than by a fixed row range: with the
# per-engine cycle counts printed above (engine 1: 149, engine 2: 269) a fixed
# range of rows 192..478 would start inside engine 2 and run into engine 3
# (assuming rows are ordered by engine), putting a jump in the RUL curve.
engine_id = 2   # engine shown in the figure
rul_cap = 115   # RUL values above this are drawn as the cap

engine_rows = train_data[train_data['id'] == engine_id]
feature = engine_rows['s7'].values
rul = engine_rows['RUL'].clip(upper=rul_cap).tolist()
x = list(range(len(engine_rows)))

fig = plt.figure(figsize=(5, 3))
ax1 = fig.add_subplot(111)

_rul_, = ax1.plot(x, rul, color='#0099DD', label='RUL')
ax1.set_ylabel('Cycle Life')
ax1.set_xlabel('Cycle')
ax1.grid()
# Second y-axis sharing the same x-axis, because the sensor value is on a different scale.
ax2 = ax1.twinx()
_feature_, = ax2.plot(x, feature, color='#FF9933', label='Sensor_7')
ax2.set_ylabel('Feature value')

# Lines live on two different axes, so build a single combined legend by hand.
lns = [_rul_, _feature_]
labels = [l.get_label() for l in lns]
plt.legend(lns, labels)

# plt.savefig("D:/workspace/paper/1/figs/RUL_feature.png",dpi=600)

In [None]:
# Overlay the standardised sensor columns s1..s12 against cycle in a single
# axes and write the figure to disk.
plt.figure(figsize=(5, 3))
for sensor_idx in range(1, 13):
    sns.lineplot(x="cycle", y=f"s{sensor_idx}", data=train_data)
plt.grid()
plt.xlabel('Cycle')
plt.ylabel('Feature Normalized Value')
plt.tight_layout()
# NOTE(review): absolute, machine-specific output path.
plt.savefig("D:/workspace/paper/1/figs/normalized_features.png",dpi=600)

In [None]:
# Show one sensor trace of ONE engine before (left) and after (right)
# Savitzky-Golay smoothing, and write the figure to disk.
#
# Rows are selected by engine id rather than by a fixed row range: with the
# per-engine cycle counts printed above (engine 1: 149, engine 2: 269) a fixed
# range of rows 192..478 would start inside engine 2 and run into engine 3
# (assuming rows are ordered by engine).
engine_id = 2
sensor = 's17'
# Legend text is derived from the plotted column so the two cannot disagree.
sensor_label = 'Sensor_' + sensor[1:]

feature = train_data.loc[train_data['id'] == engine_id, sensor].values

plt.figure(figsize=(10, 3))
plt.subplot(121)
plt.plot(feature, color='#0099DD', label=sensor_label)
plt.xlabel('Cycle')
plt.ylabel('Feature Normalized Value')
plt.grid()
plt.legend()

# Savitzky-Golay filter: window of 6 samples, polynomial order 2.
# NOTE(review): the window length is even; some SciPy releases require an odd
# window_length - confirm against the installed version.
feature = savgol_filter(feature, 6, 2)

plt.subplot(122)
plt.plot(feature, color='#FF9933', label=sensor_label)
plt.xlabel('Cycle')
plt.ylabel('Feature Normalized Value')
plt.grid()
plt.legend()
plt.tight_layout()

# NOTE(review): absolute, machine-specific output path.
plt.savefig("D:/workspace/paper/1/figs/denoise_features.png",dpi=600)

In [None]:
# 2x2 grid of horizontal bar charts, one per data subset (FD001..FD004),
# ranking the 21 sensors by a hard-coded per-sensor score. Negative scores are
# clamped to 0 and the sensors are sorted by ascending score before plotting.
current_palette = sns.color_palette('blend:#7AB,#EDA', n_colors=24)
plt.figure(figsize=(12, 8))

sensor_names = [f's{k}' for k in range(1, 22)]

# Scores are listed in sensor order s1..s21; dict order defines the panel order.
subset_scores = {
    "FD001": [
        -1.3525545291981995, 0.8580708041129999, 0.8432959040345853, 0.8185273692842254,
        -0.08518421240324271, 0.014344690169709321, 0.8080746736887614, 0.7464706886790279,
        0.634078690879124, -1.3525545291981995, 0.8293482132538018, 0.7959397025237085,
        0.7769987758394079, 0.6557361651410154, 0.8319090714471812, -0.08518421240324271,
        0.8503126509071743, -1.3525545291981995, -1.3525545291981995, 0.8332705563081861,
        0.8112508345938821,
    ],
    "FD002": [
        0.6349856534065781, 0.6345172254497131, 0.7280255036646421, 0.7704532243067866,
        0.6532500146918113, 0.635776697712685, 0.6322822645235303, 0.6114007857498442,
        0.6549378349486613, 0.6139399959958015, 0.7652670869094798, 0.6338647669024404,
        0.06011263052365457, 0.6250947719475386, 0.7146342024839003, 0.4415509624016986,
        0.7288263251986141, 0.6421262526277283, 0.047940830293252766, 0.6449865864287931,
        0.660358380479112,
    ],
    "FD003": [
        -1.5574784656475686, 0.8316110515289997, 0.841343175823377, 0.8285534873681247,
        -0.09808395402285239, 0.1407054700381496, 0.7127104062989118, 0.6109340608867988,
        0.7037998406043584, 0.07869900923286073, 0.770199743253822, 0.6844810775917795,
        0.6976549117990988, 0.6692217970981309, 0.7829009830050208, -0.09808395402285239,
        0.8066354612199966, -1.5574784656475686, -1.5574784656475686, 0.7671878545340377,
        0.7679127656271648,
    ],
    "FD004": [
        0.5089387560755972, 0.5121675290589925, 0.7240903718579894, 0.7440636647722043,
        0.5126518120078876, 0.506333457562931, 0.507563206383561, 0.4776631277837427,
        0.6279954541821787, 0.5500065837118352, 0.7234265002200237, 0.5021685035072925,
        0.24183469235576593, 0.6264476558593762, 0.6248106457731102, 0.2984575273162594,
        0.7227375654379092, 0.47479686150199546, -0.016127577443375733, 0.5089173115135502,
        0.5070780838586685,
    ],
}

for panel_idx, (subset_name, raw_scores) in enumerate(subset_scores.items(), start=1):
    plt.subplot(2, 2, panel_idx)
    clamped_scores = [max(score, 0) for score in raw_scores]
    ranking = pd.DataFrame({'features': sensor_names, 'importance': clamped_scores})
    ranking = ranking.sort_values(by=['importance'], ascending=True)
    sns.barplot(x="importance", y="features", data=ranking, orient='h', palette='Blues')
    plt.xlabel("Mp")
    plt.ylabel("Sensor")
    plt.title(subset_name)

plt.tight_layout()

# NOTE(review): absolute, machine-specific output path.
plt.savefig("D:/workspace/paper/1/figs/features_select.png",dpi=600)