In [1]:
import pandas as pd
from loading_data import add_rul_1
import plotly.express as px
from torch.autograd import Variable
import torch

In [2]:
"""
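Load and preprocess the FD001 dataset.
NOTE: this text appears to be left over from a loader function's docstring. In this
notebook the steps run inline, so the `cut` argument (upper limit for target RULs)
is not applied and nothing is returned; the cells below build the per-unit groups.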
"""
# Loading configuration for FD001.
# Folder that holds the raw CMAPSS text files.
dir_path = './CMAPSSData/'

# Column labels, in file order: two index columns, three operational
# settings, then sensors s_1 .. s_21.
index_names = ['unit_nr', 'time_cycles']
setting_names = ['setting_1', 'setting_2', 'setting_3']
sensor_names = list(map('s_{}'.format, range(1, 22)))
col_names = [*index_names, *setting_names, *sensor_names]

In [3]:
# Read the three FD001 files. They are whitespace-delimited and parsed without
# a header row, so column labels are supplied explicitly.
# The separator is written as a raw string: '\s' inside a normal string literal
# is an invalid escape sequence and triggers a warning on current Python versions.
train_data = pd.read_csv(dir_path + 'train_FD001.txt', sep=r'\s+', header=None, names=col_names)
test_data = pd.read_csv(dir_path + 'test_FD001.txt', sep=r'\s+', header=None, names=col_names)
# Single-column frame read from RUL_FD001.txt, labelled 'RUL'.
y_test = pd.read_csv(dir_path + 'RUL_FD001.txt', sep=r'\s+', header=None, names=['RUL'])

In [5]:
# Columns to discard: the three operational settings plus the sensors that the
# EDA flagged as non-informative.
drop_sensors = ['s_1', 's_5', 's_10', 's_16', 's_18', 's_19']
drop_labels = setting_names + drop_sensors

# Reassign instead of dropping in place, and tolerate labels that are already
# gone, so re-running this cell does not raise KeyError.
train_data = train_data.drop(columns=drop_labels, errors='ignore')
title = train_data.iloc[:, 0:2]  # index columns: unit_nr, time_cycles
data = train_data.iloc[:, 2:]    # remaining sensor columns

In [6]:
# Min-max normalization: rescale each feature column by its own minimum and range.
col_min = data.min()
col_range = data.max() - col_min
data_norm = (data - col_min) / col_range
# Optional alternative (standard normalization):
# data_norm = (data-data.mean())/data.std()

# Put the untouched index columns back in front of the scaled features.
train_norm = pd.concat([title, data_norm], axis=1)

In [7]:
# Inline version of the step that was previously written as
#     train_norm = add_rul_1(train_norm)
# (presumably the same logic as the imported helper -- TODO confirm).
# Input:  df, a copy of the normalised training frame.
# Output: result_frame, the same rows with a trailing "RUL" target column.
df = train_norm.copy()

# Last recorded cycle of every unit, indexed by unit_nr.
grouped_by_unit = df.groupby(by="unit_nr")
max_cycle = grouped_by_unit["time_cycles"].max()

# Attach each unit's last cycle to all of that unit's rows.
result_frame = df.merge(
    max_cycle.to_frame(name='max_cycle'),
    left_on='unit_nr',
    right_index=True,
)

# Remaining useful life per row: cycles left until the unit's last recorded
# cycle. This is a plain linear target; no upper cap is applied here.
remaining_useful_life = result_frame["max_cycle"] - result_frame["time_cycles"]

# Add the target as the last column and discard the helper column.
result_frame = result_frame.assign(RUL=remaining_useful_life).drop(columns="max_cycle")

In [8]:
train_norm = result_frame.copy()
# A piece-wise linear target would cap the RUL at an upper limit; experiments
# reportedly gave better results without the cap, so it stays disabled:
# train_norm['RUL'].clip(upper=cut, inplace=True)
group_train = train_norm.groupby(by="unit_nr")

# Apply the same column selection to the test set. Reassigning (rather than
# dropping in place) and ignoring already-missing labels keeps the cell re-runnable.
test_data = test_data.drop(columns=drop_labels, errors='ignore')
title = test_data.iloc[:, 0:2]  # index columns: unit_nr, time_cycles
data = test_data.iloc[:, 2:]    # remaining sensor columns

# Scale the test features with the TRAINING min/max so both sets share one
# scale. Scaling the test set by its own statistics would map identical raw
# sensor readings to different normalised values in train and test.
# Test values may therefore fall slightly outside [0, 1].
train_features = train_data[data.columns]
train_min = train_features.min()
train_max = train_features.max()
data_norm = (data - train_min) / (train_max - train_min)

test_norm = pd.concat([title, data_norm], axis=1)
group_test = test_norm.groupby(by="unit_nr")

In [9]:
# Pick one unit from the grouped training data (looked up by unit_nr).
i = 1
unit_frame = group_train.get_group(i)

# Features: everything between the two leading index columns and the last column.
X = unit_frame.iloc[:, 2:-1]
# Target: the last column (RUL), kept two-dimensional.
y = unit_frame.iloc[:, -1:]

In [10]:
# Convert the unit's feature frame to a float32 tensor.
# torch.tensor replaces the deprecated Variable(torch.Tensor(...)) wrapper:
# Variable is a no-op on modern PyTorch, and torch.Tensor(ndarray) is the legacy
# constructor. The explicit dtype keeps the float32 result the old call produced.
X_train_tensors = torch.tensor(X.to_numpy(), dtype=torch.float32)
X_train_tensors

tensor([[0.1837, 0.4068, 0.3098,  ..., 0.3333, 0.7132, 0.7247],
        [0.2831, 0.4530, 0.3526,  ..., 0.3333, 0.6667, 0.7310],
        [0.3434, 0.3695, 0.3705,  ..., 0.1667, 0.6279, 0.6214],
        ...,
        [0.7319, 0.6143, 0.7377,  ..., 0.8333, 0.2713, 0.2393],
        [0.6416, 0.6828, 0.7346,  ..., 0.5000, 0.2403, 0.3249],
        [0.7018, 0.6621, 0.7588,  ..., 0.6667, 0.2636, 0.0976]])

In [11]:
# Insert a singleton middle dimension: (rows, features) -> (rows, 1, features).
# The last dimension is inferred with -1 so the cell is idempotent: re-running it
# on the already 3-D tensor leaves the shape unchanged, whereas passing shape[1]
# explicitly would raise on a second run because shape[1] is then 1.
X_train_tensors = X_train_tensors.reshape(X_train_tensors.shape[0], 1, -1)
X_train_tensors

tensor([[[0.1837, 0.4068, 0.3098,  ..., 0.3333, 0.7132, 0.7247]],

        [[0.2831, 0.4530, 0.3526,  ..., 0.3333, 0.6667, 0.7310]],

        [[0.3434, 0.3695, 0.3705,  ..., 0.1667, 0.6279, 0.6214]],

        ...,

        [[0.7319, 0.6143, 0.7377,  ..., 0.8333, 0.2713, 0.2393]],

        [[0.6416, 0.6828, 0.7346,  ..., 0.5000, 0.2403, 0.3249]],

        [[0.7018, 0.6621, 0.7588,  ..., 0.6667, 0.2636, 0.0976]]])

In [12]:
# Display the first entry along dimension 0 of the reshaped tensor.
X_train_tensors[0]

tensor([[0.1837, 0.4068, 0.3098, 1.0000, 0.7262, 0.2424, 0.1098, 0.3690, 0.6333,
         0.2059, 0.1996, 0.3640, 0.3333, 0.7132, 0.7247]])