# Dataset Exploration -  N-CMAPSS DS02

The new C-MAPSS dataset DS02 from NASA provides degradation trajectories of 9 turbofan engines with unknown and different initial health condition for complete flights and two failure modes (HPT efficiency degradation & HPT efficiency degradation combined with LPT efficiency and capacity degradation). The data were synthetically generated with the Commercial Modular Aero-Propulsion System Simulation (C-MAPSS) dynamical model. The data contains multivariate sensors readings of the complete run-to-failure trajectories. Therefore, the records stop at the cycle/time the engine failed. A total number of 6.5M time stamps are available.

Copyright (c) by Manuel Arias.

In [14]:
import os
import h5py
import time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

In [15]:
### Set-up - Define file location
filename = './data_set/N-CMAPSS_DS04.h5'
name_out = filename.replace('./data_set/','').replace('.h5','.csv')

#### Read Raw Data

In [16]:
# Time tracking, Operation time (min):  0.003
t = time.process_time()  

# Load data
with h5py.File(filename, 'r') as hdf:
        # Development set
        W_dev = np.array(hdf.get('W_dev'))             # W
        X_s_dev = np.array(hdf.get('X_s_dev'))         # X_s
        X_v_dev = np.array(hdf.get('X_v_dev'))         # X_v
        T_dev = np.array(hdf.get('T_dev'))             # T
        Y_dev = np.array(hdf.get('Y_dev'))             # RUL  
        A_dev = np.array(hdf.get('A_dev'))             # Auxiliary

        # Test set
        W_test = np.array(hdf.get('W_test'))           # W
        X_s_test = np.array(hdf.get('X_s_test'))       # X_s
        X_v_test = np.array(hdf.get('X_v_test'))       # X_v
        T_test = np.array(hdf.get('T_test'))           # T
        Y_test = np.array(hdf.get('Y_test'))           # RUL  
        A_test = np.array(hdf.get('A_test'))           # Auxiliary
        
        # Varnams
        W_var = np.array(hdf.get('W_var'))
        X_s_var = np.array(hdf.get('X_s_var'))  
        X_v_var = np.array(hdf.get('X_v_var')) 
        T_var = np.array(hdf.get('T_var'))
        A_var = np.array(hdf.get('A_var'))
        
        # from np.array to list dtype U4/U5
        W_var = list(np.array(W_var, dtype='U20'))
        X_s_var = list(np.array(X_s_var, dtype='U20'))  
        X_v_var = list(np.array(X_v_var, dtype='U20')) 
        T_var = list(np.array(T_var, dtype='U20'))
        A_var = list(np.array(A_var, dtype='U20'))
                          
W = np.concatenate((W_dev, W_test), axis=0)  
X_s = np.concatenate((X_s_dev, X_s_test), axis=0)
X_v = np.concatenate((X_v_dev, X_v_test), axis=0)
T = np.concatenate((T_dev, T_test), axis=0)
Y = np.concatenate((Y_dev, Y_test), axis=0) 
A = np.concatenate((A_dev, A_test), axis=0) 
    
print('')
print("Operation time (min): " , (time.process_time()-t)/60)
print('')
print ("W shape: " + str(W.shape))
print ("X_s shape: " + str(X_s.shape))
print ("X_v shape: " + str(X_v.shape))
print ("T shape: " + str(T.shape))
print ("A shape: " + str(A.shape))


Operation time (min):  0.39453125

W shape: (9980013, 4)
X_s shape: (9980013, 14)
X_v shape: (9980013, 14)
T shape: (9980013, 10)
A shape: (9980013, 4)


In [17]:
df_A = DataFrame(data=A, columns=A_var)
df_A.describe()

Unnamed: 0,unit,cycle,Fc,hs
count,9980013.0,9980013.0,9980013.0,9980013.0
mean,5.520297,43.69523,2.593727,0.2072234
std,2.79972,25.67061,0.4911368,0.405317
min,1.0,1.0,2.0,0.0
25%,3.0,22.0,2.0,0.0
50%,5.0,43.0,3.0,0.0
75%,8.0,64.0,3.0,0.0
max,10.0,100.0,3.0,1.0


In [18]:
df_W = DataFrame(data=W, columns=W_var)
df_W['unit'] = df_A['unit'].values

In [19]:
df_X_s_u_c = DataFrame(data=X_s, columns=X_s_var) 
df_X_s_u_c['unit'] = df_A['unit'].values
df_X_s_u_c['cycle'] = df_A['cycle'].values
df_X_s_u_c['alt'] = df_W['alt'].values
df_X_s_u_c['Mach'] = df_W['Mach'].values
df_X_s_u_c['TRA'] = df_W['TRA'].values
df_X_s_u_c['T2'] = df_W['T2'].values
df_X_s_u_c

Unnamed: 0,T24,T30,T48,T50,P15,P2,P21,P24,Ps30,P40,P50,Nf,Nc,Wf,unit,cycle,alt,Mach,TRA,T2
0,620.536686,1500.497778,1918.887446,1305.895297,19.186726,13.769427,19.462331,24.474988,420.022739,426.578936,16.312785,2229.036206,8790.661678,5.117880,1.0,1.0,3003.0,0.261135,81.386139,514.889127
1,620.212233,1501.026264,1918.174792,1306.704845,19.168470,13.765780,19.472561,24.482789,419.831067,426.528828,16.314777,2229.338301,8790.557024,5.113072,1.0,1.0,3014.0,0.260820,81.386139,514.832078
2,620.691023,1501.435624,1917.553771,1305.988738,19.161991,13.768780,19.459324,24.487983,419.814866,426.332878,16.303461,2227.314527,8793.731801,5.113295,1.0,1.0,3023.0,0.262521,81.386139,514.889656
3,620.355416,1500.911321,1917.426968,1305.531529,19.171850,13.754179,19.460298,24.493184,419.747243,426.108069,16.310728,2227.793004,8792.134355,5.112996,1.0,1.0,3032.0,0.262836,81.386139,514.874556
4,620.591359,1501.437449,1918.752200,1305.696191,19.161257,13.744312,19.455585,24.458775,419.775992,426.066808,16.285185,2228.217157,8796.438082,5.109202,1.0,1.0,3042.0,0.262332,81.386139,514.811204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9980008,575.594962,1300.416493,1537.012644,1138.884044,16.588586,13.928236,16.816479,19.332151,251.698566,256.948591,14.448214,1710.470427,8131.187698,2.458707,10.0,85.0,3016.0,0.291501,34.277100,516.547136
9980009,576.234568,1299.882098,1537.765798,1139.599918,16.574192,13.918071,16.826887,19.309744,251.809845,257.064075,14.456533,1714.111240,8123.240798,2.462037,10.0,85.0,3013.0,0.292068,34.277100,516.589850
9980010,575.367322,1300.168884,1534.960755,1138.679258,16.581624,13.921671,16.832350,19.323433,251.871517,257.033246,14.456594,1710.488984,8132.049740,2.460451,10.0,85.0,3010.0,0.291942,34.277100,516.594146
9980011,575.666962,1301.061768,1535.511495,1139.539488,16.571732,13.930149,16.823578,19.315798,251.751500,257.299571,14.464679,1710.823646,8138.051357,2.460564,10.0,85.0,3007.0,0.291312,34.277100,516.567693


In [None]:
df_X_s_u_c_mean = df_X_s_u_c.groupby(['unit', 'cycle']).mean()
df_X_s_u_c_mean = pd.DataFrame(df_X_s_u_c_mean).reset_index()
df_X_s_u_c_mean

In [None]:
df_X_s_u_c_mean.to_csv("./dataset_csv/"+name_out,index=False)