In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.preprocessing import StandardScaler, MinMaxScaler

pd.set_option('display.max_columns', None)

print("Pandas version: ", pd.__version__)
print("Numpy version: ", np.__version__)
print("Scikit-learn version: ", sklearn.__version__)

Pandas version:  2.0.3
Numpy version:  1.26.4
Scikit-learn version:  1.3.2


In [5]:
# 打印這資料夾 ../../__datasets/CMAPSSData 有多少個 txt 檔案
import os
path = '../../__datasets/CMAPSSData'
files = os.listdir(path)
txt_files = [file for file in files if file.endswith('.txt')]
print(txt_files)

['train_FD003.txt', 'RUL_FD001.txt', 'test_FD003.txt', 'test_FD004.txt', 'train_FD002.txt', 'test_FD001.txt', 'train_FD001.txt', 'test_FD002.txt', 'readme.txt', 'train_FD004.txt', 'RUL_FD003.txt', 'RUL_FD002.txt', 'RUL_FD004.txt']


In [6]:
train_data = pd.read_csv("../../__datasets/CMAPSSData/train_FD001.txt", sep = "\s+", header = None)
train_data.shape

(20631, 26)

In [3]:
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,21.61,554.36,2388.06,9046.19,1.3,47.47,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,2388.04,9044.07,1.3,47.49,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,21.61,554.26,2388.08,9052.94,1.3,47.27,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,2388.11,9049.48,1.3,47.13,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,21.61,554.0,2388.06,9055.15,1.3,47.28,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


### How many engines are there?

Number of unique values in first column gives us the total number of engines that are under study in this dataset.

In [7]:
train_data[0]

0          1
1          1
2          1
3          1
4          1
        ... 
20626    100
20627    100
20628    100
20629    100
20630    100
Name: 0, Length: 20631, dtype: int64

In [8]:
np.unique(train_data[0])

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [20]:
train_data.groupby(0)[1]

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7f45f6ccbac0>

In [22]:
list(train_data.groupby(0))

[(1,
       0    1       2       3      4       5       6        7        8      9   \
  0     1    1 -0.0007 -0.0004  100.0  518.67  641.82  1589.70  1400.60  14.62   
  1     1    2  0.0019 -0.0003  100.0  518.67  642.15  1591.82  1403.14  14.62   
  2     1    3 -0.0043  0.0003  100.0  518.67  642.35  1587.99  1404.20  14.62   
  3     1    4  0.0007  0.0000  100.0  518.67  642.35  1582.79  1401.87  14.62   
  4     1    5 -0.0019 -0.0002  100.0  518.67  642.37  1582.85  1406.22  14.62   
  ..   ..  ...     ...     ...    ...     ...     ...      ...      ...    ...   
  187   1  188 -0.0067  0.0003  100.0  518.67  643.75  1602.38  1422.78  14.62   
  188   1  189 -0.0006  0.0002  100.0  518.67  644.18  1596.17  1428.01  14.62   
  189   1  190 -0.0027  0.0001  100.0  518.67  643.64  1599.22  1425.95  14.62   
  190   1  191 -0.0000 -0.0004  100.0  518.67  643.34  1602.36  1425.77  14.62   
  191   1  192  0.0009 -0.0000  100.0  518.67  643.54  1601.41  1427.20  14.62   
  
        

In [21]:
num_cycles_to_failure = train_data.groupby(0)[1].count()
num_cycles_to_failure.values

array([192, 287, 179, 189, 269, 188, 259, 150, 201, 222, 240, 170, 163,
       180, 207, 209, 276, 195, 158, 234, 195, 202, 168, 147, 230, 199,
       156, 165, 163, 194, 234, 191, 200, 195, 181, 158, 170, 194, 128,
       188, 216, 196, 207, 192, 158, 256, 214, 231, 215, 198, 213, 213,
       195, 257, 193, 275, 137, 147, 231, 172, 185, 180, 174, 283, 153,
       202, 313, 199, 362, 137, 208, 213, 213, 166, 229, 210, 154, 231,
       199, 185, 240, 214, 293, 267, 188, 278, 178, 213, 217, 154, 135,
       341, 155, 258, 283, 336, 202, 156, 185, 200])

In [23]:
len(num_cycles_to_failure)

100