In [1]:
import numpy as np
import pandas as pd
import glob

In [2]:
locn = "C:\\Users\\rangy\\Downloads\\Hydraulics-main\\features\\*.txt"
# find all the txt files in the path 
files = glob.glob(locn)
# use a dict to save all the variables 
features = {}

In [3]:
for name in files:
    try:
        with open(name) as f:
            # read in the data 
            temp_df = pd.read_csv(name,delim_whitespace=True,header=None)
            # make the row names cycle as a variable
            temp_df.index.name = 'cycle'
            temp_df.reset_index(inplace=True)
            # transpose the data 
            temp_df_transposed= temp_df.T
            # make the seconds a variable, call it "time”
            temp_df_transposed.index.name = 'time'
            temp_df_transposed.reset_index(inplace=True)
            # add a prefix cycle in the column names to help with pivoting data (from wide to long)
            string = ' cycle'.join(str(e) for e in list(temp_df_transposed.columns))
            temp_df_transposed.columns = string.split(" ")
            # From wide to long to help with joining all the variables 
            temp_df_long = pd.wide_to_long(temp_df_transposed.iloc[1:,:],stubnames='cycle', i=['time'], j='c')
            temp_df_long.reset_index(inplace=True)
            # save each data variable in the long format into separate data frames in the dictionary called "features"
            # but exclude path and txt in the names
            features[name[9:-4]] = temp_df_long
                        
           
    
    # prevent a possible error
    except IOError as exc:
        if exc.errno != errno.EISDIR:
            raise

In [4]:
for key in list(features.keys()):
    features[key].columns=['seconds','cycle',key]

In [5]:
dfs= [features['rangy\\Downloads\\Hydraulics-main\\features\\CP'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\CE'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\EPS1'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\FS1'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\FS2'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS1'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS2'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS3'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS4'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS5'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\PS6'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\SE'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\TS1'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\TS2'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\TS3'],
      features['rangy\\Downloads\\Hydraulics-main\\features\\TS4'],
     features['rangy\\Downloads\\Hydraulics-main\\features\\VS1']]  

In [6]:
from functools import reduce
features_join = reduce(lambda left,right: pd.merge(left,right,on=['seconds','cycle']), dfs)
features_join.head()

Unnamed: 0,seconds,cycle,rangy\Downloads\Hydraulics-main\features\CP,rangy\Downloads\Hydraulics-main\features\CE,rangy\Downloads\Hydraulics-main\features\EPS1,rangy\Downloads\Hydraulics-main\features\FS1,rangy\Downloads\Hydraulics-main\features\FS2,rangy\Downloads\Hydraulics-main\features\PS1,rangy\Downloads\Hydraulics-main\features\PS2,rangy\Downloads\Hydraulics-main\features\PS3,rangy\Downloads\Hydraulics-main\features\PS4,rangy\Downloads\Hydraulics-main\features\PS5,rangy\Downloads\Hydraulics-main\features\PS6,rangy\Downloads\Hydraulics-main\features\SE,rangy\Downloads\Hydraulics-main\features\TS1,rangy\Downloads\Hydraulics-main\features\TS2,rangy\Downloads\Hydraulics-main\features\TS3,rangy\Downloads\Hydraulics-main\features\TS4,rangy\Downloads\Hydraulics-main\features\VS1
0,0,0,2.184,47.202,2411.6,8.99,10.179,151.47,125.5,2.305,0.0,9.936,9.818,68.039,35.57,40.961,38.32,30.363,0.604
1,1,0,2.184,47.273,2411.6,0.77,10.174,151.45,125.39,2.305,0.0,9.947,9.823,0.0,35.492,40.949,38.332,30.375,0.605
2,2,0,2.184,47.25,2411.6,0.641,10.151,151.52,125.4,2.336,0.0,9.964,9.844,0.0,35.469,40.965,38.32,30.367,0.611
3,3,0,2.185,47.332,2411.6,0.006,10.149,151.27,125.03,2.578,0.0,9.989,9.861,0.0,35.422,40.922,38.324,30.367,0.603
4,4,0,2.178,47.213,2411.6,0.0,10.172,150.8,124.05,2.977,0.0,9.996,9.877,0.0,35.414,40.879,38.332,30.379,0.608


In [7]:
label = pd.read_csv('profile.txt',delim_whitespace=True,header=None)
label.head()

Unnamed: 0,0,1,2,3,4
0,3,100,0,130,1
1,3,100,0,130,1
2,3,100,0,130,1
3,3,100,0,130,1
4,3,100,0,130,1


In [8]:
label.columns = ['cooler_condition', 'valve_condition', 'pump_leak', 'hydraulic_accumulator', 'stable_flag']

In [9]:
%pip install tsfresh

Note: you may need to restart the kernel to use updated packages.


In [10]:
import xgboost
from xgboost import XGBClassifier, XGBRegressor

xgc = XGBClassifier()
xgr = XGBRegressor()


from tsfresh.transformers import RelevantFeatureAugmenter
from sklearn.pipeline import Pipeline

import joblib

In [11]:
pipeline_flag = Pipeline([('augmenter', RelevantFeatureAugmenter(column_id="cycle", column_sort="seconds")),
            ('xgc', XGBClassifier())])
pipeline_accum = Pipeline([('augmenter', RelevantFeatureAugmenter(column_id="cycle", column_sort="seconds")),
            ('xgr', XGBRegressor())])

In [12]:
y_stable_flag = label['stable_flag']
y_hydraulic_accumulator = label['hydraulic_accumulator'] 


In [13]:
X = pd.DataFrame(index = y_stable_flag.index)

In [14]:
pipeline_flag.set_params(augmenter__timeseries_container=features_join)
pipeline_accum.set_params(augmenter__timeseries_container=features_join)

Pipeline(steps=[('augmenter',
                 RelevantFeatureAugmenter(column_id='cycle',
                                          column_sort='seconds',
                                          timeseries_container=        seconds  cycle  rangy\Downloads\Hydraulics-main\features\CP  \
0             0      0                                        2.184   
1             1      0                                        2.184   
2             2      0                                        2.184   
3             3      0                                        2.185   
4             4      0                                        2.178   
...         ...    ...                                          ...   
132295       55   2204                                        2.134   
132296       56   2204                                        2.146   
132297       57   2204                                        2.144   
132298       58   2204                                        2.136   


In [15]:
pipeline_flag.fit(X,y_stable_flag)

Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 15/15 [11:36<00:00, 46.42s/it]
 'rangy\\Downloads\\Hydraulics-main\\features\\CP__fft_coefficient__attr_"real"__coeff_32'
 'rangy\\Downloads\\Hydraulics-main\\features\\CP__fft_coefficient__attr_"real"__coeff_33'
 ...
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_97'
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_98'
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_99'] did not have any finite values. Filling with zeros.




Pipeline(steps=[('augmenter',
                 RelevantFeatureAugmenter(column_id='cycle',
                                          column_sort='seconds',
                                          timeseries_container=        seconds  cycle  rangy\Downloads\Hydraulics-main\features\CP  \
0             0      0                                        2.184   
1             1      0                                        2.184   
2             2      0                                        2.184   
3             3      0                                        2.185   
4             4      0                                        2.178   
...         ...    ...                                          ...   
132295       55   2204                                        2.134   
132296       56   2204                                        2.146   
132297       57   2204                                        2.144   
132298       58   2204                                        2.136   


In [16]:
pipeline_accum.fit(X,y_hydraulic_accumulator)

Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 15/15 [11:31<00:00, 46.09s/it]
 'rangy\\Downloads\\Hydraulics-main\\features\\CP__fft_coefficient__attr_"real"__coeff_32'
 'rangy\\Downloads\\Hydraulics-main\\features\\CP__fft_coefficient__attr_"real"__coeff_33'
 ...
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_97'
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_98'
 'rangy\\Downloads\\Hydraulics-main\\features\\VS1__fft_coefficient__attr_"angle"__coeff_99'] did not have any finite values. Filling with zeros.


Pipeline(steps=[('augmenter',
                 RelevantFeatureAugmenter(column_id='cycle',
                                          column_sort='seconds',
                                          timeseries_container=        seconds  cycle  rangy\Downloads\Hydraulics-main\features\CP  \
0             0      0                                        2.184   
1             1      0                                        2.184   
2             2      0                                        2.184   
3             3      0                                        2.185   
4             4      0                                        2.178   
...         ...    ...                                          ...   
132295       55   2204                                        2.134   
132296       56   2204                                        2.146   
132297       57   2204                                        2.144   
132298       58   2204                                        2.136   


In [17]:
import pickle
pickle.dump(pipeline_accum,open('pipeline_accum.pkl','wb'))
pickle.dump(pipeline_flag,open('pipeline_flag.pkl','wb'))

In [18]:
model = pickle.load(open('pipeline_flag.pkl','rb'))

In [19]:
xtest = np.array([200])
xtest_df = pd.DataFrame(xtest)
xtest_df.set_index(xtest,inplace=True)
Xtest_df = pd.DataFrame(index = xtest_df.index)