In [1]:
import pandas as pd
import numpy as np
import glob
import os

In [2]:
# List the entries of the current working directory as the cell output.
os.listdir(".")

['.amlignore',
 '.amlignore.amltmp',
 '.ipynb_aml_checkpoints',
 '2_28_Azure_hydraulic.ipynb',
 '2_28_azure_hydraulic.ipynb.amltmp',
 'CP.txt',
 'EPS1.txt',
 'features',
 'FS1.txt',
 'FS2.txt',
 'labels',
 'PS1.txt',
 'PS2.txt',
 'PS3.txt',
 'PS4.txt',
 'PS5.txt',
 'PS6.txt',
 'SE.txt',
 'TS1.txt',
 'TS2.txt',
 'TS3.txt',
 'TS4.txt',
 'ts_practice.ipynb.amltmp',
 'VS1.txt']

In [3]:
def read_sensor_long(path):
    """Read one whitespace-delimited sensor file and return it in long format.

    The file is parsed without a header, so every line becomes one row and
    every whitespace-separated value on that line becomes one column.

    Parameters
    ----------
    path : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One record per (row, column) pair of the file, ordered row by row,
        with three columns:

        * ``time``  - 0-based column position of the value within its row,
        * ``c``     - 0-based row number of the file,
        * ``cycle`` - the value itself.

        These names (and their order) match what the previous
        ``wide_to_long``-based reshape produced; the columns are expected to
        be renamed by position afterwards.
    """
    wide = pd.read_csv(path, sep=r"\s+", header=None)
    n_rows, n_cols = wide.shape
    return pd.DataFrame({
        # Row-major flattening: all positions of row 0, then row 1, ...
        "time": np.tile(np.arange(n_cols), n_rows),
        "c": np.repeat(np.arange(n_rows), n_cols),
        "cycle": wide.to_numpy().ravel(),
    })


locn = "./features/*.txt"
# find all the txt files in the path (sorted so the dict order is deterministic)
files = sorted(glob.glob(locn))
# use a dict to save all the variables
features = {}

for name in files:
    # A directory whose name happens to match the pattern is skipped instead
    # of being opened.
    if not os.path.isfile(name):
        continue
    # Key = path without its first 9 characters and without the ".txt"
    # suffix, e.g. "./features/CP.txt" -> "s/CP".  The slice is kept as-is
    # because later cells look the frames up under exactly these keys.
    features[name[9:-4]] = read_sensor_long(name)

In [4]:
# Relabel every per-sensor frame by position: the first two columns become
# 'seconds' and 'cycle', the third takes the frame's own dictionary key.
for sensor_key, sensor_frame in features.items():
    sensor_frame.columns = ['seconds', 'cycle', sensor_key]

In [6]:
# Show the whole `features` dict (one frame per sensor key) as the cell output.
features

{'s/CE':         seconds  cycle    s/CE
 0             0      0  47.202
 1             1      0  47.273
 2             2      0  47.250
 3             3      0  47.332
 4             4      0  47.213
 ...         ...    ...     ...
 132295       55   2204  46.355
 132296       56   2204  46.432
 132297       57   2204  46.384
 132298       58   2204  46.479
 132299       59   2204  46.621
 
 [132300 rows x 3 columns],
 's/CP':         seconds  cycle   s/CP
 0             0      0  2.184
 1             1      0  2.184
 2             2      0  2.184
 3             3      0  2.185
 4             4      0  2.178
 ...         ...    ...    ...
 132295       55   2204  2.134
 132296       56   2204  2.146
 132297       57   2204  2.144
 132298       58   2204  2.136
 132299       59   2204  2.148
 
 [132300 rows x 3 columns],
 's/EPS1':           seconds  cycle  s/EPS1
 0               0      0  2411.6
 1               1      0  2411.6
 2               2      0  2411.6
 3               3    

In [8]:
# Dictionary keys of the per-sensor frames, in the order they will be joined.
sensor_keys = [
    's/CP',
    's/CE',
    's/EPS1',
    's/FS1',
    's/FS2',
    's/PS1',
    's/PS2',
    's/PS3',
    's/PS4',
    's/PS5',
    's/PS6',
    's/SE',
    's/TS1',
    's/TS2',
    's/TS3',
    's/TS4',
    's/VS1',
]
# A missing key raises KeyError, exactly as an explicit lookup would.
dfs = [features[sensor_key] for sensor_key in sensor_keys]

In [9]:
from functools import reduce


def _join_on_sample(left, right):
    """Merge two frames on their shared 'seconds' and 'cycle' columns."""
    return pd.merge(left, right, on=['seconds', 'cycle'])


# Fold the list pairwise, left to right, into a single wide frame.
feats_join = reduce(_join_on_sample, dfs)

In [11]:
# Load the label table; fields are separated by runs of whitespace.
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True`.
#
# NOTE(review): no `header=` is passed, so pandas uses the first line of
# profile.txt as column names (they are overwritten just below). The outputs
# recorded in this notebook show 2204 label rows against 2205 feature cycles,
# which suggests the file has no header line and its first record is being
# dropped, shifting every label by one cycle. Confirm against the raw file;
# if so, pass `header=None` and update any cell that assumes the current
# row count at the same time.
label = pd.read_csv('./labels/profile.txt', sep=r'\s+')
label.columns = [
    'cooler_condition',
    'valve_condition',
    'pump_leak',
    'hydraulic_accumulator',
    'stable_flag',
]

In [12]:
# Show the last five label rows.
label.tail(5)

Unnamed: 0,cooler_condition,valve_condition,pump_leak,hydraulic_accumulator,stable_flag
2199,100,100,0,90,0
2200,100,100,0,90,0
2201,100,100,0,90,0
2202,100,100,0,90,0
2203,100,100,0,90,0


In [13]:
# Move the row index into a leading regular column and renumber the rows.
label = label.reset_index()

In [14]:
# Relabel all six columns by position; the first one (the former row index)
# becomes the 'cycle' column.
label.columns = [
    'cycle',
    'cooler_condition',
    'valve_condition',
    'pump_leak',
    'hydraulic_accumulator',
    'stable_flag',
]
label.head(5)

Unnamed: 0,cycle,cooler_condition,valve_condition,pump_leak,hydraulic_accumulator,stable_flag
0,0,3,100,0,130,1
1,1,3,100,0,130,1
2,2,3,100,0,130,1
3,3,3,100,0,130,1
4,4,3,100,0,130,1


In [15]:
# Show the last five rows of the joined sensor frame.
feats_join.tail(5)

Unnamed: 0,seconds,cycle,s/CP,s/CE,s/EPS1,s/FS1,s/FS2,s/PS1,s/PS2,s/PS3,s/PS4,s/PS5,s/PS6,s/SE,s/TS1,s/TS2,s/TS3,s/TS4,s/VS1
132295,55,2204,2.134,46.355,2652.0,0.001,10.179,186.38,0.562,0.0,10.235,9.999,9.873,68.167,35.441,40.91,38.195,30.395,0.516
132296,56,2204,2.146,46.432,2667.0,0.0,10.183,187.2,0.555,0.0,10.198,9.976,9.861,68.167,35.437,40.895,38.184,30.391,0.528
132297,57,2204,2.144,46.384,2690.8,0.0,10.198,187.8,0.484,0.0,10.191,9.96,9.837,68.258,35.434,40.883,38.184,30.395,0.522
132298,58,2204,2.136,46.479,2698.4,0.003,10.19,188.05,0.453,0.0,10.189,9.955,9.827,68.258,35.434,40.879,38.184,30.402,0.522
132299,59,2204,2.148,46.621,2710.4,0.001,10.199,188.34,0.445,0.0,10.173,9.96,9.831,68.117,35.426,40.891,38.187,30.375,0.531


In [16]:
# Attach the per-cycle labels to every sensor row of that cycle. The merge
# uses pandas' default join type, so only cycles present in both frames remain.
together = pd.merge(feats_join, label, on='cycle')
together.shape

(132240, 24)

In [17]:
import datetime

# Build one synthetic timestamp per row of `together`, one second apart,
# starting at 2021-01-01 00:00:00. The length is taken from the frame itself
# rather than hard-coded, so it always matches the merged data.
base = datetime.datetime(2021, 1, 1)
arr = np.array([base + datetime.timedelta(seconds=i) for i in range(len(together))])

In [18]:
# Add the synthetic timestamps as a 'time' column (aligned on the existing
# row index), then discard the two positional key columns.
together = (
    together
    .assign(time=pd.Series(arr, index=together.index))
    .drop(columns=['seconds', 'cycle'])
)

In [19]:
# Show the last five rows of the final training frame.
together.tail(5)

Unnamed: 0,s/CP,s/CE,s/EPS1,s/FS1,s/FS2,s/PS1,s/PS2,s/PS3,s/PS4,s/PS5,...,s/TS2,s/TS3,s/TS4,s/VS1,cooler_condition,valve_condition,pump_leak,hydraulic_accumulator,stable_flag,time
132235,2.131,46.579,2667.0,0.0,10.192,186.18,0.547,0.0,10.176,9.947,...,40.934,38.145,30.406,0.519,100,100,0,90,0,2021-01-02 12:43:55
132236,2.141,46.687,2674.6,0.0,10.179,186.94,0.547,0.0,10.205,9.96,...,40.91,38.148,30.402,0.526,100,100,0,90,0,2021-01-02 12:43:56
132237,2.135,46.59,2690.4,0.0,10.182,187.59,0.547,0.0,10.205,9.979,...,40.926,38.184,30.391,0.522,100,100,0,90,0,2021-01-02 12:43:57
132238,2.135,46.579,2694.2,0.0,10.172,188.07,0.531,0.0,10.238,9.997,...,40.918,38.184,30.406,0.52,100,100,0,90,0,2021-01-02 12:43:58
132239,2.148,46.57,2702.2,0.0,10.17,188.44,0.484,0.0,10.225,9.992,...,40.898,38.176,30.375,0.528,100,100,0,90,0,2021-01-02 12:43:59


In [32]:
from azureml.automl.core.forecasting_parameters import ForecastingParameters

# Forecasting settings: 'time' is the timestamp column and the horizon is
# 100 steps.
forecasting_parameters_hydraulic_accum = ForecastingParameters(
    forecast_horizon=100,
    time_column_name='time',
)

In [44]:
import logging

from azureml.train.automl import AutoMLConfig


def build_forecasting_config(label_column_name):
    """Create the AutoML forecasting configuration for one target column.

    Both experiments in this notebook use identical settings and differ only
    in the column being predicted, so the shared settings live here.

    Parameters
    ----------
    label_column_name : str
        Name of the column in `together` to use as the prediction target.

    Returns
    -------
    AutoMLConfig
        Configuration that reads the module-level `together` frame and the
        module-level `forecasting_parameters_hydraulic_accum` settings.
    """
    return AutoMLConfig(
        task='forecasting',
        primary_metric='normalized_mean_absolute_error',
        experiment_timeout_minutes=25,
        enable_early_stopping=True,
        training_data=together,
        label_column_name=label_column_name,
        n_cross_validations=5,
        enable_ensembling=False,
        # `logging` is imported at the top of this cell; it was previously
        # referenced without any import.
        verbosity=logging.INFO,
        forecasting_parameters=forecasting_parameters_hydraulic_accum,
    )


automl_config_hydraulic_accum = build_forecasting_config("hydraulic_accumulator")

automl_config_leak = build_forecasting_config("pump_leak")

In [45]:
from azureml.core import Workspace
from azureml.core.experiment import Experiment

# Choose a name for the experiment and specify the project folder.
# (`project_folder` is not referenced by any other cell visible in this notebook.)
experiment_name = 'AutoTSForecasting_hyd'
project_folder = './sample_projects/automl-classification'

# Load the workspace from its configuration, then create the experiment handle in it.
ws = Workspace.from_config()
experiment = Experiment(ws, experiment_name)

In [46]:
from azureml.widgets import RunDetails

# Submit the hydraulic-accumulator configuration, show the run widget, then
# wait for the run to finish while streaming its output.
hydrun = experiment.submit(automl_config_hydraulic_accum, show_output=True)
hyd_widget = RunDetails(hydrun)
hyd_widget.show()
hydrun.wait_for_completion(show_output=True)

No run_configuration provided, running on local with default configuration
Running on local machine
Parent Run ID: AutoML_9f5bc4d1-e084-4e4c-a865-ea5bef357bed

Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationComplete

KeyboardInterrupt: 

In [51]:
# Submit the pump-leak configuration, show the run widget, then wait for the
# run to finish while streaming its output.
leakrun = experiment.submit(automl_config_leak, show_output=True)
leak_widget = RunDetails(leakrun)
leak_widget.show()
leakrun.wait_for_completion(show_output=True)

Running on local machine
Parent Run ID: AutoML_5e67a04c-355c-4dcf-aea8-b795183a46f8

Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   RobustScaler DecisionTree                      0:00:45       0.0681    0.0681
         1   StandardScalerWrapper DecisionTree             0:00:41       0.1330    0.0681
Received interrupt. Returning now.

{'runId': 'AutoML_5e67a04c-355c-4dcf-aea8-b795183a46f8',
 'target': 'local',
 'status': 'Canceled',
 'startTimeUtc': '2021-03-01T01:20:33.627173Z',
 'endTimeUtc': '2021-03-01T01:24:12.140911Z',
   'message': 'The run was terminated due to an interruption while being executed.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'normalized_mean_absolute_error',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'local',
  'DataPrepJsonString': None,
  'EnableSubsampling': 'False',
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'forecasting',
  'dependencies_versions': '{"azureml-widgets": "1.22.0", "azureml-train": "1.22.0", "azureml-train-restclients-hyperdrive": "1.22.0", "azureml-train-core": "1.22.0", "azureml-train-automl": "1.22.0", "azureml-train-automl-runtime": "1.22.0", "azureml-train-automl-client": "1.22.0", "azureml-

In [52]:
# Fetch the run's output pair and unpack it into two names (presumably the
# best child run and its fitted pipeline; confirm against the SDK docs).
hyd_output = hydrun.get_output()
best_hydraulic, fitted_hydraulic = hyd_output
fitted_hydraulic

ForecastingPipelineWrapper(pipeline=Pipeline(memory=None,
                                             steps=[('timeseriestransformer',
                                                     TimeSeriesTransformer(featurization_config=None,
                                                                           pipeline_type=<TimeSeriesPipelineType.FULL: 1>)),
                                                    ('RobustScaler',
                                                     RobustScaler(copy=True,
                                                                  quantile_range=[10,
                                                                                  90],
                                                                  with_centering=True,
                                                                  with_scaling=False)),
                                                    ('DecisionTreeRegressor',
                                                     DecisionTree

In [53]:
# Fetch the run's output pair and unpack it into two names (presumably the
# best child run and its fitted pipeline; confirm against the SDK docs).
leak_output = leakrun.get_output()
best_leak, fitted_leak = leak_output
fitted_leak

ForecastingPipelineWrapper(pipeline=Pipeline(memory=None,
                                             steps=[('timeseriestransformer',
                                                     TimeSeriesTransformer(featurization_config=None,
                                                                           pipeline_type=<TimeSeriesPipelineType.FULL: 1>)),
                                                    ('RobustScaler',
                                                     RobustScaler(copy=True,
                                                                  quantile_range=[10,
                                                                                  90],
                                                                  with_centering=True,
                                                                  with_scaling=False)),
                                                    ('DecisionTreeRegressor',
                                                     DecisionTree