In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
def load_feature_long(path):
    """Read one whitespace-delimited sensor file and return it in long format.

    The file is read without a header; following the original cell's naming,
    every row is treated as one cycle and every column as one time step.

    Parameters
    ----------
    path : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``time`` (time-step position), ``c`` (cycle number) and
        ``cycle`` (the measured value), one row per (time, cycle) pair.

    Raises
    ------
    OSError
        If the path cannot be opened (``IsADirectoryError`` for a directory).
    """
    with open(path) as handle:
        raw = pd.read_csv(handle, sep=r'\s+', header=None)

    # Turn the row number into an explicit "cycle" column.
    wide = raw.rename_axis('cycle').reset_index()

    # Transpose so every cycle becomes a column, and expose the former column
    # labels (the time steps) as a "time" column.
    transposed = wide.T.rename_axis('time').reset_index()

    # Prefix the per-cycle columns with "cycle" so wide_to_long can find them.
    transposed.columns = ['time'] + ['cycle' + str(col) for col in transposed.columns[1:]]

    # The first row holds the cycle numbers themselves (the transposed "cycle"
    # column), not measurements, so it is skipped before reshaping.
    long_frame = pd.wide_to_long(transposed.iloc[1:, :], stubnames='cycle', i=['time'], j='c')
    return long_frame.reset_index()


def load_features(paths):
    """Load every path into a long-format frame, keyed by a slice of the path.

    Paths that turn out to be directories are skipped; any other error is
    re-raised, matching the intent of the original EISDIR check.

    Parameters
    ----------
    paths : iterable of str
        File paths, e.g. the result of ``glob.glob``.

    Returns
    -------
    dict
        Maps ``path[9:-4]`` to the frame returned by ``load_feature_long``.
    """
    loaded = {}
    for name in paths:
        try:
            frame = load_feature_long(name)
        except IsADirectoryError:
            # A directory matched the glob pattern: nothing to read.
            continue
        # The slice drops the ".txt" suffix and the first nine characters, so
        # './features/CP.txt' becomes 's/CP'. Later cells index the dictionary
        # with these 's/...' keys, so the slice is kept unchanged.
        loaded[name[9:-4]] = frame
    return loaded


a = glob.glob('./features/*.txt')
features = load_features(a)

In [3]:
# Give every long-format frame the same two key columns so the frames can be
# merged later; the value column takes the frame's own dictionary key as name.
for key, frame in features.items():
    frame.columns = ['seconds', 'cycle', key]

In [4]:
# Keys of the frames to merge, in merge order.
sensor_keys = (
    's/CP', 's/CE', 's/EPS1',
    's/FS1', 's/FS2',
    's/PS1', 's/PS2', 's/PS3', 's/PS4', 's/PS5', 's/PS6',
    's/SE',
    's/TS1', 's/TS2', 's/TS3', 's/TS4',
    's/VS1',
)
dfs = [features[sensor_key] for sensor_key in sensor_keys]

In [5]:
from functools import reduce


def merge_pair(left, right):
    """Merge two long-format frames on their shared seconds/cycle columns."""
    return pd.merge(left, right, on=['seconds', 'cycle'])


# Fold the list of frames into a single frame, one merge at a time.
feats_join = reduce(merge_pair, dfs)

In [6]:
# Load the target table. `sep=r'\s+'` is the documented replacement for the
# deprecated `delim_whitespace=True` and parses the file identically.
# NOTE(review): no `header=` is given, so pandas takes the first line of
# profile.txt as column names before they are overwritten below. If the file
# has no header line, its first record is lost and the labels are shifted by
# one row relative to the cycles -- confirm, and if so read with
# `header=None` and revisit the cell that trims `extracted_features`.
label = pd.read_csv('profile.txt', sep=r'\s+')
label.columns = ['cooler_condition', 'valve_condition', 'pump_leak', 'hydraulic_accumulator', 'stable_flag']

In [7]:
%pip install tsfresh

Collecting tsfresh
  Downloading tsfresh-0.17.0-py2.py3-none-any.whl (91 kB)
[K     |████████████████████████████████| 91 kB 3.7 MB/s eta 0:00:011
Collecting dask[dataframe]>=2.9.0
  Downloading dask-2021.1.0-py3-none-any.whl (889 kB)
[K     |████████████████████████████████| 889 kB 11.3 MB/s eta 0:00:01
Collecting distributed>=2.11.0
  Downloading distributed-2021.1.0-py3-none-any.whl (671 kB)
[K     |████████████████████████████████| 671 kB 11.3 MB/s eta 0:00:01
Collecting fsspec>=0.6.0; extra == "dataframe"
  Downloading fsspec-0.8.5-py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 8.3 MB/s  eta 0:00:01
[?25hCollecting toolz>=0.8.2; extra == "dataframe"
  Downloading toolz-0.11.1-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 4.4 MB/s  eta 0:00:01
[?25hCollecting partd>=0.3.10; extra == "dataframe"
  Downloading partd-1.1.0-py3-none-any.whl (19 kB)
Collecting tblib>=1.6.0
  Downloading tblib-1.7.0-py2.py3-none-any.whl (12 kB)


In [8]:
from azureml.core import Workspace, Experiment, Dataset, Model
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
import joblib, pickle
import logging

from tsfresh.transformers import RelevantFeatureAugmenter
from sklearn.pipeline import Pipeline

from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_relevant_features
from tsfresh.feature_selection import select_features



In [9]:
# Automatic feature extraction with tsfresh: rows are grouped by "cycle" and
# ordered by "seconds" inside each group.
extracted_features = extract_features(
    feats_join,
    column_id="cycle",
    column_sort="seconds",
)

# Replace any missing / non-finite values. The result is not reassigned, so
# later cells rely on `extracted_features` itself having been updated; the
# call stays last so the notebook displays the imputed frame.
impute(extracted_features)

Feature Extraction: 100%|██████████| 10/10 [26:17<00:00, 157.70s/it]
 's/CP__fft_coefficient__attr_"real"__coeff_32'
 's/CP__fft_coefficient__attr_"real"__coeff_33' ...
 's/VS1__fft_coefficient__attr_"angle"__coeff_97'
 's/VS1__fft_coefficient__attr_"angle"__coeff_98'
 's/VS1__fft_coefficient__attr_"angle"__coeff_99'] did not have any finite values. Filling with zeros.


Unnamed: 0,s/CP__variance_larger_than_standard_deviation,s/CP__has_duplicate_max,s/CP__has_duplicate_min,s/CP__has_duplicate,s/CP__sum_values,s/CP__abs_energy,s/CP__mean_abs_change,s/CP__mean_change,s/CP__mean_second_derivative_central,s/CP__median,...,s/VS1__fourier_entropy__bins_2,s/VS1__fourier_entropy__bins_3,s/VS1__fourier_entropy__bins_5,s/VS1__fourier_entropy__bins_10,s/VS1__fourier_entropy__bins_100,s/VS1__permutation_entropy__dimension_3__tau_1,s/VS1__permutation_entropy__dimension_4__tau_1,s/VS1__permutation_entropy__dimension_5__tau_1,s/VS1__permutation_entropy__dimension_6__tau_1,s/VS1__permutation_entropy__dimension_7__tau_1
0,0.0,0.0,0.0,1.0,111.765,212.795561,0.016119,-0.013576,-0.000164,1.9240,...,0.239217,0.239217,0.424254,0.563420,1.367099,1.686006,2.644019,3.421882,3.711047,3.860623
1,0.0,0.0,0.0,1.0,75.333,94.912737,0.011831,-0.004237,0.000405,1.2340,...,0.142506,0.142506,0.457102,0.595073,1.433575,1.595254,2.462080,3.107687,3.498105,3.727584
2,0.0,0.0,0.0,1.0,66.793,74.387011,0.009356,-0.000949,0.000172,1.1095,...,0.142506,0.142506,0.379535,0.379535,1.807218,1.664222,2.716036,3.427950,3.760199,3.886296
3,0.0,0.0,1.0,1.0,63.729,67.725523,0.012814,-0.000169,0.000052,1.0660,...,0.142506,0.142506,0.283936,0.595073,1.061467,1.700625,2.632835,3.357998,3.675070,3.850934
4,0.0,0.0,0.0,1.0,64.228,68.781148,0.009220,0.000339,0.000095,1.0725,...,0.142506,0.142506,0.283936,0.424254,1.705525,1.772589,2.863561,3.542382,3.821382,3.963312
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200,0.0,0.0,0.0,1.0,129.636,280.111022,0.009644,-0.000424,0.000138,2.1615,...,0.142506,0.142506,0.283936,0.424254,1.220532,1.679611,2.719164,3.333242,3.695355,3.860623
2201,0.0,0.0,0.0,1.0,129.087,277.756455,0.008949,0.000000,-0.000147,2.1485,...,0.142506,0.283936,0.283936,0.518700,1.192692,1.638909,2.554651,3.198601,3.701534,3.860623
2202,0.0,1.0,1.0,1.0,128.598,275.667688,0.009356,-0.000034,0.000207,2.1425,...,0.239217,0.283936,0.283936,0.424254,1.518855,1.708797,2.782633,3.468116,3.805690,3.911968
2203,0.0,0.0,0.0,1.0,128.909,276.977483,0.007814,0.000051,0.000164,2.1480,...,0.239217,0.283936,0.283936,0.283936,1.147973,1.738131,2.780204,3.485050,3.755280,3.911968


In [10]:
# Keep only as many feature rows as there are label rows. The original
# in-place `drop(tail(1))` removed one more row every time the cell was
# re-run; slicing to the label length gives the same frame on the first run
# (features have one row more than `label`) and is safe to repeat.
# NOTE(review): this discards the LAST cycle. If the missing label row is
# actually the first record of profile.txt (consumed as a header when
# `label` was read), features and labels are misaligned by one -- confirm.
extracted_features = extracted_features.iloc[:len(label)]

In [11]:
# Targets used for the relevance-based feature selection.
accumulator_target = label['hydraulic_accumulator']
stable_flag_target = label['stable_flag']

# One selection per target, both starting from the same extracted features.
features_filtered_accum = select_features(extracted_features, accumulator_target)
features_filtered_flag = select_features(extracted_features, stable_flag_target)

In [16]:
# Attach every label column to the accumulator-selected features by index
# (left join, which is DataFrame.join's default).
joint = features_filtered_accum.join(label, how='left')
joint.shape

(2204, 5221)

In [19]:
# Remove the last 1000 rows of the joined table by index label.
rows_to_discard = joint.tail(1000).index
half_joint = joint.drop(index=rows_to_discard)
half_joint.shape

(1204, 5221)

In [None]:
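# TODO: convert the seconds column to datetime. This step is not implemented yet; the forecasting parameters below still refer to a placeholder time-column name.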

In [20]:
from azureml.automl.core.forecasting_parameters import ForecastingParameters

# Settings for the hydraulic-accumulator forecasting experiment, collected in
# one mapping and then handed to ForecastingParameters as keyword arguments.
forecast_settings = {
    'time_column_name': 'seconds_column_switched_to_datetime',
    'forecast_horizon': 50,
    'time_series_id_column_names': ["hydraulic_accumulator"],
    'target_lags': 'auto',
}
forecasting_parameters_hydraulic_accum = ForecastingParameters(**forecast_settings)

In [None]:
# AutoML forecasting configuration for the hydraulic accumulator target.
# The original unpacked `**forecasting_parameters`, a name that is never
# defined in this notebook; the ForecastingParameters object built earlier is
# `forecasting_parameters_hydraulic_accum`, and AutoMLConfig takes it through
# the `forecasting_parameters` keyword rather than by unpacking.
automl_config_hydraulic_accum = AutoMLConfig(
    task='forecasting',
    primary_metric='normalized_root_mean_squared_error',
    experiment_timeout_minutes=15,
    enable_early_stopping=True,
    training_data=half_joint,
    label_column_name="hydraulic_accumulator",
    n_cross_validations=5,
    enable_ensembling=False,
    verbosity=logging.INFO,
    forecasting_parameters=forecasting_parameters_hydraulic_accum,
)

In [21]:
# Look up the Azure ML workspace by name and create two experiments in it.
# `exp` is not used by any later cell visible in this notebook.
ws = Workspace.get(name="quick-starts-ws-135136")
exp = Experiment(workspace=ws, name="Ranga")

autoexp = Experiment(workspace=ws,name='AutoRanga')
# NOTE(review): `automl_config` is not defined in any cell visible here (the
# only config built above is `automl_config_hydraulic_accum`), so this cell
# fails on a fresh kernel. The recorded output below is a classification run
# (primary metric AUC_weighted), so the config that was actually submitted
# must have come from a cell that is no longer present -- restore it.
remote_run = autoexp.submit(automl_config)
# Show the run widget, then block until the run finishes, streaming its log.
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code F38YXNW96 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missi



_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


****************************************************************************************************

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   MaxAbsScaler LightGBM                          0:03:26       0.9941    0.9941
         1   MaxAbsScaler XGBoostClassifier                 0:01:38       0.9929    0.9941
         2   MaxAbsScaler RandomForest                      0:00:42       0.9776    0.9941
         3   MaxAbsScaler RandomForest                      0:00:42      

{'runId': 'AutoML_0401814e-b965-4245-bf69-7e0da572e1a8',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2021-01-17T23:45:01.255262Z',
 'endTimeUtc': '2021-01-18T00:09:23.83922Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'local',
  'DataPrepJsonString': None,
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.19.0", "azureml-train": "1.19.0", "azureml-train-restclients-hyperdrive": "1.19.0", "azureml-train-core": "1.19.0", "azureml-train-automl": "1.19.0", "azureml-train-automl-runtime": "1.19.0", "azureml-train-automl-client": "1.19.0", "azureml-tensorboard": "1.19.0", "azureml-telemetry": "1.19.0", "azureml-sdk": "1.19.0", "azureml-samples": "0+unk

In [22]:
# get_output() yields a pair; unpack it into `best` and `fitted`, then
# display the fitted pipeline.
run_and_model = remote_run.get_output()
best, fitted = run_and_model
fitted

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                  min_impurity_split=None,
                                                                                                  min_samples_leaf=0.01,
                                                                                                  min_samples_split=0.15052631578947367,
           

In [23]:
# Two-step pipeline: tsfresh feature augmentation keyed on cycle / seconds,
# followed by the fitted AutoML model.
augmenter_step = ('augmenter', RelevantFeatureAugmenter(column_id="cycle", column_sort="seconds"))
model_step = ('best automl', fitted)
pipeline = Pipeline([augmenter_step, model_step])

In [24]:
# Target series, plus an empty frame that only carries the target's index.
y_stable_flag = label['stable_flag']
target_index = y_stable_flag.index
x = pd.DataFrame(index=target_index)

In [27]:
# Hand the merged time-series table to the augmenter step, then persist the
# whole pipeline; the dump call stays last so its return value is displayed.
augmenter_settings = {'augmenter__timeseries_container': feats_join}
pipeline.set_params(**augmenter_settings)
joblib.dump(pipeline, 'pipeline.sav')

In [46]:
# Persist only the fitted AutoML model; this file is what gets registered.
joblib.dump(value=fitted, filename='best_fitted.sav')

In [47]:
# Register the saved model file in the workspace under the given model name.
registeredfinal = Model.register(
    workspace=ws,
    model_path='./best_fitted.sav',
    model_name='registered_fitted.sav',
)

Registering model registered_fitted.sav


In [48]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import LocalWebservice, AciWebservice

# Inference configuration: the workspace's 'AzureML-AutoML' environment plus
# the score.py entry script.
env = Environment.get(workspace=ws, name='AzureML-AutoML')
inferencefinal = InferenceConfig(entry_script='score.py', environment=env)

# Deploy the registered model as a local web service and wait for the
# deployment to finish, showing its output.
deployfinal = LocalWebservice.deploy_configuration()
svcs = Model.deploy(
    workspace=ws,
    name='depl',
    models=[registeredfinal],
    inference_config=inferencefinal,
    deployment_config=deployfinal,
)
svcs.wait_for_deployment(show_output=True)

Downloading model registered_fitted.sav:1 to /tmp/azureml_6luhazdx/registered_fitted.sav/1
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry viennaglobal.azurecr.io
Logging into Docker registry viennaglobal.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0
 ---> e21868753f04
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> b229781b8f30
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjgxY2VmYWQzLWQyYzktNGY3Ny1hNDY2LTk5YTdmNTQxYzdiYiIsInJlc291cmNlR3JvdXBOYW1lIjoiYW1sLXF1aWNrc3RhcnRzLTEzNTEzNiIsImFjY291bnROYW1lIjoicXVpY2stc3RhcnRzLXdzLTEzNTEzNiIsIndvcmtzcGFjZUlkIjoiZmE5YTg2OTUtYzNlMi00MzdkLTkyODctYWRkNzFiNGFmNTQ2In0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 9b4f19a3cb81
 ---> 9199977408f6
Step 4/5 : RUN mv '/var/azureml-app/tmpxfqojuj

In [49]:
# Display the scoring endpoint of the deployed service. The recorded value
# includes a localhost port, so read it from `svcs` rather than copying it.
svcs.scoring_uri

'http://localhost:32779/score'

In [61]:
import requests
import json

# URL for the web service. Taken from the deployed service object instead of
# a pasted 'http://localhost:<port>/score' string, so the cell does not rely
# on a port number copied from an earlier deployment.
scoring_uri = svcs.scoring_uri

# NOTE(review): this payload carries a single value, while the recorded
# responses in this notebook report that the model expects a dataframe/array
# with 5220 features. The request shape and score.py need to be aligned.
data = {'data': [{'data': 3444}]}

input_data = json.dumps(data)

headers = {'Content-Type': 'application/json'}

# Make the request and display the response. The timeout keeps the cell from
# blocking forever if the local service is not responding.
result = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
print("prediction:", result.text)

prediction: "DataErrorException:\n\tMessage: df should be a pandas dataframe or numpy array\n\tInnerException: None\n\tErrorResponse \n{\n    \"error\": {\n        \"code\": \"SystemError\",\n        \"message\": \"Encountered an internal AutoML error. Error Message/Code: df should be a pandas dataframe or numpy array. Additional Info: DataErrorException:\\n\\tMessage: df should be a pandas dataframe or numpy array\\n\\tInnerException: None\\n\\tErrorResponse \\n{\\n    \\\"error\\\": {\\n        \\\"message\\\": \\\"df should be a pandas dataframe or numpy array\\\",\\n        \\\"target\\\": \\\"utilities.check_input\\\",\\n        \\\"reference_code\\\": \\\"098d83d9-f525-4844-9666-ea28b1a41f08\\\"\\n    }\\n}\",\n        \"details_uri\": \"https://docs.microsoft.com/azure/machine-learning/resource-known-issues#automated-machine-learning\",\n        \"target\": \"utilities.check_input\",\n        \"inner_error\": {\n            \"code\": \"ClientError\",\n            \"inner_error\"

In [54]:
# Resolve the registered model's path and load it back with joblib.
# joblib.load unpickles the file, so only load model files you trust.
model_path = Model.get_model_path(model_name='registered_fitted.sav')
daone = joblib.load(filename=model_path)

In [62]:
# Build a one-row frame from the request payload's record list and show it.
payload_records = data['data']
datadf = pd.DataFrame(payload_records)
datadf

Unnamed: 0,data
0,3444


In [63]:
# NOTE(review): as recorded below, this call raises a DataException because
# `datadf` has 1 column while the model was fitted on 5220 features. The
# input must carry the same feature columns the model was trained with.
daone.predict(datadf)

DataException: DataException:
	Message: The number of features in [fitted data](5220) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.
	InnerException: None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "The number of features in [fitted data](5220) does not match with those in [input data](1). Please inspect your data, and make sure that features are aligned in both the Datasets.",
        "target": "X",
        "inner_error": {
            "code": "BadData",
            "inner_error": {
                "code": "InvalidDimension",
                "inner_error": {
                    "code": "DataShapeMismatch"
                }
            }
        },
        "reference_code": "c402b6c2-3870-45a7-8745-c063bd385962"
    }
}