# Anomaly Detection in Manufacturing
![](https://dreamdemostorageforgen2.blob.core.windows.net/mfgdemodata/anomaly_detection_demo.jpg)

In [1]:
import azureml.core
import pandas as pd
import numpy as np
import logging
from sklearn.ensemble import IsolationForest
import pickle

from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig


In [2]:
from azureml.core import Workspace

# Importing user defined config
import config

# Import the subscription details as below to access the resources
subscription_id=config.subscription_id
resource_group=config.resource_group
workspace_name=config.workspace_name

ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
ws.write_config()
ws = Workspace.from_config()

In [3]:
from azureml.core.dataset import Dataset

train_data = Dataset.get_by_name(ws,"pdmanomalymfg")

pdtrain_data = train_data.to_pandas_dataframe()

pdtrain_data.shape

(731000, 8)

In [4]:
  # Select the relevant features
features_data = pdtrain_data[["volt_rollingmean_12","rotate_rollingmean_12","pressure_rollingmean_12","vibration_rollingmean_12","model","age"]]

# Convert categorical variables into dummy variables
X = pd.get_dummies(features_data, prefix=['model'], columns=['model'])


# Show first few features of final dataframe before using Anomaly Detection algorithm
X.head(5)
# Show first few features of final dataframe before using Anomaly Detection algorithm
X.head(5)

Unnamed: 0,volt_rollingmean_12,rotate_rollingmean_12,pressure_rollingmean_12,vibration_rollingmean_12,age,model_model1,model_model2,model_model3,model_model4
0,169.792748,440.726507,97.337677,38.994172,18,0,0,1,0
1,171.121017,462.4846,103.032259,39.544451,9,0,0,1,0
2,171.468397,443.460836,98.76756,39.092499,9,0,0,1,0
3,171.973439,454.452179,101.116022,38.324467,12,0,1,0,0
4,169.806518,448.795436,99.541713,43.031735,18,0,0,1,0


In [5]:
# Instantiate the model
clf = IsolationForest(n_estimators=100, max_samples='auto', contamination='auto', max_features=1.0, bootstrap=False, n_jobs=None, behaviour='deprecated', random_state=None, verbose=0)
# Fit the model
clf.fit(X)

IsolationForest(behaviour='deprecated', bootstrap=False, contamination='auto',
        max_features=1.0, max_samples='auto', n_estimators=100,
        n_jobs=None, random_state=None, verbose=0)

In [6]:
# Save the model for future use
filename = 'anomaly_model.sav'
pickle.dump(clf, open(filename, 'wb'))

In [7]:
# load the model 
anomaly_model = pickle.load(open(filename, 'rb'))

In [8]:
# Predict the anomaly usign the model
y_anomaly = anomaly_model.predict(X)
y_scores= anomaly_model.decision_function(X)

In [9]:
# Convert the numpy array into Pandas dataframe
y_anom = pd.DataFrame(data=y_anomaly,columns=['Anomaly'])
y_score = pd.DataFrame(data=y_scores,columns=['Scores'])

# Rename dt_time column name with TimeStamp column name
pdtrain_data.columns = pdtrain_data.columns.str.replace('dt_truncated','TimeStamp')

# Combine the two dataframes
dataframes = [pdtrain_data,y_anom,y_score]
scored_output = pd.concat(dataframes,axis=1)

# Show the first few rows
scored_output[["TimeStamp","machineID","model","Anomaly","Scores"]].head(20)

Unnamed: 0,TimeStamp,machineID,model,Anomaly,Scores
0,2015-01-04 12:00:00,471,model3,1,0.067937
1,2015-02-17 12:00:00,148,model3,1,0.102671
2,2015-11-01 12:00:00,148,model3,1,0.109581
3,2015-09-07 12:00:00,463,model2,1,0.041276
4,2015-04-18 00:00:00,471,model3,1,0.064724
5,2015-10-18 00:00:00,471,model3,1,0.077968
6,2015-11-20 12:00:00,471,model3,1,0.083923
7,2015-12-27 00:00:00,471,model3,1,0.052521
8,2015-02-09 12:00:00,496,model4,1,0.095824
9,2015-04-09 12:00:00,496,model4,-1,-0.011397
