#### Microsoft Azure Predictive Maintenance

Link to this dataset - https://www.kaggle.com/datasets/arnabbiswas1/microsoft-azure-predictive-maintenance?resource=download

This is an example data source that can be used for building predictive maintenance models. It consists of the following data:

- **Machine conditions and usage**: The operating conditions of a machine, e.g. data collected from sensors.
- **Failure history**: The failure history of a machine or of a component within the machine.
- **Maintenance history**: The repair history of a machine, e.g. error codes, previous maintenance activities or component replacements.
- **Machine features**: The features of a machine, e.g. engine size, make and model, location.









<span style="color:green">Our goal is to build a model that predicts when a machine will fail. Such predictions can help companies reduce their maintenance costs.

</span>






In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras 
import os


In [2]:
# Resolve the "Data" directory that sits next to the current working directory.
# os.path.join uses the host OS separator; the previous hard-coded "\\Data"
# suffix only produced a valid path on Windows.
data_path = os.path.normpath(os.path.join(os.getcwd(), os.pardir, "Data"))
# The CSV files below (and in later cells) are read by bare file name, so the
# working directory is switched to the data folder.
os.chdir(data_path)
# Files shipped with the dataset:
#'PdM_errors.csv', 'PdM_failures.csv','PdM_machines.csv','PdM_maint.csv','PdM_telemetry.csv'
# df_main and df_errors are two independent reads of the same file, so a later
# in-place change to one of them does not leak into the other.
df_main = pd.read_csv('PdM_errors.csv')
df_machines = pd.read_csv('PdM_machines.csv')
df_errors = pd.read_csv('PdM_errors.csv')

In [4]:
# Attach each machine's metadata columns to its error records by looking the
# error's machineID up in a machineID-indexed copy of df_machines.
machines_by_id = df_machines.set_index('machineID')
df_main.join(machines_by_id, on='machineID')

Unnamed: 0,datetime,machineID,errorID,model,age
0,2015-01-03 07:00:00,1,error1,model3,18
1,2015-01-03 20:00:00,1,error3,model3,18
2,2015-01-04 06:00:00,1,error5,model3,18
3,2015-01-10 15:00:00,1,error4,model3,18
4,2015-01-22 10:00:00,1,error4,model3,18
...,...,...,...,...,...
3914,2015-11-21 08:00:00,100,error2,model4,5
3915,2015-12-04 02:00:00,100,error1,model4,5
3916,2015-12-08 06:00:00,100,error2,model4,5
3917,2015-12-08 06:00:00,100,error3,model4,5


In [18]:
# Load the telemetry CSV and convert its 'datetime' column with pd.to_datetime
# in a single chained expression.
df_telemetry = (
    pd.read_csv('PdM_telemetry.csv')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)

In [23]:
# Show the telemetry frame as the cell output.
df_telemetry


Unnamed: 0,datetime,machineID,volt,rotate,pressure,vibration
0,2015-01-01 06:00:00,1,176.217853,418.504078,113.077935,45.087686
1,2015-01-01 07:00:00,1,162.879223,402.747490,95.460525,43.413973
2,2015-01-01 08:00:00,1,170.989902,527.349825,75.237905,34.178847
3,2015-01-01 09:00:00,1,162.462833,346.149335,109.248561,41.122144
4,2015-01-01 10:00:00,1,157.610021,435.376873,111.886648,25.990511
...,...,...,...,...,...,...
876095,2016-01-01 02:00:00,100,179.438162,395.222827,102.290715,50.771941
876096,2016-01-01 03:00:00,100,189.617555,446.207972,98.180607,35.123072
876097,2016-01-01 04:00:00,100,192.483414,447.816524,94.132837,48.314561
876098,2016-01-01 05:00:00,100,165.475310,413.771670,104.081073,44.835259


In [19]:
# Report the time span covered by the telemetry readings.
# Series.min()/max() are vectorized, unlike the built-in min()/max(), which
# walk the column element by element. The placeholders are now in natural
# order (start first, end second); the printed text is unchanged.
telemetry_times = df_telemetry['datetime']
print("Starting date :  {0} , Ending Date : {1}  ".format(telemetry_times.min(), telemetry_times.max()))


Starting date :  2015-01-01 06:00:00 , Ending Date : 2016-01-01 06:00:00  


In [9]:
import plotly.graph_objects as go
import plotly.express as px

### Let's analyze the telemetry data first

In [28]:
def draw_telementry(machineID, variable ,Sdate ='2015-01-01',Edate = '2016-01-01'  ):
    """Plot one telemetry variable over time for a single machine.

    Reads the notebook-level ``df_telemetry`` frame and shows a plotly line
    chart; nothing is returned.

    Args:
        machineID: value matched against the ``machineID`` column.
        variable: name of the ``df_telemetry`` column drawn on the y axis.
        Sdate: first instant to include (anything ``pd.Timestamp`` accepts).
        Edate: last calendar day to include. The whole day is kept, so readings
            taken later on that day are still drawn.
    """
    # Sdate/Edate used to be accepted but ignored; they now bound the plot.
    window_start = pd.Timestamp(Sdate)
    # Exclusive upper bound: midnight after the requested end day.
    window_end = pd.Timestamp(Edate).normalize() + pd.Timedelta(days=1)

    # to_datetime returns datetime-typed values whether or not the column was
    # already parsed, so the comparison works in both cases.
    timestamps = pd.to_datetime(df_telemetry['datetime'])
    selected = (
        (df_telemetry['machineID'] == machineID)
        & (timestamps >= window_start)
        & (timestamps < window_end)
    )

    plot_df = df_telemetry.loc[selected, ['datetime', variable]]
    fig = px.line(x=plot_df['datetime'].values, y=plot_df[variable].values, title= variable + ' over time', template='plotly_dark')
    fig.update_layout(xaxis_title='Time', yaxis_title=variable)
    fig.show()

In [29]:
# Pressure readings of machine 1.
draw_telementry(machineID=1, variable="pressure")


In [31]:
# Voltage readings of machine 1.
draw_telementry(machineID=1, variable="volt")

#### Errors

In [43]:
# Bar chart of the error records, grouped and coloured by error ID.
error_ids = df_errors['errorID']
fig = px.bar(x=error_ids, color=error_ids.values, title="Error Frequency Graph")
fig.update_layout(
    template='plotly_dark',
    xaxis_title='Error IDs',
    yaxis_title="Frequency",
)
fig.show()

In [129]:
# Convert the error log's 'datetime' column with pd.to_datetime and store the
# result back in the same column.
parsed_error_times = pd.to_datetime(df_errors['datetime'])
df_errors['datetime'] = parsed_error_times

#### Machine analysis


In [111]:
# Show the machine metadata frame (machineID, model, age) as the cell output.
df_machines

Unnamed: 0,machineID,model,age
0,1,model3,18
1,2,model4,7
2,3,model3,8
3,4,model3,7
4,5,model3,2
...,...,...,...
95,96,model2,10
96,97,model2,14
97,98,model2,20
98,99,model1,14


In [118]:
import plotly.figure_factory as ff

# Model labels, in the order their histogram traces are added.
model_names = ("model1", "model2", "model3", "model4")

# Single-column ('age') frames, one per model.
df_machines1, df_machines2, df_machines3, df_machines4 = (
    df_machines.loc[df_machines['model'] == label, ["age"]]
    for label in model_names
)

# One histogram trace of machine ages per model, stacked in a single figure.
fig = go.Figure()
for model_name in model_names:
    model_ages = df_machines.loc[df_machines['model'] == model_name, 'age']
    fig.add_trace(go.Histogram(x=model_ages, name=model_name))

fig.update_layout(
    xaxis_title='Age',
    yaxis_title='Count',
    template='plotly_dark',
    barmode='stack',
    title='Machine Age Distribution',
)
fig.show()

In [140]:
# Left-join the error records onto the telemetry readings by timestamp and
# machine; readings without a matching error keep a missing errorID.
# NOTE(review): the result has more rows than df_telemetry (last displayed index
# 876401 vs 876098) — presumably some (datetime, machineID) pairs carry more
# than one error and are duplicated by the join; confirm before aggregating.
merged = pd.merge(
    df_telemetry,
    df_errors,
    how='left',
    on=['datetime', 'machineID'],
)
merged

Unnamed: 0,datetime,machineID,volt,rotate,pressure,vibration,errorID
0,2015-01-01 06:00:00,1,176.217853,418.504078,113.077935,45.087686,
1,2015-01-01 07:00:00,1,162.879223,402.747490,95.460525,43.413973,
2,2015-01-01 08:00:00,1,170.989902,527.349825,75.237905,34.178847,
3,2015-01-01 09:00:00,1,162.462833,346.149335,109.248561,41.122144,
4,2015-01-01 10:00:00,1,157.610021,435.376873,111.886648,25.990511,
...,...,...,...,...,...,...,...
876398,2016-01-01 02:00:00,100,179.438162,395.222827,102.290715,50.771941,
876399,2016-01-01 03:00:00,100,189.617555,446.207972,98.180607,35.123072,
876400,2016-01-01 04:00:00,100,192.483414,447.816524,94.132837,48.314561,
876401,2016-01-01 05:00:00,100,165.475310,413.771670,104.081073,44.835259,
