# Manufacturing Equipment Maintenance Prediction

In [1]:
!pip install tensorflow
# importing the necessary library and dependencies
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

#modelling
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU, PReLU, ELU, ReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import backend as K
from sklearn.metrics import confusion_matrix

# feature engineering
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer





### Loading the dataset

In [3]:
# Read the raw equipment records; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('Dataset.csv')
# Bare last expression so the notebook renders a preview of the frame.
df

Unnamed: 0,Equipment_ID,Temperature,Pressure,Vibration,Humidity,Flow_Rate,Power_Consumption,Oil_Level,Voltage,Maintenance_Type,Failure_Cause,Maintenance_Cost,Production_Volume,Planned_Downtime_Hours,Shifts_Per_Day,Production_Days_Per_Week,Installation_Date,Failure_Date,Maintenance_Date
0,77546,80.03,34.84,1.59,0.50,10.91,298.67,0.34,216.01,Preventive,Electrical Failure,579.69,196,4,2,6,2020-10-25,2021-08-16,2021-08-29
1,529399,90.39,32.63,1.29,0.43,10.20,300.22,0.41,220.97,Preventive,Electrical Failure,647.95,291,2,2,6,2019-12-16,2021-04-06,2021-04-19
2,194027,93.52,37.43,1.84,0.41,10.61,315.68,0.48,232.13,Corrective,Electrical Failure,664.85,308,4,1,5,2020-05-03,2021-05-14,2021-05-27
3,588205,91.57,35.36,1.44,0.29,10.59,296.97,0.55,217.34,Preventive,Mechanical Failure,582.15,328,4,3,5,2020-04-17,2021-07-11,2021-07-24
4,354137,82.37,31.54,1.66,0.48,10.40,339.35,0.56,216.79,Preventive,Electrical Failure,715.05,234,4,2,5,2020-08-16,2021-03-02,2021-03-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127635,22959,90.90,35.52,1.27,0.34,9.52,309.04,0.60,228.05,Corrective,Mechanical Failure,623.37,280,5,1,5,2020-04-26,2021-08-08,2021-08-21
127636,259803,99.93,39.77,1.98,0.28,11.16,285.89,0.54,229.97,Preventive,Electrical Failure,487.98,280,5,2,5,2019-08-09,2020-07-15,2020-07-28
127637,809374,103.23,39.56,1.93,0.39,11.80,273.59,0.60,220.25,Corrective,Electrical Failure,488.85,314,3,2,5,2019-04-06,2020-08-13,2020-08-26
127638,507580,81.36,36.65,1.95,0.50,9.04,267.20,0.58,212.01,Preventive,Mechanical Failure,489.96,242,4,2,5,2020-06-02,2021-02-26,2021-03-11


### Data cleaning

In [None]:
# Remove columns that only identify a row and should not be fed to the model:
#   - Equipment_ID: equipment identifier.
#   - 'Unnamed: 0': appears in the dataset preview as a running row counter, i.e. the
#     index that was written into the CSV. errors='ignore' keeps this cell working
#     if the file is re-exported without that column.
df1 = (
    df
    .drop(columns=['Equipment_ID'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)

In [None]:
# Count the missing values in each column of the cleaned frame.
df1.isnull().sum()

### Checking for multicollinearity 

In [None]:
# Correlation heatmap restricted to the numeric columns (corr() is undefined for text).
numeric_data = df1.select_dtypes(include=['number'])
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(numeric_data.corr(), annot=True, cmap='Greens', ax=ax)
ax.set_title("Correlation heat map of manufacturing equipment")
plt.show()

### Removing outliers

In [None]:
# Outlier fences for Power_Consumption: values further than 1.5 * IQR below the
# first quartile or above the third quartile are treated as outliers.
power = df1['Power_Consumption']
Q1, Q3 = power.quantile(0.25), power.quantile(0.75)
IQR = Q3 - Q1
fence = 1.5 * IQR
lower_bound, upper_bound = Q1 - fence, Q3 + fence

In [None]:
# Keep only rows whose Power_Consumption lies inside the fences (both bounds inclusive).
within_fences = df1['Power_Consumption'].between(lower_bound, upper_bound)
data_no_outliers_1 = df1[within_fences]
print("Original data shape ;", df1.shape)
print("New data without outliers ;", data_no_outliers_1.shape)

In [None]:
# Continue the analysis with the outlier-free frame.
# NOTE(review): this rebinds `df1`, so re-running the IQR cells afterwards computes the
# fences from already-filtered data; a separate name would keep those cells idempotent.
df1=data_no_outliers_1

In [None]:
# Show the (rows, columns) of the frame after the outlier filter was applied.
df1.shape

In [None]:
# Separate the prediction target from the predictor columns.
target_column = 'Failure_Cause'
y = df1[target_column]
x = df1.drop(columns=[target_column])

In [None]:
# Show the predictor column names as a plain list.
list(x.columns)

In [None]:
# Number of rows per Failure_Cause value, to see how balanced the target classes are.
y.value_counts()

### Feature engineering

In [None]:
# Column transformer: one-hot encode the object-dtype columns, standardize everything else.
# NOTE(review): the dataset preview shows Installation_Date, Failure_Date and
# Maintenance_Date as date strings; if they load as object dtype they are one-hot
# encoded here (one output column per distinct date) — confirm this is intended.
cat_features = x.select_dtypes(include="object").columns
num_features = x.select_dtypes(exclude="object").columns

categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# The categorical step is listed first, so its output columns come first in the result.
transformer_steps = [
    ("OneHotEncoder", categorical_transformer, cat_features),
    ("StandardScaler", numerical_transformer, num_features),
]
preprocessor = ColumnTransformer(transformers=transformer_steps)


### Splitting dataset for model training purposes

In [None]:
# Hold out 20% of the rows for testing with a fixed seed for reproducibility.
# stratify=y keeps the Failure_Cause class proportions approximately equal in the
# train and test splits, so a rare class cannot end up under-represented in either.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
x_train.shape, x_test.shape

In [None]:
# Learn the encoding/scaling from the training split only, then apply that same fitted
# transformation to the test split.
x_train_processed = preprocessor.fit_transform(x_train)
x_test_processed = preprocessor.transform(x_test)

# Map the string labels to integer class ids; the encoder is also fitted on train only.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

### Initializing the ANN model

In [None]:
# Initializing the ANN.
# NOTE(review): this empty model is discarded when `classifier` is reassigned from
# create_lean_model(...) in the model-building cell.
classifier = Sequential()

In [None]:
# Stop training once val_loss has not improved for 35 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest val_loss
# when early stopping triggers; with False, the evaluated model would keep the weights
# from 35 epochs after its best validation result.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=35,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
    start_from_epoch=0,
)

### Model building

In [None]:
#model building
def focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function for integer (sparse) class labels.

    For each sample the loss is ``alpha * (1 - p_t) ** gamma * -log(p_t)``, where
    ``p_t`` is the predicted probability of the true class.

    Args:
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
        alpha: Constant multiplier applied to the loss weight of every class;
            pass ``None`` to skip it.

    Returns:
        A function ``(y_true, y_pred) -> loss`` with one loss value per sample,
        usable as the ``loss`` argument of ``Model.compile``.
    """

    def focal_loss_fixed(y_true, y_pred):
        # Assumes y_pred is (batch, num_classes) class probabilities.
        # Integer labels can reach a custom loss as (batch,) or (batch, 1) depending on
        # the Keras version. Flatten them first: a (batch, 1) tensor would one-hot to
        # (batch, 1, C) and then broadcast against y_pred into (batch, batch, C),
        # silently pairing every label with every prediction.
        y_true = tf.reshape(tf.cast(y_true, tf.int32), [-1])
        num_classes = K.int_shape(y_pred)[-1]
        y_true_one_hot = tf.one_hot(y_true, depth=num_classes)
        # Clip to keep log() finite for probabilities of exactly 0 or 1.
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        cross_entropy = -y_true_one_hot * K.log(y_pred)
        # Down-weight well-classified samples: the weight shrinks as p_t approaches 1.
        loss_weight = y_true_one_hot * K.pow(1 - y_pred, gamma)
        if alpha is not None:
            alpha_factor = K.ones_like(y_true_one_hot) * alpha
            loss_weight *= alpha_factor
        loss = loss_weight * cross_entropy
        # Only the true-class entry is non-zero, so the sum picks out that term.
        return K.sum(loss, axis=-1)
    return focal_loss_fixed

# --- Define the Model Architecture ---
def create_lean_model(num_features, num_classes):
    """Build and compile a small softmax classifier with one hidden layer.

    Args:
        num_features: Accepted but not read by this function; no input layer is
            declared here.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (adam optimizer, focal loss, accuracy metric).
    """
    hidden_layer = layers.Dense(
        units=8,
        activation='relu',
        kernel_regularizer=keras.regularizers.l2(0.001),
    )
    dropout_layer = layers.Dropout(0.3)
    output_layer = layers.Dense(units=num_classes, activation='softmax')

    model = Sequential([hidden_layer, dropout_layer, output_layer])
    model.compile(
        optimizer='adam',
        loss=focal_loss(gamma=2.0, alpha=0.25),
        metrics=['accuracy'],
    )
    return model


# Take the model dimensions from the prepared data rather than hard-coding them, so
# they stay correct when the preprocessing or the set of target classes changes.
NUM_FEATURES = x_train_processed.shape[1]  # columns produced by the preprocessor
NUM_CLASSES = len(le.classes_)             # distinct Failure_Cause labels seen in training

classifier = create_lean_model(NUM_FEATURES, NUM_CLASSES)

# epochs=1000 is only an upper bound; the early-stopping callback ends training sooner.
# 33% of the training rows are set aside as the validation set that val_loss is computed on.
model_history = classifier.fit(
    x_train_processed,
    y_train_encoded,
    validation_split=0.33,
    batch_size=32,
    epochs=1000,
    callbacks=[early_stopping],  # Keras documents `callbacks` as a list of callbacks
)


### Model evaluation

In [None]:
# List the names of the per-epoch series recorded in the training history.
model_history.history.keys()

In [None]:
#evaluating the model
# NOTE(review): this redefines the focal_loss from the model-building cell. The
# already-compiled `classifier` keeps the loss function it was compiled with, so the
# evaluation below does not use this copy.
def focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function for integer (sparse) class labels.

    For each sample the loss is ``alpha * (1 - p_t) ** gamma * -log(p_t)``, where
    ``p_t`` is the predicted probability of the true class.

    Args:
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
        alpha: Constant multiplier applied to the loss weight of every class;
            pass ``None`` to skip it.

    Returns:
        A function ``(y_true, y_pred) -> loss`` with one loss value per sample.
    """

    def focal_loss_fixed(y_true, y_pred):
        # Assumes y_pred is (batch, num_classes) class probabilities.
        # Flatten the labels to (batch,): a (batch, 1) label tensor would one-hot to
        # (batch, 1, C) and broadcast against y_pred into (batch, batch, C).
        y_true = tf.reshape(tf.cast(y_true, tf.int32), [-1])
        num_classes = K.int_shape(y_pred)[-1]
        y_true_one_hot = tf.one_hot(y_true, depth=num_classes)
        # Clip to keep log() finite for probabilities of exactly 0 or 1.
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        cross_entropy = -y_true_one_hot * K.log(y_pred)
        loss_weight = y_true_one_hot * K.pow(1 - y_pred, gamma)
        if alpha is not None:
            alpha_factor = K.ones_like(y_true_one_hot) * alpha
            loss_weight *= alpha_factor
        loss = loss_weight * cross_entropy
        return K.sum(loss, axis=-1)
    return focal_loss_fixed

print("Evaluating the model on the test set...")

# The model was compiled with a single metric, so evaluate() returns [loss, accuracy].
test_scores = classifier.evaluate(x_test_processed, y_test_encoded)
loss, accuracy = test_scores

print(f"\nFinal Test Loss: {loss:.4f}")
print(f"Final Test Accuracy: {accuracy:.4f}")


#### Confusion matrix

In [None]:
# using a confusion matrix to get a per-class view of model performance

def plot_confusion_matrix(classifier, x_test, y_test_encoded, class_names):
    """Predict on ``x_test`` and draw the confusion matrix as an annotated heatmap.

    Args:
        classifier: Fitted model whose ``predict`` returns one score per class for
            each row.
        x_test: Feature matrix to predict on, already preprocessed for the model.
        y_test_encoded: True integer class ids for the rows of ``x_test``.
        class_names: Tick labels for the matrix axes, ordered by class id.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Use the features passed in. The previous body read the notebook-global
    # x_test_processed here, so the x_test argument was silently ignored.
    y_pred_probs = classifier.predict(x_test)
    # The predicted class is the index of the highest score in each row.
    y_pred_classes = np.argmax(y_pred_probs, axis=1)

    y_true_classes = y_test_encoded

    cm = confusion_matrix(y_true_classes, y_pred_classes)

    plt.figure(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names
    )
    plt.title("Confusion Matrix for Failure Prediction")
    plt.ylabel("Actual Label")
    plt.xlabel("Predicted Label")
    plt.show()

# Take the tick labels from the fitted LabelEncoder: le.classes_[i] is the original
# label that was encoded as integer i, so the names are guaranteed to line up with the
# rows and columns of the matrix (a hand-written list can drift from the data).
# The former `if __name__ == "__main__":` guard was dropped: it wrapped only the
# assignment while the call below ran unconditionally.
CLASS_NAMES = list(le.classes_)

plot_confusion_matrix(classifier, x_test_processed, y_test_encoded, CLASS_NAMES)
