In [51]:
from typing import Tuple

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt

In [52]:
df = pd.read_csv("predictive_maintenance.csv")

In [53]:
df

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure
...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,No Failure
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,No Failure
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,No Failure
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,No Failure


In [54]:
df.drop(columns=["Product ID"], inplace=True, axis=1)

In [55]:
df["Type"].unique()

array(['M', 'L', 'H'], dtype=object)

In [56]:
df = pd.concat([df, pd.get_dummies(df["Type"])], axis=1)
df.drop(columns=["Type"], inplace=True, axis=1)

In [57]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
def preprocess_data(df: pd.DataFrame):
    x, y = df.drop(columns=["Failure Type"]), df["Failure Type"]
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)
    x_train, x_val, y_train, y_val = train_test_split(x_scaled,y_encoded,test_size=0.2,random_state=42,stratify=y_encoded)
    return x_train, x_val, y_train, y_val, scaler, le

In [58]:
x_train, x_val, y_train, y_val, scaler, label_encoder = preprocess_data(df)
x_train.shape, x_val.shape, y_train.shape, y_val.shape

((8000, 10), (2000, 10), (8000,), (2000,))

In [59]:
class PredictiveDataset(Dataset):
    def __init__(self, x, y):
        super().__init__()
        
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [60]:
train_ds = PredictiveDataset(x_train, y_train)
val_ds = PredictiveDataset(x_val, y_val)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)

len(train_ds), len(val_ds)

(8000, 2000)

In [61]:
def build_model(input_dim: int, num_classes: int = 6) -> nn.Module:
    model = nn.Sequential(
        nn.Linear(input_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, num_classes)
    )
    return model

In [62]:
input_dim = x_train.shape[1]
num_classes = 6
model = build_model(input_dim, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model

Sequential(
  (0): Linear(in_features=10, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=6, bias=True)
)

In [63]:
def train_one_epoch(model: nn.Module,
                    train_loader: DataLoader,
                    criterion,
                    optimizer) -> float:
    model.train()
    losses = []
    batches = 0
    
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        batches += 1
    return sum(losses) / batches

In [64]:
from sklearn.metrics import accuracy_score

def evaluate(model: nn.Module, val_loader: DataLoader) -> float:
    model.eval()
    
    all_true_labels = []
    all_predicted_labels = []
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            predictions = torch.argmax(outputs, dim = 1)
            all_predicted_labels.extend(predictions.numpy().flatten())
            all_true_labels.extend(labels.numpy().flatten())
            
    return float(accuracy_score(all_true_labels, all_predicted_labels))

In [65]:
epochs = 10
train_losses = []
val_accuracies = []

for epoch in range(epochs):
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
    val_acc = evaluate(model, val_loader)
    train_losses.append(train_loss)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{epoch} | Train loss: {train_loss:.4f} | Val acc: {val_acc:.4f}")

Epoch 1/0 | Train loss: 0.4450 | Val acc: 0.9670
Epoch 2/1 | Train loss: 0.0729 | Val acc: 0.9790
Epoch 3/2 | Train loss: 0.0601 | Val acc: 0.9805
Epoch 4/3 | Train loss: 0.0550 | Val acc: 0.9800
Epoch 5/4 | Train loss: 0.0504 | Val acc: 0.9870
Epoch 6/5 | Train loss: 0.0460 | Val acc: 0.9935
Epoch 7/6 | Train loss: 0.0410 | Val acc: 0.9930
Epoch 8/7 | Train loss: 0.0372 | Val acc: 0.9925
Epoch 9/8 | Train loss: 0.0342 | Val acc: 0.9920
Epoch 10/9 | Train loss: 0.0315 | Val acc: 0.9960
