# Reading the Data and Preprocessing

In [1]:
#import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical
import keras.backend as K
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

In [24]:
# Load the predictive-maintenance table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='predictive_maintenance2.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,0,M,298.1,308.6,1551,42.8,0,0,No Failure
1,1,L,298.2,308.7,1408,46.3,3,0,No Failure
2,2,L,298.1,308.5,1498,49.4,5,0,No Failure
3,3,L,298.2,308.6,1433,39.5,7,0,No Failure
4,4,L,298.2,308.7,1408,40.0,9,0,No Failure


In [3]:
# Encode the machine quality grade as an integer: M -> 0, L -> 1, H -> 2.
# The result is assigned back to the column rather than calling
# `replace(..., inplace=True)` on the `df['Type']` selection: that chained
# in-place form operates on a temporary object and does not update `df` when
# pandas Copy-on-Write is active. The explicit cast keeps the column int64 and
# fails loudly if a grade outside the mapping is present.
df['Type'] = df['Type'].replace({'M': 0, 'L': 1, 'H': 2}).astype('int64')

In [4]:
# Target: the failure category, kept as a one-column frame.
y = df.loc[:, ['Failure Type']]
# Predictors: the five sensor readings followed by the encoded machine type.
x = df.loc[:, [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
    'Type',
]]
# Show which failure categories occur in the data.
y['Failure Type'].unique()

array(['No Failure', 'Power Failure', 'Tool Wear Failure',
       'Overstrain Failure', 'Heat Dissipation Failure'], dtype=object)

In [5]:
from sklearn.preprocessing import LabelEncoder

# Encode the failure categories as integer class ids.
# `fit_transform` must be called on an encoder *instance*; calling it on the
# class passes the DataFrame in as `self`. Keeping the fitted encoder also
# allows ids to be mapped back to names with `label_encoder.inverse_transform`.
# Reading from `df` (rather than the previous `y`) keeps this cell re-runnable.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Failure Type'])
y

  self.classes_, y = _unique(y, return_inverse=True)


array([1, 1, 1, ..., 1, 1, 1])

In [6]:
# Distinct values of the encoded 'Type' column.
pd.unique(df['Type'])

array([0, 1, 2], dtype=int64)

In [7]:
# Shape of the encoded label array.
np.asarray(y).shape

(9555,)

In [8]:
# Number of predictor columns.
x.shape[1]

6

In [9]:
# Problem dimensions derived from the prepared data.
n_class = np.unique(y).size
n_features = x.shape[1]

# Training hyper-parameters.
batch_size = 15
epoch_count = 10
noise_dim = 100

# Cross-check: number of distinct failure categories in the raw column.
len(df['Failure Type'].unique())

5

# Mode-Specific Normalization

In [10]:
def fit_gmm_for_continuous_feature(feature, n_components=2, random_state=None):
    """Fit a one-dimensional Gaussian mixture model to a single feature.

    Parameters
    ----------
    feature : 1-D array-like (e.g. a pandas Series)
        Values of one column.
    n_components : int, default 2
        Number of mixture components.
    random_state : int or None, default None
        Forwarded to ``GaussianMixture`` so the fit can be made reproducible.
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    GaussianMixture
        The fitted mixture model.
    """
    # The estimator expects a 2-D (n_samples, n_features) input, so the
    # values are turned into a single column.
    values = np.asarray(feature, dtype=float).reshape(-1, 1)
    gmm = GaussianMixture(n_components=n_components, random_state=random_state)
    gmm.fit(values)
    return gmm

def normalize_feature_with_gmm(feature, gmm):
    """Standardise each value against the mixture component it is assigned to.

    Parameters
    ----------
    feature : object exposing ``to_numpy()`` (used here with a pandas Series)
        Values of one column.
    gmm : fitted mixture model
        Must provide ``predict``, ``means_`` and ``covariances_``.

    Returns
    -------
    tuple
        ``(normalized_feature, cluster_ids)``: the standardised values and the
        component index predicted for every value.
    """
    column = feature.to_numpy().reshape(-1, 1)
    # Component chosen by the model for every individual value.
    cluster_ids = gmm.predict(column)
    # Per-value mean of the chosen component.
    component_means = np.ravel(gmm.means_[cluster_ids])
    # Square root of the component covariance, i.e. its standard deviation.
    component_stds = np.sqrt(np.ravel(gmm.covariances_[cluster_ids]))
    normalized_feature = (feature - component_means) / component_stds
    return normalized_feature, cluster_ids

def inverse_normalize_with_gmm(normalized_feature, gmm, cluster_ids):
    """Undo the per-component standardisation for generated values.

    Parameters
    ----------
    normalized_feature : scalar or array
        Standardised value(s).
    gmm : fitted mixture model
        Must provide ``means_`` and ``covariances_``.
    cluster_ids : int or integer array
        Component index to use for each value.

    Returns
    -------
    numpy.ndarray
        Value(s) mapped back to the original scale.
    """
    component_means = np.ravel(gmm.means_[cluster_ids])
    # Square root of the component covariance, i.e. its standard deviation.
    component_stds = np.sqrt(np.ravel(gmm.covariances_[cluster_ids]))
    return normalized_feature * component_stds + component_means


#### Normalizing the Data in X

In [11]:
# Normalise every column of `x` with its own two-component GMM.
# The buffer is sized from the data instead of a hard-coded row count.
n_samples = len(x)
normalized_feature = np.zeros(shape=(len(x.columns), n_samples))

gmm = []                      # one fitted mixture model per column, in column order
cluster_ids_per_feature = []  # component assignment of every row, per column

for n, column in enumerate(x.columns):
    gmm_model = fit_gmm_for_continuous_feature(x[column])
    gmm.append(gmm_model)
    normalized_feature[n], cluster_ids = normalize_feature_with_gmm(x[column], gmm_model)
    # `cluster_ids` is rebound on every iteration, so after the loop it only
    # holds the assignments of the last column; the list keeps all of them.
    cluster_ids_per_feature.append(cluster_ids)
    print(cluster_ids.shape)

# Switch from (features, samples) to (samples, features).
normalized_feature = normalized_feature.T
gmm

(9555,)
(9555,)
(9555,)
(9555,)
(9555,)
(9555,)


[GaussianMixture(n_components=2),
 GaussianMixture(n_components=2),
 GaussianMixture(n_components=2),
 GaussianMixture(n_components=2),
 GaussianMixture(n_components=2),
 GaussianMixture(n_components=2)]

In [12]:
# Pair every normalised row with its encoded label, then shuffle and batch.
dataset = tf.data.Dataset.from_tensor_slices((normalized_feature, y))
# A shuffle buffer smaller than the dataset only shuffles within a sliding
# window; sizing it to the number of rows gives a full shuffle.
dataset = dataset.shuffle(buffer_size=len(normalized_feature)).batch(batch_size)

# Number of batches per epoch.
print(len(dataset))

637


In [13]:
# Inspect the labels of a single batch. `take(1)` stops after one batch
# instead of iterating over the whole dataset just to look at one element.
for data, la in dataset.take(1):
    print(la)

tf.Tensor([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], shape=(15,), dtype=int32)


In [14]:
# Number of labelled samples.
print(np.shape(y)[0])

9555


# CTGAN Model

#### Loss and Optimizer

In [15]:
# Binary cross-entropy on probabilities, used to score "real vs. fake" predictions.
bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def discriminator_loss(real, fake):
	"""Discriminator objective.

	Sums the binary cross-entropy of the predictions on real samples against
	a target of ones and of the predictions on generated samples against a
	target of zeros.
	"""
	loss_on_real = bce_loss(tf.ones_like(real), real)
	loss_on_fake = bce_loss(tf.zeros_like(fake), fake)
	return loss_on_real + loss_on_fake

def generator_loss(preds):
	"""Generator objective.

	Binary cross-entropy of the discriminator's predictions on generated
	samples against a target of ones.
	"""
	wanted = tf.ones_like(preds)
	return bce_loss(wanted, preds)

# One Adam optimizer per network (generator and discriminator), same settings.
g_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
d_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)


Building the Generator Model


In [16]:
def build_generator():
	"""Build the conditional generator.

	Inputs (in this order): a latent vector of length ``noise_dim`` and an
	integer class label. The label is embedded into 50 dimensions and
	concatenated with a projection of the latent vector; the result passes
	through Dense layers of 1024, 512 and 128 units.

	Returns:
		A Keras ``Model`` producing ``n_features`` linear outputs per sample.
	"""
	# label input
	in_label = tf.keras.layers.Input(shape=(1,))
	li = tf.keras.layers.Embedding(n_class, 50)(in_label)
	li = tf.keras.layers.Flatten()(li)

	# latent (noise) input
	in_lat = tf.keras.layers.Input(shape=(noise_dim,))
	gen = tf.keras.layers.Dense(512)(in_lat)
	gen = tf.keras.layers.LeakyReLU(alpha=0.1)(gen)

	# merge the projected noise with the label embedding
	merge = tf.keras.layers.Concatenate()([gen, li])

	gen = tf.keras.layers.Dense(1024)(merge)
	gen = tf.keras.layers.LeakyReLU(alpha=0.2)(gen)

	# Chain from the previous layer's output: feeding `merge` here again would
	# leave the Dense(1024) branch above disconnected from the model output.
	gen = tf.keras.layers.Dense(512)(gen)
	gen = tf.keras.layers.LeakyReLU(alpha=0.2)(gen)

	gen = tf.keras.layers.Dense(128)(gen)
	gen = tf.keras.layers.LeakyReLU(alpha=0.2)(gen)

	# Linear activation: the outputs are continuous (normalised) tabular values.
	out_layer = tf.keras.layers.Dense(n_features, activation='linear')(gen)
	model = Model([in_lat, in_label], out_layer)
	return model

# Instantiate the generator and print its layer summary.
g_model = build_generator()
g_model.summary()




Discriminator model

In [17]:
def build_discriminator():
  """Build the conditional discriminator.

  Inputs (in this order): a data row of ``n_features`` values and an integer
  class label. The label is embedded into 50 dimensions and concatenated with
  the data row before the dense stack.

  Returns:
    A Keras ``Model`` with a single sigmoid output per sample.
  """
  layers = tf.keras.layers

  # Label branch: embed the class id and flatten it to a vector.
  label_in = layers.Input(shape=(1,))
  label_vec = layers.Flatten()(layers.Embedding(n_class, 50)(label_in))

  # Data branch, joined with the label embedding.
  data_in = layers.Input(shape=(n_features,))
  hidden = layers.Concatenate()([data_in, label_vec])

  # Three Dense + LeakyReLU stages of decreasing width.
  for units in (1024, 512, 128):
    hidden = layers.Dense(units)(hidden)
    hidden = layers.LeakyReLU(alpha=0.2)(hidden)
  hidden = layers.Dropout(0.2)(hidden)

  score = layers.Dense(1, activation='sigmoid')(hidden)

  return Model([data_in, label_in], score)

# Instantiate the discriminator and print its layer summary.
d_model = build_discriminator()
d_model.summary()


#### Customizing the training process into batches

In [18]:
# Compiles the train_step function into a callable TensorFlow graph
@tf.function
def train_step(DataBatch):
	"""Run one discriminator update followed by one generator update.

	Args:
		DataBatch: tuple ``(real_data, real_labels)`` as yielded by the dataset.

	Returns:
		Tuple ``(d_loss, g_loss)`` of scalar loss tensors for this batch.
	"""
	real_data, real_labels = DataBatch

	# Size the noise from the batch actually received rather than the global
	# `batch_size`, so a batch of a different size does not cause a shape
	# mismatch inside the generator.
	noise_shape = tf.stack([tf.shape(real_labels)[0], noise_dim])

	# Fake samples for the discriminator update. They are created outside the
	# tape because only discriminator gradients are needed here.
	random_latent_vectors = tf.random.normal(shape=noise_shape)
	generated_data = g_model([random_latent_vectors, real_labels], training=True)

	# Train the discriminator. `training=True` is passed explicitly so that
	# the Dropout layer in the discriminator is active during the update.
	with tf.GradientTape() as tape:
		pred_fake = d_model([generated_data, real_labels], training=True)
		pred_real = d_model([real_data, real_labels], training=True)

		d_loss = discriminator_loss(pred_real, pred_fake)

	grads = tape.gradient(d_loss, d_model.trainable_variables)
	d_optimizer.apply_gradients(zip(grads, d_model.trainable_variables))

	#-----------------------------------------------------------------#

	# Fresh noise for the generator update.
	random_latent_vectors = tf.random.normal(shape=noise_shape)

	# Train the generator: only generator variables are updated below.
	with tf.GradientTape() as tape:
		fake_data = g_model([random_latent_vectors, real_labels], training=True)
		predictions = d_model([fake_data, real_labels], training=True)
		g_loss = generator_loss(predictions)

	grads = tape.gradient(g_loss, g_model.trainable_variables)
	g_optimizer.apply_gradients(zip(grads, g_model.trainable_variables))

	return d_loss, g_loss
# Smoke test: run `train_step` on one batch and show the two losses.
# Only a single batch is fetched instead of iterating over the whole dataset.
# Note that the call applies gradients, i.e. it is a real training step.
n = next(iter(dataset))
train_step(n)

(<tf.Tensor: shape=(), dtype=float32, numpy=1.3884766>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.7024677>)

#### Training

In [19]:
def train(dataset, epochs=epoch_count):
	"""Train both networks for ``epochs`` passes over ``dataset``.

	Args:
		dataset: iterable of batches accepted by ``train_step``.
		epochs: number of passes over the dataset.

	For every epoch the mean generator and discriminator losses and the
	elapsed wall-clock time are printed. Nothing is returned.
	"""
	for epoch in range(epochs):
		print('Epoch: ', epoch)
		d_loss_list = []
		g_loss_list = []
		start = time.time()

		# One discriminator + generator update per batch; losses are collected
		# so the epoch mean can be reported. (The unused iteration counter
		# that used to live in this loop has been removed.)
		for data_batch in tqdm(dataset):
			d_loss, g_loss = train_step(data_batch)
			d_loss_list.append(d_loss)
			g_loss_list.append(g_loss)

		print (f'Epoch: {epoch} -- Generator Loss: {np.mean(g_loss_list)}, Discriminator Loss: {np.mean(d_loss_list)}\n')
		print (f'Took {time.time()-start} seconds. \n\n')


# Run the training loop for the configured number of epochs.
train(dataset, epochs=epoch_count)


Epoch:  0


100%|██████████| 637/637 [00:04<00:00, 135.89it/s]


Epoch: 0 -- Generator Loss: 0.912338376045227, Discriminator Loss: 1.3359968662261963

Took 4.698411226272583 seconds. 


Epoch:  1


100%|██████████| 637/637 [00:04<00:00, 145.03it/s]


Epoch: 1 -- Generator Loss: 0.875961422920227, Discriminator Loss: 1.2873330116271973

Took 4.39542818069458 seconds. 


Epoch:  2


100%|██████████| 637/637 [00:04<00:00, 143.49it/s]


Epoch: 2 -- Generator Loss: 0.8336252570152283, Discriminator Loss: 1.2901021242141724

Took 4.4437408447265625 seconds. 


Epoch:  3


100%|██████████| 637/637 [00:04<00:00, 144.16it/s]


Epoch: 3 -- Generator Loss: 0.8324925303459167, Discriminator Loss: 1.2845344543457031

Took 4.422781705856323 seconds. 


Epoch:  4


100%|██████████| 637/637 [00:04<00:00, 144.05it/s]


Epoch: 4 -- Generator Loss: 0.8243606686592102, Discriminator Loss: 1.2888591289520264

Took 4.423150539398193 seconds. 


Epoch:  5


100%|██████████| 637/637 [00:04<00:00, 139.80it/s]


Epoch: 5 -- Generator Loss: 0.8242735862731934, Discriminator Loss: 1.2924625873565674

Took 4.56160306930542 seconds. 


Epoch:  6


100%|██████████| 637/637 [00:04<00:00, 140.29it/s]


Epoch: 6 -- Generator Loss: 0.7989816069602966, Discriminator Loss: 1.2953606843948364

Took 4.544135093688965 seconds. 


Epoch:  7


100%|██████████| 637/637 [00:04<00:00, 140.55it/s]


Epoch: 7 -- Generator Loss: 0.8116792440414429, Discriminator Loss: 1.299068808555603

Took 4.537536859512329 seconds. 


Epoch:  8


100%|██████████| 637/637 [00:04<00:00, 138.64it/s]


Epoch: 8 -- Generator Loss: 0.8600181341171265, Discriminator Loss: 1.2825549840927124

Took 4.598526477813721 seconds. 


Epoch:  9


100%|██████████| 637/637 [00:04<00:00, 135.66it/s]

Epoch: 9 -- Generator Loss: 0.8307455778121948, Discriminator Loss: 1.288391351699829

Took 4.70055079460144 seconds. 







# Generating one synthetic sample for each of the 5 'Failure Type' classes

In [20]:
# Map every failure type to the index labels of the rows that carry it.
dic = {
    failure_type: df[df['Failure Type'] == failure_type].index
    for failure_type in df['Failure Type'].unique()
}

print(dic.keys())

dict_keys(['No Failure', 'Power Failure', 'Tool Wear Failure', 'Overstrain Failure', 'Heat Dissipation Failure'])


In [21]:
# First row index of every failure type, in the order the types were grouped.
test = [row_index[0] for row_index in dic.values()]
test

[0, 67, 74, 155, 3100]

In [22]:
# Example of a real (un-normalised) row, for comparison with generated rows.
print(x.iloc[1221, :])

Air temperature [K]         298.1
Process temperature [K]     309.5
Rotational speed [rpm]     1530.0
Torque [Nm]                  40.2
Tool wear [min]              65.0
Type                          2.0
Name: 1221, dtype: float64


#### Generating the data

In [23]:
# One latent vector per class.
random_noise = tf.random.normal(shape=(n_class, noise_dim))
# Encoded class ids, one per class (0 .. n_class-1).
y_test = list(range(n_class))
# cast to numpy array
label = np.array(y_test)
gen_data = g_model.predict([random_noise, label])

# The generator outputs normalised values only, so the mixture component used
# to map each value back is taken from a real reference row of the same class.
# Two things matter here:
#   * the reference row is looked up through the encoded labels `y`, because
#     encoded ids do not follow the order in which the failure types first
#     appear in the data ('No Failure' appears first but is encoded as 1);
#   * the component is predicted with the GMM of the *same* feature, instead
#     of reusing one feature's assignments for every column.
y_encoded = np.asarray(y).ravel()

result = []
for i, class_id in enumerate(label):
    # First real row that belongs to this class.
    ref_row = int(np.flatnonzero(y_encoded == class_id)[0])
    temp = []
    for n in range(n_features):
        ref_value = x.iloc[[ref_row], [n]].to_numpy()  # shape (1, 1)
        mode_id = gmm[n].predict(ref_value)[0]
        value = inverse_normalize_with_gmm(gen_data[i][n], gmm[n], mode_id)[0]
        if n == n_features - 1:
            # Last column is the integer-coded 'Type': round to a whole,
            # non-negative number.
            value = abs(np.rint(value))
        temp.append(value)
    result.append(temp)
    print(temp)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[301.5822469974759, 308.41472777640655, 1667.1279386249043, 48.169073176972965, 100.05448581145029, 0.0]
[297.4076699765869, 309.7755153814836, 1493.3554221273714, 33.5156472751882, 163.5370282420747, 1.0]
[298.7060687296597, 311.0215840364104, 1320.7341046025413, 51.9542099017272, 166.65265668216955, 1.0]
[298.6587083603257, 311.0003075698344, 1309.5130649872515, 55.80076457859719, 186.2739583191283, 1.0]
[301.68184680712295, 308.6422981550853, 1715.8619182458576, 50.515168748211046, 61.20975243534232, 0.0]
