In [None]:
# import random
# random.seed(1234)
# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
import math
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, GRU, Bidirectional, Dense, Dropout, Layer, Concatenate, Masking, Attention, Flatten, MultiHeadAttention, BatchNormalization, RepeatVector, Reshape, Conv1D, Add
from tensorflow.keras.regularizers import L1L2, L1, L2
from tensorflow.keras.optimizers import Adam, RMSprop
import keras.callbacks

from tensorflow.keras import layers, Model

from keras import backend as K

In [None]:
# Single seed value reused wherever the notebook needs reproducible randomness.
random_state = 1

# Seed the Python, NumPy and TensorFlow generators in one call, and ask
# TensorFlow to pick deterministic op implementations so repeated runs agree.
tf.keras.utils.set_random_seed(random_state)
tf.config.experimental.enable_op_determinism()

In [None]:
def Error(y_pred, y_real):
    """Return the exponentially weighted sum of absolute errors.

    Both inputs are passed through ``np.nan_to_num`` first, so NaN entries are
    scored as 0. Each absolute residual ``|y_real - y_pred|`` is multiplied by
    ``exp(-0.001 * y_real)``, which gives samples with a small ``y_real`` more
    weight than samples with a large one. The weighted residuals are summed
    into a single scalar.
    """
    predicted = np.nan_to_num(y_pred, copy=True)
    actual = np.nan_to_num(y_real, copy=True)
    weights = np.exp(-0.001 * actual)
    return np.sum(weights * np.abs(actual - predicted))

In [None]:
def customLoss(y_pred, y_real):
    """TensorFlow version of ``Error``: weighted sum of absolute residuals.

    Each ``|y_real - y_pred|`` is weighted by ``exp(-0.001 * y_real)`` and the
    products are summed into one scalar tensor.

    NOTE(review): Keras invokes loss callables as ``fn(y_true, y_pred)``. This
    signature is ``(y_pred, y_real)``, so passing it positionally as a Keras
    loss would bind the true values to ``y_pred`` and compute the weights from
    the predictions. The visible ``model.compile`` call does not use this
    function; confirm the argument order before wiring it in.
    """
    weights = tf.exp(-0.001 * y_real)
    residuals = tf.abs(y_real - y_pred)
    return tf.reduce_sum(weights * residuals)

In [None]:
# Read in the three training tables for tool 01_M02: the sensor readings, the
# fault records, and the time-to-failure (TTF) targets.
# The paths are relative to the notebook's working directory.
sensor_data = pd.read_csv('../input/phm-2018/phm_data_challenge_2018/train/01_M02_DC_train.csv')
faults_data = pd.read_csv('../input/phm-2018/phm_data_challenge_2018/train/train_faults/01_M02_train_fault_data.csv')
ttf_data = pd.read_csv('../input/phm-2018/phm_data_challenge_2018/train/train_ttf/01_M02_DC_train.csv')

# sensor_data = sensor_data.drop(['Tool'], axis = 1)
# sensor_data = sensor_data.drop(['Lot'], axis = 1)


In [None]:
# Round every time-to-failure value down to the nearest multiple of 4 by
# subtracting its remainder modulo 4 (values already divisible by 4 are unchanged).
for ttf_column in (
    'TTF_FlowCool Pressure Dropped Below Limit',
    'TTF_Flowcool Pressure Too High Check Flowcool Pump',
    'TTF_Flowcool leak',
):
    ttf_data[ttf_column] = ttf_data[ttf_column] - ttf_data[ttf_column] % 4

In [None]:
# Round the recorded failure times down to a multiple of 4 as well, so they
# sit on the same time grid as the rounded TTF values.
fault_time = faults_data['time']
faults_data['time'] = fault_time - fault_time % 4

In [None]:
# Put the sensor readings and the TTF targets side by side.
# concat(axis=1) aligns rows on the index labels (not on the `time` values);
# join='inner' keeps only the labels present in both tables.
df = pd.concat([sensor_data, ttf_data], axis=1, join='inner')

# Rename all 28 columns. The TTF table's own time column becomes 'time_drop'
# so it can be told apart from the sensor table's 'time'.
df.columns = [
    'time', 'Tool', 'stage', 'Lot', 'runnum', 'recipe', 'recipe_step',
    'IONGAUGEPRESSURE', 'ETCHBEAMVOLTAGE', 'ETCHBEAMCURRENT',
    'ETCHSUPPRESSORVOLTAGE', 'ETCHSUPPRESSORCURRENT', 'FLOWCOOLFLOWRATE',
    'FLOWCOOLPRESSURE', 'ETCHGASCHANNEL1READBACK', 'ETCHPBNGASREADBACK',
    'FIXTURETILTANGLE', 'ROTATIONSPEED', 'ACTUALROTATIONANGLE',
    'FIXTURESHUTTERPOSITION', 'ETCHSOURCEUSAGE', 'ETCHAUXSOURCETIMER',
    'ETCHAUX2SOURCETIMER', 'ACTUALSTEPDURATION', 'time_drop',
    'TTF_FlowCool Pressure Dropped Below Limit',
    'TTF_Flowcool Pressure Too High Check Flowcool Pump',
    'TTF_Flowcool leak',
]

# Position 24 is the duplicated time column ('time_drop'); remove it.
df = df.drop(columns=df.columns[24])

In [None]:
#FSP of 1 is the only important data
#df = df.loc[df['FIXTURESHUTTERPOSITION'] == 1]
# Forward-fill missing values: every NaN is replaced by the last non-NaN value
# above it in the same column. Nothing is dropped here; NaNs that precede the
# first valid value of a column remain NaN.
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')` spelling
# and produces the same result.
# NOTE(review): if the TTF columns are NaN after the final failure, this fill
# copies the last known TTF onto those rows instead of removing them. The
# earlier alternative was a dropna over the three TTF columns:
# (subset=['TTF_FlowCool Pressure Dropped Below Limit','TTF_Flowcool Pressure Too High Check Flowcool Pump', 'TTF_Flowcool leak'], how='all')
df = df.ffill()

In [None]:
# Drop rows whose values repeat an earlier row in every column except the time
# and TTF columns. drop_duplicates() returns a new frame and leaves its input
# untouched, so the result must be assigned back for the rows to be removed.
df = df.drop_duplicates(
    subset=df.columns.difference([
        'time',
        'TTF_FlowCool Pressure Dropped Below Limit',
        'TTF_Flowcool Pressure Too High Check Flowcool Pump',
        'TTF_Flowcool leak',
    ])
)


In [None]:
# Build one frame per failure type. Each keeps only its own TTF column, loses
# the columns listed in `_always_dropped`, and has rows with any NaN removed.
_always_dropped = ["time", "Tool", "ROTATIONSPEED"]

df_f1 = df.drop(
    columns=['TTF_Flowcool Pressure Too High Check Flowcool Pump', 'TTF_Flowcool leak'] + _always_dropped
).dropna()
df_f2 = df.drop(
    columns=['TTF_FlowCool Pressure Dropped Below Limit', 'TTF_Flowcool leak'] + _always_dropped
).dropna()
df_f3 = df.drop(
    columns=['TTF_FlowCool Pressure Dropped Below Limit', 'TTF_Flowcool Pressure Too High Check Flowcool Pump'] + _always_dropped
).dropna()

`stage`, `recipe`, and `recipe_step` are categorical and should be encoded as such, but doing so adds complexity. Wu dropped them. TODO: include one-hot encoding (OHE) in the `PreProcess_Data` function.

In [None]:
df_f3['TTF_Flowcool leak'].min()

In [None]:
df['TTF_Flowcool leak'].min()

In [None]:
#prod
def PreProcess_Data(df_f1, df_f2, df_f3, numKept, numFail):
    """Keep the rows close to a failure and flag the ones that are imminent.

    Each frame is filtered on its own TTF column, so the function no longer
    depends on a module-level ``df`` being in scope (and still holding the
    joined table) when it is called.

    Parameters
    ----------
    df_f1, df_f2, df_f3 : pandas.DataFrame
        Frames containing, respectively, the columns
        'TTF_FlowCool Pressure Dropped Below Limit',
        'TTF_Flowcool Pressure Too High Check Flowcool Pump' and
        'TTF_Flowcool leak'. The inputs are not modified.
    numKept : number
        Only rows whose TTF is strictly below this value are kept.
    numFail : number
        Kept rows whose TTF is strictly below this value get ``IsFailure = 1``;
        all other kept rows get ``IsFailure = 0``.

    Returns
    -------
    tuple of three pandas.DataFrame
        Filtered copies of the inputs, each with a float ``IsFailure`` column
        appended as the last column.
    """

    def _window_and_label(frame, ttf_column):
        # Explicit copy: the label column is written to a new frame rather
        # than to a filtered view of the caller's data.
        kept = frame.loc[frame[ttf_column] < numKept].copy()
        # Rows with NaN TTF never pass the filter above, so every kept row
        # gets a definite 1.0 / 0.0 label.
        kept['IsFailure'] = (kept[ttf_column] < numFail).astype(float)
        return kept

    return (
        _window_and_label(df_f1, 'TTF_FlowCool Pressure Dropped Below Limit'),
        _window_and_label(df_f2, 'TTF_Flowcool Pressure Too High Check Flowcool Pump'),
        _window_and_label(df_f3, 'TTF_Flowcool leak'),
    )

In [None]:
df1, df2, df3 = PreProcess_Data(df_f1, df_f2, df_f3, 10800 , 1000)

In [None]:
df3['TTF_Flowcool leak'].min()

In [None]:
# from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
#get data with 5000 points from 0 as relevant and 250 point from 0 being fail data
#df1, df2, df3 = PreProcess_Data(df_f1, df_f2, df_f3, 86400, 20000)

# Set the target columns (TTF + IsFailure) aside, then strip them together with
# the categorical / identifier columns so only numeric sensor features remain.
_non_sensor_columns = ['stage', 'recipe', 'recipe_step', 'Lot', 'runnum']

df1_temp = df1[['TTF_FlowCool Pressure Dropped Below Limit', 'IsFailure']]
df1 = df1.drop(columns=_non_sensor_columns + ['TTF_FlowCool Pressure Dropped Below Limit', 'IsFailure'])

df2_temp = df2[['TTF_Flowcool Pressure Too High Check Flowcool Pump', 'IsFailure']]
df2 = df2.drop(columns=_non_sensor_columns + ['TTF_Flowcool Pressure Too High Check Flowcool Pump', 'IsFailure'])

df3_temp = df3[['TTF_Flowcool leak', 'IsFailure']]
df3 = df3.drop(columns=_non_sensor_columns + ['TTF_Flowcool leak', 'IsFailure'])

In [None]:
# Min-max scale the sensor features of each frame; the target columns were
# split off beforehand and are not scaled.
# The same scaler object is re-fitted for every frame, so after this cell it
# only holds the statistics of df3.
scaler = preprocessing.MinMaxScaler()
# fit_transform returns a plain array, so each result is wrapped back into a
# DataFrame with the original column names (and a fresh 0..n-1 index).
df1_scaled = pd.DataFrame(scaler.fit_transform(df1), columns=df1.columns)
df2_scaled = pd.DataFrame(scaler.fit_transform(df2), columns=df2.columns)
df3_scaled = pd.DataFrame(scaler.fit_transform(df3), columns=df3.columns)

In [None]:
# Reintroduce the target values.
def _reattach_targets(scaled, targets):
    """Give `scaled` the index of `targets`, then join the two column-wise.

    Returns the joined frame and the one-column frame of original index labels.
    """
    # One column (named 0) holding the original row labels, on a 0..n-1 index
    # that lines up with the freshly scaled frame.
    original_labels = pd.DataFrame(targets.index.values)
    relabelled = pd.concat([scaled, original_labels], axis=1, join='inner').set_index(0)
    merged = pd.concat([relabelled, targets], axis=1, join='inner')
    return merged, original_labels


df1_scaled, df1_index = _reattach_targets(df1_scaled, df1_temp)
df2_scaled, df2_index = _reattach_targets(df2_scaled, df2_temp)
df3_scaled, df3_index = _reattach_targets(df3_scaled, df3_temp)

In [None]:
df1_scaled

In [None]:
# For each failure type: move the index into an 'index' column, keep the TTF
# target as its own Series, and build a feature view without index and target.
df1_eda = df1_scaled.reset_index()
df1_eda_tar = df1_eda['TTF_FlowCool Pressure Dropped Below Limit']
df1_eda_vis = df1_eda.drop(columns=['index', 'TTF_FlowCool Pressure Dropped Below Limit'])

df2_eda = df2_scaled.reset_index()
df2_eda_tar = df2_eda['TTF_Flowcool Pressure Too High Check Flowcool Pump']
df2_eda_vis = df2_eda.drop(columns=['index', 'TTF_Flowcool Pressure Too High Check Flowcool Pump'])

df3_eda = df3_scaled.reset_index()
df3_eda_tar = df3_eda['TTF_Flowcool leak']
df3_eda_vis = df3_eda.drop(columns=['index', 'TTF_Flowcool leak'])

In [None]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.gridspec as gridspec
import seaborn as sns
#columns =df1_eda_vis.columns.drop('IsFailure')
# Sensor channels whose distributions are compared between normal rows
# (IsFailure == 0) and near-failure rows (IsFailure == 1).
columns = ['IONGAUGEPRESSURE', 'ETCHBEAMVOLTAGE', 'FLOWCOOLFLOWRATE', 'FLOWCOOLPRESSURE', 'ETCHGASCHANNEL1READBACK', 'ETCHPBNGASREADBACK',
           'FIXTURETILTANGLE', 'ACTUALROTATIONANGLE', 'ETCHSOURCEUSAGE', 'ACTUALSTEPDURATION']
# 7 x 3 grid of subplot slots; only the first len(columns) slots are filled.
grid = gridspec.GridSpec(7, 3)

plt.figure(figsize=(20,30))

# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# histplot/displot are the documented replacements.
for n, col in enumerate(columns):
    ax = plt.subplot(grid[n])
    sns.distplot(df1_eda_vis[df1_eda_vis.IsFailure==0][col], bins = 50, color='g', label = 'Normal')
    # Legend label spelling corrected ('Faliure' -> 'Failure').
    sns.distplot(df1_eda_vis[df1_eda_vis.IsFailure==1][col], bins = 50, color='r', label = 'Failure')

    ax.set_ylabel('Density')
    ax.set_title(str(col))
    ax.set_xlabel('')
    ax.legend()

plt.show()

In [None]:
#columns =df2_eda_vis.columns.drop('IsFailure')
# Same distribution comparison as for failure type 1, now for failure type 2.
# Relies on `columns` and `gridspec` defined in the earlier plotting cell.
grid = gridspec.GridSpec(7, 3)

plt.figure(figsize=(20,30))

for n, col in enumerate(columns):
    ax = plt.subplot(grid[n])
    sns.distplot(df2_eda_vis[df2_eda_vis.IsFailure==0][col], bins = 50, color='g', label = 'Normal')
    # Legend label spelling corrected ('Faliure' -> 'Failure').
    sns.distplot(df2_eda_vis[df2_eda_vis.IsFailure==1][col], bins = 50, color='r', label = 'Failure')
    ax.set_ylabel('Density')
    ax.set_title(str(col))
    ax.set_xlabel('')
    ax.legend()

plt.show()

In [None]:
#columns =df3_eda_vis.columns.drop('IsFailure')

# Same distribution comparison as for failure type 1, now for failure type 3.
# Relies on `columns` and `gridspec` defined in the earlier plotting cell.
grid = gridspec.GridSpec(7, 3)

plt.figure(figsize=(20,30))

for n, col in enumerate(columns):
    ax = plt.subplot(grid[n])
    sns.distplot(df3_eda_vis[df3_eda_vis.IsFailure==0][col], bins = 50, color='g', label = 'Normal')
    # Legend label spelling corrected ('Faliure' -> 'Failure').
    sns.distplot(df3_eda_vis[df3_eda_vis.IsFailure==1][col], bins = 50, color='r', label = 'Failure')
    ax.set_ylabel('Density')
    ax.set_title(str(col))
    ax.set_xlabel('')
    ax.legend()

plt.show()

In [None]:
# cols1 = df1_eda_vis.columns.drop('IsFailure')
# for column in cols1:
#     plt.figure(figsize = (30, 3))
#     plt.plot(df1_eda.index, df1_eda_vis[column])
#     plt.title(column)
#     for ele in np.where(df1_eda_tar == 0)[0]:
#         plt.axvline(x = ele, color = 'red')

#     plt.show()

In [None]:
# cols2 = df2_eda_vis.columns.drop('IsFailure')
# for column in cols2:
#     plt.figure(figsize = (30, 3))
#     plt.plot(df2_eda.index, df2_eda_vis[column])
#     plt.title(column)
#     for ele in np.where(df2_eda_tar == 0)[0]:
#         plt.axvline(x = ele, color = 'red')

#     plt.show()

In [None]:
# cols3 = df3_eda_vis.columns.drop('IsFailure')
# for column in cols3:
#     plt.figure(figsize = (30, 3))
#     plt.plot(df3_eda_vis.index, df3_eda_vis[column])
#     plt.title(column)
#     for ele in np.where(df3_eda_tar == 0)[0]:
#         plt.axvline(x = ele, color = 'red')

#     plt.show()

In [None]:
df1_eda_vis.describe().apply(lambda s: s.apply('{0:.5f}'.format)).T

In [None]:
df2_eda_vis.describe().apply(lambda s: s.apply('{0:.5f}'.format)).T


In [None]:
df3_eda_vis.describe().apply(lambda s: s.apply('{0:.5f}'.format)).T


In [None]:
import seaborn as sns
sns.set()

# Pairwise correlation heatmap of the failure-type-1 features (incl. IsFailure).
# The figure is created at 14x8 inches and resized to 15x15 before display.
fig = plt.figure(figsize=(14, 8))
sns.heatmap(df1_eda_vis.corr(), annot=True, fmt='.2f', cmap='crest', linewidths=.5)
fig.set_size_inches(15, 15)
plt.show()

In [None]:
# Pairwise correlation heatmap of the failure-type-2 features (incl. IsFailure).
# The figure is created at 14x8 inches and resized to 15x15 before display.
fig = plt.figure(figsize=(14, 8))
sns.heatmap(df2_eda_vis.corr(), annot=True, fmt='.2f', cmap='crest', linewidths=.5)
fig.set_size_inches(15, 15)
plt.show()

In [None]:
# Pairwise correlation heatmap of the failure-type-3 features (incl. IsFailure).
# The figure is created at 14x8 inches and resized to 15x15 before display.
fig = plt.figure(figsize=(14, 8))
sns.heatmap(df3_eda_vis.corr(), annot=True, fmt='.2f', cmap='crest', linewidths=.5)
fig.set_size_inches(15, 15)
plt.show()

In [None]:
# sensor_data.index = range(0,len(sensor_data))
# ttf_data.index = range(0,len(ttf_data))

In [None]:
# Remove the identifier columns from the raw sensor table for the modelling part.
# The earlier join cell renames columns by position and expects 'Tool' and 'Lot'
# to still be present, so this cell must run after it. Running this cell twice
# raises a KeyError because the columns are already gone.
sensor_data = sensor_data.drop(columns=['Tool', 'Lot'])

In [None]:
def cutoff(sensor_data, faults_data, ttf_data, column):
    """Truncate the sensor and TTF tables after the last fault named `column`.

    The last row of `faults_data` whose 'fault_name' equals `column` gives the
    final failure time. The sensor row whose 'time' is closest to it is located;
    if that row lies at or before the failure time it is kept, otherwise the cut
    is made just before it. `sensor_data` and `ttf_data` are sliced to the same
    number of leading rows.

    Returns (truncated sensor_data, truncated ttf_data, the fault rows for
    `column`). Raises IndexError when no fault row matches `column`.
    """
    matching_faults = faults_data[faults_data['fault_name'] == column]
    last_failure = matching_faults['time'].values[-1]

    timestamps = np.asarray(sensor_data['time'])
    nearest = np.abs(timestamps - last_failure).argmin()
    offset = timestamps[nearest] - last_failure

    # Keep the nearest row unless it lies strictly after the failure.
    stop = nearest if offset > 0 else nearest + 1

    return sensor_data[:stop], ttf_data[:stop], matching_faults


In [None]:
# Restrict all three tables to the first failure type and cut them off after
# its last recorded occurrence.
sensor_fault1, ttf_fault1, faults_fault1 = cutoff(sensor_data, faults_data, ttf_data, 'FlowCool Pressure Dropped Below Limit')

# Forward-fill gaps in the sensor readings. `ffill()` replaces the deprecated
# `fillna(method='ffill')` spelling and gives the same result.
sensor_fault1 = sensor_fault1.ffill()
# NOTE(review): the reason for shifting `recipe` by 200 is not visible in this
# notebook - confirm the intent before changing it.
sensor_fault1['recipe'] = sensor_fault1['recipe'] + 200
# Regression target: time to the next failure of this type.
label = ttf_fault1['TTF_FlowCool Pressure Dropped Below Limit']


In [None]:
# Capture the trends: a new trend starts wherever the TTF is larger than on the
# previous row (the first row compares against NaN and is never selected).
_ttf_column = 'TTF_FlowCool Pressure Dropped Below Limit'
temp = ttf_fault1.shift(1)
diff = ttf_fault1[_ttf_column] - temp[_ttf_column]
idx = diff[diff > 0].index
# Prepend 0 so the first trend starts at the top of the table.
# NOTE(review): these are index labels; Select() later uses them as slice
# bounds, which presumably relies on the frame having a default 0..n-1 index.
trend_start_time = np.insert(idx.values, 0, 0)


In [None]:
# Select data points
def Select(df, y, start_time, num):
    """Collect up to the last `num` rows before each trend boundary.

    Consecutive entries of `start_time` delimit one segment each. A segment
    longer than `num` rows contributes only its final `num` rows; a shorter
    one contributes all of its rows. `df` and `y` are sliced with the same
    bounds.

    Returns (concatenated frame of the selected rows, pd.Series of the selected
    targets on a fresh 0..n-1 index). pd.concat raises ValueError when
    `start_time` has fewer than two entries, since nothing is selected.
    """
    frames = []
    targets = []
    for previous, current in zip(start_time[:-1], start_time[1:]):
        begin = current - num if current - previous > num else previous
        frames.append(df[begin:current])
        targets.extend(y[begin:current])
    return pd.concat(frames, axis=0), pd.Series(targets)

In [None]:
df_select, y_select = Select(sensor_fault1, label, trend_start_time, 2000)

In [None]:
# Shift dataset
def series_to_supervised(data, y, n_in=50, dropnan=True):
    """Build lagged copies of `data` and `y` and place them side by side.

    For k = 0 .. n_in-1 the inputs are shifted down by k rows and concatenated
    column-wise, so each output row holds the current row followed by the
    n_in-1 preceding rows (newest first). `y` gets the same treatment, giving
    a label frame with n_in columns.

    When `dropnan` is true the first `n_in` rows are sliced off by position;
    nothing is dropped based on actual NaN content.
    NOTE(review): only the first n_in-1 rows contain shift-induced NaNs, so
    this removes one more row than strictly required - confirm intended.

    Returns (lagged features, lagged labels).
    """
    lags = range(0, n_in)
    result = pd.concat([data.shift(lag) for lag in lags], axis=1)
    label = pd.concat([y.shift(lag) for lag in lags], axis=1)
    if not dropnan:
        return result, label
    return result[n_in:], label[n_in:]


In [None]:
df_select.head()

In [None]:
df_select.describe().T

In [None]:
# Build 10-step lag windows from the selected rows.
# NOTE: this rebinds `df`, which until now held the joined sensor/TTF table;
# earlier cells that read `df` will see the windowed frame if re-run afterwards.
df, y = series_to_supervised(df_select, y_select, 10, True)

In [None]:
df.head()

In [None]:
# Separate [0, 1] min-max scalers for the lagged features and the lagged labels.
# NOTE(review): both are fitted on all rows, i.e. before the train/validation
# split below, so validation rows contribute to the scaling statistics.
df_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
y_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

feature = df_scaler.fit(df).transform(df)
label = y_scaler.fit(y).transform(y)


In [None]:
# Chronological split: the first 16000 windows train the model, the rest
# validate it. The "test" arrays are simply the full data set.
_n_train = 16000
X_train, y_train = feature[:_n_train], label[:_n_train]
X_valid, y_valid = feature[_n_train:], label[_n_train:]
x_test, y_test = feature, label

In [None]:
# Turn each flat row of 10 * 22 values into a (10 steps, 22 features) window.
# Only the train and validation arrays are reshaped; x_test is left flat.
_window_shape = (10, 22)
X_train = X_train.reshape((X_train.shape[0],) + _window_shape)
X_valid = X_valid.reshape((X_valid.shape[0],) + _window_shape)


In [None]:
class CausalConv1D(layers.Layer):
    """Thin wrapper around a Conv1D layer configured with causal padding.

    `filters`, `kernel_size` and `dilation_rate` are forwarded unchanged to the
    wrapped `layers.Conv1D`.
    """

    def __init__(self, filters, kernel_size, dilation_rate):
        super().__init__()
        self.conv = layers.Conv1D(
            filters,
            kernel_size,
            padding='causal',
            dilation_rate=dilation_rate,
        )

    def call(self, x):
        """Apply the wrapped causal convolution to `x`."""
        convolved = self.conv(x)
        return convolved

In [None]:
class ResidualBlock(layers.Layer):
    """Stack of (causal conv -> ReLU -> dropout) stages with a skip connection.

    One stage is created per entry of `dilation_rates`; all stages share
    `filters`, `kernel_size` and `dropout_rate`. The block input is projected
    with a 1x1 convolution to `filters` channels (when `filters` is not None),
    added to the output of the stages, and passed through a final ReLU.
    """

    def __init__(self, filters, kernel_size, dilation_rates, dropout_rate):
        super(ResidualBlock, self).__init__()
        self.layers = []
        for dilation_rate in dilation_rates:
            self.layers.append(CausalConv1D(filters, kernel_size, dilation_rate))
            self.layers.append(layers.ReLU())
            self.layers.append(layers.Dropout(dropout_rate))
        # 1x1 projection so the skip path has the same channel count as the
        # convolution path.
        self.conv1x1 = layers.Conv1D(filters, 1) if filters is not None else None
        # Created once here instead of instantiating a new ReLU layer on every
        # forward pass inside call().
        self.output_activation = layers.ReLU()

    def call(self, x):
        """Run the stages on `x`, add the (projected) input, apply ReLU."""
        residual = x
        for layer in self.layers:
            x = layer(x)
        if self.conv1x1 is not None:
            residual = self.conv1x1(residual)
        return self.output_activation(x + residual)


In [None]:
class TCNBlock(layers.Layer):
    """Sequence of ResidualBlock layers applied one after another.

    `num_channels`, `kernel_sizes` and `dilation_rates` are zipped together, so
    one ResidualBlock is built per aligned triple (surplus entries of a longer
    list are ignored). `dropout_rate` is shared by every block.
    """

    def __init__(self, num_channels, kernel_sizes, dilation_rates, dropout_rate):
        super(TCNBlock, self).__init__()
        self.residual_blocks = []
        # Filter count of the last block that was actually built; stays None
        # when the zipped arguments are empty.
        self.output_channels = None
        for filters, kernel_size, dilation_rate in zip(num_channels, kernel_sizes, dilation_rates):
            self.residual_blocks.append(ResidualBlock(filters, kernel_size, dilation_rate, dropout_rate))
            self.output_channels = filters

    def call(self, x):
        """Feed `x` through every residual block in order."""
        for block in self.residual_blocks:
            x = block(x)
        return x

    def compute_output_shape(self, input_shape):
        """Return the output shape: input shape with the last axis replaced.

        The blocks use causal-padded convolutions with `filters` output
        channels, so only the channel axis changes. The previous version
        returned `input_shape` unchanged, which misreports the channel count
        whenever the last block's filters differ from the input's channels.
        """
        if self.output_channels is None:
            # No block was built: call() is the identity.
            return input_shape
        return tuple(input_shape[:-1]) + (self.output_channels,)


In [None]:

class SelfAttention(layers.Layer):
    """Single-head scaled dot-product self-attention with an output projection.

    Queries, keys and values are linear projections of the input to `units`
    dimensions; the attended context is projected back to the input's last
    dimension, so the output has the same last-axis size as the input.
    """

    def __init__(self, units):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the four projection matrices once the input width is known."""
        feature_dim = input_shape[-1]

        def _projection(shape):
            return self.add_weight(shape=shape, initializer='random_normal', trainable=True)

        # Creation order (q, k, v, o) is kept as in the original definition.
        self.W_q = _projection((feature_dim, self.units))
        self.W_k = _projection((feature_dim, self.units))
        self.W_v = _projection((feature_dim, self.units))
        self.W_o = _projection((self.units, feature_dim))

    def call(self, inputs):
        """Return the attention output for `inputs`."""
        query = tf.tensordot(inputs, self.W_q, axes=1)
        key = tf.tensordot(inputs, self.W_k, axes=1)
        value = tf.tensordot(inputs, self.W_v, axes=1)

        # Dot-product scores scaled by sqrt(units), normalised over the last axis.
        scale = tf.math.sqrt(tf.cast(self.units, tf.float32))
        attention_weights = tf.nn.softmax(tf.matmul(query, key, transpose_b=True) / scale, axis=-1)

        attended = tf.matmul(attention_weights, value)
        return tf.tensordot(attended, self.W_o, axes=1)

In [None]:
# Input layer: one window per sample, shaped (steps, features) as taken from
# the reshaped X_train.
input_layer = Input(shape=(X_train.shape[1], X_train.shape[2]))

# TCN Path: two residual blocks (32 then 16 filters, kernel sizes 4 and 8),
# the first with dilations 1/2/4 and the second with dilations 1/2.
p1 = TCNBlock(
    num_channels=[32, 16],
    kernel_sizes=[4, 8],
    dilation_rates=[[1, 2, 4], [1, 2]],
    dropout_rate=0.4
)(input_layer)

# LSTM Path: two stacked 128-unit LSTMs that return the full sequence, so the
# step axis is preserved for the concatenation below.
p2 = layers.LSTM(128, activation='tanh', return_sequences=True)(input_layer)
p2 = layers.LSTM(128, activation='tanh', return_sequences=True)(p2)

# Concatenation stage: join both paths along the last (feature) axis.
x = layers.Concatenate()([p1, p2])

# Additional LSTM layer on the merged features (still one output per step).
x = layers.LSTM(64, activation='tanh', return_sequences=True)(x)

# Self-Attention Mechanism with 32-dimensional query/key/value projections.
x = SelfAttention(32)(x)

# Flatten the (steps, features) output into one vector per sample.
x = layers.Flatten()(x)

# Prediction Block
x = layers.Dense(16, activation='relu')(x)
x = layers.Dense(8, activation='relu')(x)

# output layer: a single linear unit.
# NOTE(review): the label matrix built by series_to_supervised has one column
# per lag (10 here) while this head emits one value per sample - confirm that
# the loss is meant to broadcast across the lagged targets.
output_layer = layers.Dense(1)(x)

# Build the model
model = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Adam optimiser with a 1e-3 learning rate; mean squared error is used both as
# the training loss and as the reported metric.
opt = Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='mean_squared_error',
    metrics=["mse"],
)

In [None]:
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the architecture (with shapes, layer names and nested layers expanded)
# to 'test.png' at 50 dpi.
plot_model(
    model,
    to_file='test.png',
    show_shapes=True,
    show_layer_names=True,
    expand_nested=True,
    dpi=50,
)

In [None]:
# Callback for early stopping of training: stop once the validation loss has
# not improved for 30 consecutive epochs and roll back to the best weights.
# Taken from `tf.keras` so the callback comes from the same Keras
# implementation as the model (built with `tensorflow.keras`), rather than from
# the standalone `keras` package, which can be a different installation.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode="min",
#     min_delta=0.000005,
    patience=30,
    verbose=1,
    restore_best_weights=True,
)


# Callbacks handed to model.fit.
model_callbacks = [
    early_stopping,
]


In [None]:
# Train for at most 1000 epochs in batches of 256; early stopping (see
# model_callbacks) normally ends the run sooner. shuffle=False keeps the
# samples in their original order within each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=1000,
    batch_size=256,
    shuffle=False,
    callbacks=model_callbacks,
    verbose=2,
)