<a href="https://colab.research.google.com/github/Swastik200/30DaysOfDSA/blob/main/TCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU
from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler
import time

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!cp "/content/drive/MyDrive/CMAPSS.zip" "/content"
!unzip CMAPSS.zip

Archive:  CMAPSS.zip
  inflating: CMAPSS/readme.txt       
  inflating: CMAPSS/RUL_FD001.txt    
  inflating: CMAPSS/RUL_FD002.txt    
  inflating: CMAPSS/RUL_FD003.txt    
  inflating: CMAPSS/RUL_FD004.txt    
  inflating: CMAPSS/test_FD001.txt   
  inflating: CMAPSS/test_FD002.txt   
  inflating: CMAPSS/test_FD003.txt   
  inflating: CMAPSS/test_FD004.txt   
  inflating: CMAPSS/train_FD001.txt  
  inflating: CMAPSS/train_FD002.txt  
  inflating: CMAPSS/train_FD003.txt  
  inflating: CMAPSS/train_FD004.txt  
  inflating: CMAPSS/x.txt            


In [5]:
path = '/content/CMAPSS/'
col_names = ['unit_number', 'time_cycles', 'setting_1', 'setting_2', 'setting_3'] + ['sensor_{}'.format(i) for i in range(1, 22)]
df_train = pd.read_csv(path+'train_FD001.txt', sep='\s+', header=None, names=col_names)
df_test = pd.read_csv(path+'test_FD001.txt', sep='\s+', header=None, names=col_names)
y_test = pd.read_csv(path+'RUL_FD001.txt', sep='\s+', header=None, names=['RUL'])


In [6]:
def process_targets(data_length, early_rul):
    early_rul_duration = data_length - early_rul
    if early_rul_duration <= 0:
        return np.arange(data_length - 1, -1, -1)
    else:
        new_early_rul = early_rul * np.ones(early_rul_duration)
        origin_rul = np.arange(early_rul - 1, -1, -1)
        return np.append(new_early_rul, origin_rul)

In [7]:

def process_input_data_with_targets(input_data, target_data, window_length, shift):
    num_batches = int(np.floor((len(input_data) - window_length) / shift)) + 1
    num_features = input_data.shape[1]
    output = np.repeat(np.nan, repeats=num_batches * window_length * num_features)
    output_data = output.reshape(num_batches, window_length, num_features)

    if target_data is None:
        for batch in range(num_batches):
            output_data[batch, :, :] = input_data[(0 + shift * batch):(0 + shift * batch + window_length), :]
        return output_data
    else:
        output_targets = np.repeat(np.nan, repeats=num_batches)
        for batch in range(num_batches):
            window_start = shift * batch
            window_end = window_start + window_length

            output_data[batch, :, :] = input_data[window_start:window_end, :]
            output_targets[batch] = target_data[window_end - 1]
        return output_data, output_targets

In [8]:
def process_test_data(test_data_for_an_engine, window_length, shift, num_test_windows=1):
    max_num_test_batches = int(np.floor((len(test_data_for_an_engine) - window_length) / shift)) + 1

    if max_num_test_batches < num_test_windows:
        required_len = (max_num_test_batches - 1) * shift + window_length
        batched_test_data_for_an_engine = process_input_data_with_targets(
            test_data_for_an_engine[-required_len:, :],
            target_data=None,
            window_length=window_length,
            shift=shift
        )
        return batched_test_data_for_an_engine, max_num_test_batches
    else:
        required_len = (num_test_windows - 1) * shift + window_length
        batched_test_data_for_an_engine = process_input_data_with_targets(
            test_data_for_an_engine[-required_len:, :],
            target_data=None,
            window_length=window_length,
            shift=shift
        )
        return batched_test_data_for_an_engine, num_test_windows

In [9]:
train_data_first_column = df_train["unit_number"]
test_data_first_column = df_test["unit_number"]

In [10]:
scaler = MinMaxScaler()

train_data = scaler.fit_transform(df_train.drop(columns=['unit_number', 'setting_1', 'setting_2', 'sensor_1', 'sensor_5', 'sensor_6', 'sensor_10', 'sensor_16', 'sensor_18', 'sensor_19']))
test_data = scaler.transform(df_test.drop(columns=['unit_number', 'setting_1', 'setting_2', 'sensor_1', 'sensor_5', 'sensor_6', 'sensor_10', 'sensor_16', 'sensor_18', 'sensor_19']))

In [11]:
train_data = pd.DataFrame(data=np.c_[train_data_first_column, train_data])
test_data = pd.DataFrame(data=np.c_[test_data_first_column, test_data])

In [12]:
num_train_machines = len(train_data[0].unique())
num_test_machines = len(test_data[0].unique())
window_length = 30
shift = 1
early_rul = 125
num_test_windows = 5


In [13]:
processed_train_data = []
processed_train_targets = []


In [14]:
for i in np.arange(1, num_train_machines + 1):
    temp_train_data = train_data[train_data[0] == i].drop(columns=[0]).values
    temp_train_targets = process_targets(data_length=temp_train_data.shape[0], early_rul=early_rul)
    data_for_a_machine, targets_for_a_machine = process_input_data_with_targets(
        temp_train_data, temp_train_targets, window_length=window_length, shift=shift
    )
    processed_train_data.append(data_for_a_machine)
    processed_train_targets.append(targets_for_a_machine)

processed_train_data = np.concatenate(processed_train_data)
processed_train_targets = np.concatenate(processed_train_targets)


In [15]:
processed_test_data = []
num_test_windows_list = []

for i in np.arange(1, num_test_machines + 1):
    temp_test_data = test_data[test_data[0] == i].drop(columns=[0]).values
    test_data_for_an_engine, num_windows = process_test_data(
        temp_test_data, window_length=window_length, shift=shift, num_test_windows=num_test_windows
    )
    processed_test_data.append(test_data_for_an_engine)
    num_test_windows_list.append(num_windows)

processed_test_data = np.concatenate(processed_test_data)
true_rul = y_test.values

In [16]:
index = np.random.permutation(len(processed_train_targets))
processed_train_data, processed_train_targets = processed_train_data[index], processed_train_targets[index]

In [17]:
processed_train_data, processed_val_data, processed_train_targets, processed_val_targets = train_test_split(
    processed_train_data, processed_train_targets, test_size=0.2, random_state=666
)

In [18]:
def GRU_model():
    input_shape = (window_length, 15)
    model = Sequential([
        GRU(128, input_shape=input_shape, return_sequences=True, activation="tanh"),
        GRU(64, activation="tanh", return_sequences=True),
        GRU(32, activation="tanh"),
        Dense(96, activation="relu"),
        Dense(128, activation="relu"),
        Dense(1)
    ])
    model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    return model


In [19]:
def scheduler(epoch):
    if epoch < 10:
        return 0.001
    else:
        return 0.0001

tf_callback = TensorBoard(log_dir="./logs")
callback = LearningRateScheduler(scheduler, verbose=0)

batch_size = 50
epochs = 20

In [21]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.layers import Reshape, Permute, concatenate

def TCN_model():
    input_shape = (window_length, 15)
    kernel_size = 3
    num_filters = 64
    dilations = [1, 2, 4, 8, 16]

    inputs = Input(shape=input_shape)
    x = inputs  # Fix the indentation
    skip_connections = []
    for dilation_rate in dilations:
        x = Conv1D(num_filters, kernel_size, padding='causal', dilation_rate=dilation_rate, activation='relu')(x)
        skip_connections.append(x)

    x = concatenate(skip_connections)
    x = Conv1D(num_filters, kernel_size=1, padding='same', activation='relu')(x)
    x = Conv1D(num_filters, kernel_size=1, padding='same', activation='relu')(x)
    x = Conv1D(1, kernel_size=1, padding='same')(x)
    x = Flatten()(x)

    outputs = Dense(1)(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mse')

    return model

In [35]:
models = { "TCN": TCN_model()}

In [36]:
def evaluate(true_rul ,pred_rul, model):
  MAE = mean_absolute_error(true_rul, pred_rul)
  RMSE = np.sqrt(mean_squared_error(true_rul, pred_rul))
  r2 = r2_score(true_rul, pred_rul)
  print("Testing : R-square = ",r2,'MAE = ',MAE,"RMSE = ", RMSE)

In [37]:
history_train_loss = []
history_val_loss = []

for model_name, model in models.items():
    print("================", model_name, "================")
    start_time = time.time()

    # Slice the input data to match the expected shape
    processed_train_data_sliced = processed_train_data[:, :, :15]
    processed_val_data_sliced = processed_val_data[:, :, :15]

    # Compile the model with run_eagerly=True
    model.compile(optimizer='adam', loss='mse', run_eagerly=True)

    model_history = model.fit(processed_train_data_sliced, processed_train_targets, epochs=epochs,
                              validation_data=(processed_val_data_sliced, processed_val_targets),
                              callbacks=[tf_callback, callback],
                              batch_size=batch_size, verbose=0)
    rul_pred = model.predict(processed_test_data[:, :, :15], verbose=0).reshape(-1)
    end_time = time.time()

    preds_for_each_engine = np.split(rul_pred, np.cumsum(num_test_windows_list)[:-1])
    mean_pred_for_each_engine = [np.average(ruls_for_each_engine, weights=np.repeat(1 / num_windows, num_windows))
                                  for ruls_for_each_engine, num_windows in zip(preds_for_each_engine,
                                                                                  num_test_windows_list)]

    print('Training : loss = ', model_history.history['loss'][-1])
    print('Validation : loss = ', model_history.history['val_loss'][-1])

    # Ensure true_rul and mean_pred_for_each_engine have the same length
    true_rul_trimmed = true_rul[:len(mean_pred_for_each_engine)]
    evaluate(true_rul_trimmed, mean_pred_for_each_engine, model_name)

    history_train_loss.append(model_history.history['loss'])
    history_val_loss.append(model_history.history['val_loss'])
    print('Run Time :', int(end_time - start_time), 'sec')


Training : loss =  129.2417755126953
Validation : loss =  135.8905792236328
Testing : R-square =  0.884709985431825 MAE =  10.959075937390331 RMSE =  14.109954689556538
Run Time : 842 sec
