In [1]:
import pandas as pd
import numpy as np
import warnings 
# Silence every warning raised from this point on in the session.
# NOTE(review): a blanket "ignore" also hides pandas/numpy deprecation and
# dtype warnings; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
def remaining_cycle(df, number_of_units):
    """Compute the remaining number of cycles for every row of ``df``.

    For each unit, the value is that unit's largest ``Cycle`` minus the row's
    own ``Cycle``, so the last recorded cycle of a unit gets 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"Unit No"`` and ``"Cycle"``.
    number_of_units : int
        Only rows whose ``"Unit No"`` is one of ``1..number_of_units`` are
        included in the result.

    Returns
    -------
    pandas.DataFrame
        A single-column frame named ``"RUL"``. Rows keep the order and index
        of ``df``, so the result aligns with ``df`` in ``pd.concat(axis=1)``
        even when units are interleaved or the index is not 0..n-1.
    """
    # Same unit selection as an explicit loop over range(1, number_of_units + 1).
    wanted = df["Unit No"].isin(range(1, number_of_units + 1))
    selected = df.loc[wanted, ["Unit No", "Cycle"]]

    # One grouped pass instead of re-filtering the whole frame for each unit.
    last_cycle = selected.groupby("Unit No")["Cycle"].transform("max")
    return (last_cycle - selected["Cycle"]).to_frame(name="RUL")

In [3]:
def handling_outliers(df, iqr_factor=1.5):
    """Cap outliers in every column except the last one using the IQR rule.

    For each column but the last, values below ``Q1 - iqr_factor * IQR`` are
    raised to that bound and values above ``Q3 + iqr_factor * IQR`` are
    lowered to it. The last column is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to cap. It is modified in place.
    iqr_factor : float, default 1.5
        Multiplier applied to the inter-quartile range when building the
        lower and upper bounds.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after capping. Capped columns come back as
        floating point because the bounds are floats.
    """
    feature_cols = df.columns[:-1]
    if len(feature_cols) == 0:
        # Nothing to cap when the frame has at most one column.
        return df

    # Quantiles are only needed for the columns that get capped, so a
    # non-numeric last column does not interfere.
    features = df[feature_cols]
    q1 = features.quantile(0.25)
    q3 = features.quantile(0.75)
    iqr = q3 - q1

    lower_bound = q1 - iqr_factor * iqr
    upper_bound = q3 + iqr_factor * iqr

    for col in feature_cols:
        values = df[col]
        floored = np.where(values < lower_bound[col], lower_bound[col], values)
        df[col] = np.where(floored > upper_bound[col], upper_bound[col], floored)
    return df

In [4]:
def preprocess_train_data(file_name, file_no, remaining_cycle, handling_outliers):
    """Build the preprocessed training CSV for one raw data file.

    Reads ``file_name`` as a space-separated file without a header, drops
    columns 26 and 27 (presumably empty columns produced by trailing
    separators; confirm against the raw files), names the remaining 26
    columns, appends the target produced by ``remaining_cycle`` to a fixed
    subset of features, passes the result through ``handling_outliers`` and
    writes it to ``Preprocessed_train_{file_no}.csv`` in the working directory.

    Parameters
    ----------
    file_name : str
        Path of the raw file to read.
    file_no : int or str
        Suffix used in the output file name.
    remaining_cycle : callable
        Called as ``remaining_cycle(frame, number_of_units)``; must return a
        frame that can be concatenated column-wise with the features.
    handling_outliers : callable
        Called with the features-plus-target frame; must return the frame to
        write.

    Returns
    -------
    None
        The value returned by ``DataFrame.to_csv`` when given a path.
    """
    column_names = (
        ["Unit No", 'Cycle']
        + [f"OS{i}" for i in range(1, 4)]
        + [f"SM{i}" for i in range(1, 22)]
    )
    selected_features = [
        'Cycle', 'SM2', 'SM3', 'SM4', 'SM7', 'SM8', 'SM9',
        'SM11', 'SM12', 'SM13', 'SM15', 'SM17', 'SM20', 'SM21',
    ]

    raw = pd.read_csv(file_name, sep=" ", header=None).drop(columns=[26, 27])
    raw.columns = column_names

    # Target variable: one value per row, derived from the full frame.
    target = remaining_cycle(raw, raw["Unit No"].max())

    # Features and target side by side, then outlier capping.
    combined = pd.concat([raw[selected_features], target], axis=1)
    cleaned = handling_outliers(combined)

    return cleaned.to_csv(f'Preprocessed_train_{file_no}.csv', index=False)

    
    

In [5]:
def preprocess_test_data(file_name, file_no, remaining_cycle):
    """Build the preprocessed test CSV for one raw data file.

    Reads ``file_name`` as a space-separated file without a header, drops
    columns 26 and 27 (presumably empty columns produced by trailing
    separators; confirm against the raw files), names the remaining 26
    columns, appends the target produced by ``remaining_cycle`` to a fixed
    subset of features and writes the result to
    ``Preprocessed_test_{file_no}.csv`` in the working directory. No outlier
    handling is applied here.

    NOTE(review): the target is derived only from the cycles present in the
    file. If the test trajectories stop before failure, this is not the true
    remaining life; confirm against the data source.

    Parameters
    ----------
    file_name : str
        Path of the raw file to read.
    file_no : int or str
        Suffix used in the output file name.
    remaining_cycle : callable
        Called as ``remaining_cycle(frame, number_of_units)``; must return a
        frame that can be concatenated column-wise with the features.

    Returns
    -------
    None
        The value returned by ``DataFrame.to_csv`` when given a path.
    """
    column_names = (
        ["Unit No", 'Cycle']
        + [f"OS{i}" for i in range(1, 4)]
        + [f"SM{i}" for i in range(1, 22)]
    )
    selected_features = [
        'Cycle', 'SM2', 'SM3', 'SM4', 'SM7', 'SM8', 'SM9',
        'SM11', 'SM12', 'SM13', 'SM15', 'SM17', 'SM20', 'SM21',
    ]

    raw = pd.read_csv(file_name, sep=" ", header=None).drop(columns=[26, 27])
    raw.columns = column_names

    # Target variable: one value per row, derived from the full frame.
    target = remaining_cycle(raw, raw["Unit No"].max())

    # Features and target side by side.
    combined = pd.concat([raw[selected_features], target], axis=1)

    return combined.to_csv(f'Preprocessed_test_{file_no}.csv', index=False)

    
    

In [6]:
# Preprocess the four training files (train_FD001.txt .. train_FD004.txt);
# each call writes Preprocessed_train_<n>.csv to the working directory.
for file_no in range(1, 5):
    preprocess_train_data(f'train_FD00{file_no}.txt', file_no, remaining_cycle, handling_outliers)


In [7]:
# Preprocess the four test files (test_FD001.txt .. test_FD004.txt);
# each call writes Preprocessed_test_<n>.csv to the working directory.
for file_no in range(1, 5):
    preprocess_test_data(f'test_FD00{file_no}.txt', file_no, remaining_cycle)