In [6]:
import pandas as pd
import os

def process_engine_data(input_filename, output_directory="."):
    """Add a remaining-useful-life (RUL) column to a raw engine file and save it as CSV.

    The input is read as a headerless, whitespace-separated table with 26
    columns: ``ID``, ``Cycle``, ``OpSet1``-``OpSet3`` and
    ``SensorMeasure1``-``SensorMeasure21``. For every row, ``RUL`` is the
    largest ``Cycle`` recorded for that row's ``ID`` minus the row's own
    ``Cycle``, so the last recorded cycle of each ID gets ``RUL == 0``.

    The result is written, without the index, to
    ``<output_directory>/processed_<input file stem>.csv``. The directory is
    created when it does not exist yet.

    Args:
        input_filename: Path of the raw text file to read.
        output_directory: Directory that receives the CSV. Defaults to the
            current working directory.

    Returns:
        None. Success and failure are both reported with ``print``; exceptions
        are caught here rather than propagated to the caller.
    """
    column_names = (
        ['ID', 'Cycle', 'OpSet1', 'OpSet2', 'OpSet3']
        + [f'SensorMeasure{i}' for i in range(1, 22)]
    )

    try:
        # Splitting on runs of whitespace ignores trailing blanks at the end of
        # each line, so no phantom empty columns have to be dropped by position.
        raw_data = pd.read_csv(input_filename, sep=r'\s+', header=None)
        if raw_data.shape[1] != len(column_names):
            raise ValueError(
                f"expected {len(column_names)} columns but found {raw_data.shape[1]}"
            )
        raw_data.columns = column_names

        # Per-row end-of-life: the maximum cycle seen for the row's engine ID.
        end_of_life = raw_data.groupby('ID')['Cycle'].transform('max')
        df = raw_data.assign(RUL=end_of_life - raw_data['Cycle'])

        base_name = os.path.basename(input_filename)
        output_filename = f"processed_{os.path.splitext(base_name)[0]}.csv"
        # An empty string means "current directory"; os.makedirs('') would raise.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        output_path = os.path.join(output_directory, output_filename)

        df.to_csv(output_path, index=False)
        print(f"Successfully processed '{input_filename}' and saved to '{output_path}'")

    except FileNotFoundError:
        print(f"Error: The file '{input_filename}' was not found.")
    except Exception as e:
        print(f"An error occurred while processing '{input_filename}': {e}")


if __name__ == "__main__":
    # Raw training files to label with RUL, one per FD00x subset.
    files_to_process = [
        'datasets/CMaps/train_FD001.txt', 'datasets/CMaps/train_FD002.txt',
        'datasets/CMaps/train_FD003.txt', 'datasets/CMaps/train_FD004.txt',
    ]

    # output_directory is left at its default, so each CSV is written to ".".
    for file in files_to_process:
        process_engine_data(file)


Successfully processed 'datasets/CMaps/train_FD001.txt' and saved to '.\processed_train_FD001.csv'
Successfully processed 'datasets/CMaps/train_FD002.txt' and saved to '.\processed_train_FD002.csv'
Successfully processed 'datasets/CMaps/train_FD003.txt' and saved to '.\processed_train_FD003.csv'
Successfully processed 'datasets/CMaps/train_FD004.txt' and saved to '.\processed_train_FD004.csv'


In [24]:
import pandas as pd
import os
import re

def remove_specified_features(input_filename):
    """Drop a fixed set of feature columns from a CSV and save the reduced file.

    The CSV is loaded with its header row, and whichever of the twelve listed
    columns (the three ``OpSet`` columns and nine ``SensorMeasure`` columns)
    are present are removed. Columns that are absent are skipped, not treated
    as an error.

    The output is written to the current working directory. Its name is
    ``Processed_train_<digits>.csv`` when an ``FD<digits>`` tag is found,
    otherwise ``Processed_<input file stem>.csv``. The tag is looked up in the
    file's base name first so that a directory component containing ``FD<n>``
    cannot override the file's own tag; the full path is only a fallback.

    Args:
        input_filename: Path of the CSV file to read.

    Returns:
        None. Progress, success and failure are reported with ``print``;
        exceptions are caught here rather than propagated to the caller.
    """
    columns_to_drop = [
        'OpSet1', 'OpSet2', 'OpSet3', 'SensorMeasure1', 'SensorMeasure5',
        'SensorMeasure6', 'SensorMeasure9', 'SensorMeasure10', 'SensorMeasure14',
        'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19'
    ]

    try:
        print(f"--- Processing '{input_filename}' ---")
        df = pd.read_csv(input_filename)

        # Only drop what is actually there so partially reduced files still work.
        existing_columns_to_drop = [col for col in columns_to_drop if col in df.columns]

        if existing_columns_to_drop:
            print(f"\nDropping {len(existing_columns_to_drop)} columns: {existing_columns_to_drop}")
            df_filtered = df.drop(columns=existing_columns_to_drop)
        else:
            print("\nNone of the specified columns to drop were found in the file.")
            df_filtered = df

        base_name = os.path.basename(input_filename)
        # Prefer the tag in the file name; fall back to the whole path so inputs
        # whose tag lives only in a directory name keep their previous output name.
        match = re.search(r'FD(\d+)', base_name) or re.search(r'FD(\d+)', input_filename)
        if match:
            file_number = match.group(1)
            output_filename = f"Processed_train_{file_number}.csv"
        else:
            output_filename = f"Processed_{os.path.splitext(base_name)[0]}.csv"

        df_filtered.to_csv(output_filename, index=False)
        print(f"Successfully created '{output_filename}' with {df_filtered.shape[1]} columns.\n")

    except FileNotFoundError:
        print(f"Error: The file '{input_filename}' was not found.\n")
    except Exception as e:
        print(f"An error occurred while processing '{input_filename}': {e}\n")


if __name__ == "__main__":
    # NOTE(review): no visible cell writes files named RUL_train_FD00x.csv; the
    # first cell saves 'processed_train_FD00x.csv'. Confirm where these inputs
    # come from before relying on Restart & Run All.
    files_to_process = [
        'RUL_train_FD001.csv', 'RUL_train_FD002.csv',
        'RUL_train_FD003.csv', 'RUL_train_FD004.csv',
    ]

    # Each reduced file is written to the current working directory.
    for file in files_to_process:
        remove_specified_features(file)


--- Processing 'RUL_train_FD001.csv' ---

Dropping 12 columns: ['OpSet1', 'OpSet2', 'OpSet3', 'SensorMeasure1', 'SensorMeasure5', 'SensorMeasure6', 'SensorMeasure9', 'SensorMeasure10', 'SensorMeasure14', 'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19']
Successfully created 'Processed_train_001.csv' with 15 columns.

--- Processing 'RUL_train_FD002.csv' ---

Dropping 12 columns: ['OpSet1', 'OpSet2', 'OpSet3', 'SensorMeasure1', 'SensorMeasure5', 'SensorMeasure6', 'SensorMeasure9', 'SensorMeasure10', 'SensorMeasure14', 'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19']
Successfully created 'Processed_train_002.csv' with 15 columns.

--- Processing 'RUL_train_FD003.csv' ---

Dropping 12 columns: ['OpSet1', 'OpSet2', 'OpSet3', 'SensorMeasure1', 'SensorMeasure5', 'SensorMeasure6', 'SensorMeasure9', 'SensorMeasure10', 'SensorMeasure14', 'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19']
Successfully created 'Processed_train_003.csv' with 15 columns.

--- Processing 'RUL_tr

In [26]:
import pandas as pd
import numpy as np
import os

def process_test_data(file_number):
    """Build a labelled, feature-reduced CSV from one test file and its RUL file.

    Reads ``datasets/CMaps/test_FD00<file_number>.txt`` (headerless,
    whitespace-separated, 26 columns) and
    ``datasets/CMaps/RUL_FD00<file_number>.txt`` (headerless, one value per
    line). The i-th RUL value is paired with the i-th engine ``ID`` in
    ascending order. For each engine, end of life is its last recorded
    ``Cycle`` plus that RUL value, and every row gets
    ``RUL = end of life - Cycle``.

    ``ID`` and the twelve listed ``OpSet``/``SensorMeasure`` columns are then
    removed, and the result is written to ``Processed_Test_00<file_number>.csv``
    in the current working directory.

    Args:
        file_number: Value interpolated after ``FD00`` in the file names
            (the notebook passes the integers 1 to 4).

    Returns:
        None. Progress, success and failure are reported with ``print``;
        exceptions are caught here rather than propagated to the caller.
    """
    column_names = (
        ['ID', 'Cycle', 'OpSet1', 'OpSet2', 'OpSet3']
        + [f'SensorMeasure{i}' for i in range(1, 22)]
    )
    columns_to_drop = [
        'OpSet1', 'OpSet2', 'OpSet3', 'SensorMeasure1', 'SensorMeasure5',
        'SensorMeasure6', 'SensorMeasure9', 'SensorMeasure10', 'SensorMeasure14',
        'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19'
    ]

    # Both paths are built before the try block so the FileNotFoundError
    # handler can always reference them, whichever read fails.
    base_path = os.path.join('datasets', 'CMaps')
    test_filename = os.path.join(base_path, f'test_FD00{file_number}.txt')
    rul_filename = os.path.join(base_path, f'RUL_FD00{file_number}.txt')

    try:
        print(f"--- Processing test file set FD00{file_number} ---")

        # Splitting on runs of whitespace ignores trailing blanks at the end of
        # each line, so no phantom empty columns have to be dropped by position.
        raw_data = pd.read_csv(test_filename, sep=r'\s+', header=None)
        if raw_data.shape[1] != len(column_names):
            raise ValueError(
                f"expected {len(column_names)} columns in '{test_filename}' "
                f"but found {raw_data.shape[1]}"
            )
        raw_data.columns = column_names

        rul_after_last_cycle = pd.read_csv(rul_filename, sep=r'\s+', header=None).iloc[:, 0]

        # Series indexed by ID in ascending order.
        last_cycle = raw_data.groupby('ID')['Cycle'].max()
        if len(rul_after_last_cycle) != len(last_cycle):
            # A mismatch would otherwise produce NaN RUL values without warning.
            raise ValueError(
                f"'{rul_filename}' has {len(rul_after_last_cycle)} RUL values "
                f"but '{test_filename}' contains {len(last_cycle)} engine IDs"
            )

        # Positional pairing: i-th RUL value belongs to the i-th ID.
        end_of_life = last_cycle + rul_after_last_cycle.to_numpy()
        rul = raw_data['ID'].map(end_of_life) - raw_data['Cycle']

        # NOTE(review): ID is removed here, while the training output written by
        # remove_specified_features keeps it - confirm the difference is intended.
        final_df = raw_data.assign(RUL=rul).drop(columns=['ID'] + columns_to_drop)

        output_filename = f"Processed_Test_00{file_number}.csv"
        final_df.to_csv(output_filename, index=False)
        print(f"Successfully created '{output_filename}' with {final_df.shape[1]} columns.\n")

    except FileNotFoundError:
        print(f"Error: Make sure '{test_filename}' and '{rul_filename}' exist.\n")
    except Exception as e:
        print(f"An error occurred while processing file set {file_number}: {e}\n")


if __name__ == "__main__":
    # Process the four test file sets, FD001 through FD004.
    for i in (1, 2, 3, 4):
        process_test_data(i)


--- Processing test file set FD001 ---
Successfully created 'Processed_Test_001.csv' with 14 columns.

--- Processing test file set FD002 ---
Successfully created 'Processed_Test_002.csv' with 14 columns.

--- Processing test file set FD003 ---
Successfully created 'Processed_Test_003.csv' with 14 columns.

--- Processing test file set FD004 ---
Successfully created 'Processed_Test_004.csv' with 14 columns.

