## FILE TO CONVERT THE NASA DATASET TO CSVs

The dataset originally comes from: https://www.kaggle.com/datasets/behrad3d/nasa-cmaps

In [3]:
# Importing necessary libraries.
import pandas as pd
import numpy as np
import os
from pathlib import Path


In [16]:
# Directory the raw files are read from and the CSVs are written to: the
# "nasa_cmaps" folder inside the parent of the current working directory.
url = os.path.join(
    os.path.dirname(os.getcwd()),
    "nasa_cmaps",
)
print(url)

# Column labels handed to pandas (`names=`) when the raw text files are read.
column_names = (
    # identifiers
    ['engine_id', 'cycle']
    # operational settings
    + ['op_setting_1', 'op_setting_2', 'op_setting_3']
    # remaining 21 columns (presumably sensor channels -- confirm against the
    # dataset documentation)
    + [
        'T2', 'T24', 'T30', 'T50',
        'P2', 'P15', 'P30',
        'Nf', 'Nc', 'epr', 'Ps30', 'phi',
        'NRf', 'NRc', 'BPR', 'farB', 'htBleed',
        'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
    ]
)

# Raw file names, resolved against `url` by the labelling functions.
train_data = [
    "FD001_train.txt",
    "FD002_train.txt",
    "FD003_train.txt",
    "FD004_train.txt",
]
test_data = [
    "FD001_test.txt",
    "FD002_test.txt",
    "FD003_test.txt",
    "FD004_test.txt",
]
RUL_data = [
    "FD001_RUL.txt",
    "FD002_RUL.txt",
    "FD003_RUL.txt",
    "FD004_RUL.txt",
]

def label_train_data(train_data, columns = column_names, train_df_dict=None):
    """Label the raw training files and save each one as a CSV.

    Every file name in ``train_data`` is resolved against the module-level
    ``url`` directory, read as a whitespace-delimited table labelled with
    ``columns``, and written back to the same directory as ``<stem>.csv``
    (comma-separated, without the index column). The path of each CSV is
    printed as it is written.

    Args:
        train_data: Iterable of raw file names located in ``url``.
        columns: Column labels passed to ``pd.read_csv`` as ``names``.
            Defaults to the module-level ``column_names``.
        train_df_dict: Optional dict that receives the loaded frames. When
            ``None`` (the default) a new dict is created.

    Returns:
        dict: Maps each file stem (file name without extension) to its
        labelled DataFrame, kept for later use.
    """
    # Read everything first so that no CSV is written if any read fails.
    loaded = {}
    for data in train_data:
        file_path = os.path.join(url, data)
        # .stem drops the extension; os.path.basename would keep it.
        base_name = Path(file_path).stem
        # Honour the `columns` argument rather than the global header.
        loaded[base_name] = pd.read_csv(file_path, sep=r'\s+', names=columns)

    # Save only the frames loaded by this call.
    for base_name, frame in loaded.items():
        csv_path = os.path.join(url, base_name + ".csv")
        print(csv_path)
        frame.to_csv(csv_path, sep=',', index=False)

    if train_df_dict is None:
        return loaded
    train_df_dict.update(loaded)
    return train_df_dict
# Run the conversion. NOTE: this rebinds `train_data` from the list of raw file
# names to the returned dict of DataFrames keyed by file stem.
train_data = label_train_data(train_data)

def label_test_data(test_data, columns = column_names , test_df_dict=None):
    """Label the raw test files and save each one as a CSV.

    Every file name in ``test_data`` is resolved against the module-level
    ``url`` directory, read as a whitespace-delimited table labelled with
    ``columns``, and written back to the same directory as ``<stem>.csv``.
    The path of each CSV is printed as it is written.

    Args:
        test_data: Iterable of raw file names located in ``url``.
        columns: Column labels passed to ``pd.read_csv`` as ``names``.
            Defaults to the module-level ``column_names``.
        test_df_dict: Optional dict that receives the loaded frames. When
            ``None`` (the default) a new dict is created.

    Returns:
        dict: Maps each file stem (file name without extension) to its
        labelled DataFrame.
    """
    # Read everything first so that no CSV is written if any read fails.
    loaded = {}
    for data in test_data:
        file_path = os.path.join(url, data)
        basename = Path(file_path).stem
        # Honour the `columns` argument rather than the global header.
        loaded[basename] = pd.read_csv(file_path, sep=r'\s+', names=columns)

    # Save only the frames loaded by this call.
    for basename, frame in loaded.items():
        csv_path = os.path.join(url, basename + ".csv")
        print(csv_path)
        # index=False keeps the row index out of the file, so the test CSVs
        # have the same columns as the train and RUL CSVs.
        frame.to_csv(csv_path, sep=',', index=False)

    if test_df_dict is None:
        return loaded
    test_df_dict.update(loaded)
    return test_df_dict
# Run the conversion. NOTE: this rebinds `test_data` from the list of raw file
# names to the returned dict of DataFrames keyed by file stem.
test_data = label_test_data(test_data)

def label_rul_data(RUL_data , column="remaining_useful_life", rul_df_dict=None):
    """Label the raw RUL files and save each one as a CSV.

    Every file name in ``RUL_data`` is resolved against the module-level
    ``url`` directory, read as a whitespace-delimited table labelled with
    ``column``, and written back to the same directory as ``<stem>.csv``
    (comma-separated, without the index column). The path of each CSV is
    printed as it is written.

    Args:
        RUL_data: Iterable of raw file names located in ``url``.
        column: Label for the data in the RUL files. A single string is
            used as the only column name; a sequence of strings is used
            as-is.
        rul_df_dict: Optional dict that receives the loaded frames. When
            ``None`` (the default) a new dict is created.

    Returns:
        dict: Maps each file stem (file name without extension) to its
        labelled DataFrame.
    """
    # The RUL files get their own label from `column`, not the
    # multi-column train/test header.
    names = [column] if isinstance(column, str) else list(column)

    # Read everything first so that no CSV is written if any read fails.
    loaded = {}
    for data in RUL_data:
        file_path = os.path.join(url, data)
        basename = Path(file_path).stem
        loaded[basename] = pd.read_csv(file_path, sep=r'\s+', names=names)

    # Save only the frames loaded by this call.
    for basename, frame in loaded.items():
        csv_path = os.path.join(url, basename + ".csv")
        print(csv_path)
        frame.to_csv(csv_path, sep=',', index=False)

    if rul_df_dict is None:
        return loaded
    rul_df_dict.update(loaded)
    return rul_df_dict
# Run the conversion and keep the returned dict of DataFrames (keyed by file
# stem) in `rul_data`.
rul_data = label_rul_data(RUL_data)

/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD001_train.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD002_train.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD003_train.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD004_train.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD001_test.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD002_test.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD003_test.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD004_test.csv
/home/local-host/PycharmProjects/Turbofan-engine-rul/notebooks/data/raw/nasa_cmaps/FD001_RUL.csv
/home/local-host/PycharmProjects