In [1]:
# Imports
from pathlib import Path
import pandas as pd

In [2]:
def load_raw_data(txt_filename: str, data_dir: 'str | Path | None' = None) -> pd.DataFrame:
    """ Loads a whitespace-separated .txt data file as pandas DataFrame

        The file is read without a header row. The first five columns are named
        'unit', 'cycle', 'altitude', 'mach' and 'tra'; every remaining column is
        named 'sensor_1', 'sensor_2', ... in file order.

        :argument: txt_filename - name of the text file with .txt extension
        :argument: data_dir - folder that contains the file; when None (default)
                   the 'data/raw' folder under the parent of the current working
                   directory is used
        :return: dataframe - pandas DataFrame with the columns renamed
        :raises ValueError: when the file has fewer than the five leading columns
    """
    # Names of the non-sensor columns that every file starts with
    leading_columns = ['unit', 'cycle', 'altitude', 'mach', 'tra']
    # Resolve the data folder: explicit argument wins, otherwise fall back to
    # the location relative to the current working directory
    if data_dir is None:
        data_path = Path().absolute().parent.joinpath('data', 'raw')
    else:
        data_path = Path(data_dir)
    # Split on any run of whitespace so trailing/double spaces and tabs do not
    # create empty columns (no need to drop all-NaN columns afterwards, which
    # could also have removed a real, fully-missing sensor and shifted names)
    dataframe = pd.read_csv(data_path.joinpath(txt_filename), sep=r'\s+', header=None)
    # Everything after the leading columns is treated as a sensor reading
    sensors_number = len(dataframe.columns) - len(leading_columns)
    if sensors_number < 0:
        raise ValueError(
            f'{txt_filename} has {len(dataframe.columns)} columns, '
            f'expected at least {len(leading_columns)}'
        )
    # Rename the columns to the corresponding names
    dataframe.columns = leading_columns + [f'sensor_{i}' for i in range(1, sensors_number + 1)]
    return dataframe

In [3]:
# Load the raw file 'train_FD004.txt' into a DataFrame with named columns
data_1 = load_raw_data(txt_filename='train_FD004.txt')

In [4]:
# Summary statistics, transposed so each column of data_1 becomes one row
data_1.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
unit,61249.0,124.325181,71.99535,1.0,60.0,126.0,185.0,249.0
cycle,61249.0,134.311417,89.783389,1.0,62.0,123.0,191.0,543.0
altitude,61249.0,23.999823,14.780722,0.0,10.0046,25.0014,41.9981,42.008
mach,61249.0,0.571347,0.310703,0.0,0.2507,0.7,0.84,0.842
tra,61249.0,94.031576,14.251954,60.0,100.0,100.0,100.0,100.0
sensor_1,61249.0,472.882435,26.436832,445.0,445.0,462.54,491.19,518.67
sensor_2,61249.0,579.420056,37.342647,535.48,549.33,555.74,607.07,644.42
sensor_3,61249.0,1417.8966,106.167598,1242.67,1350.55,1367.68,1497.42,1613.0
sensor_4,61249.0,1201.915359,119.327591,1024.42,1119.49,1136.92,1302.62,1440.77
sensor_5,61249.0,8.031626,3.622872,3.91,3.91,7.05,10.52,14.62


In [6]:
# Show the loaded frame; the rendered output abbreviates it with '...' rows/columns
data_1

Unnamed: 0,unit,cycle,altitude,mach,tra,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,42.0049,0.8400,100.0,445.00,549.68,1343.43,1112.93,3.91,...,129.78,2387.99,8074.83,9.3335,0.02,330,2212,100.00,10.62,6.3670
1,1,2,20.0020,0.7002,100.0,491.19,606.07,1477.61,1237.50,9.35,...,312.59,2387.73,8046.13,9.1913,0.02,361,2324,100.00,24.37,14.6552
2,1,3,42.0038,0.8409,100.0,445.00,548.95,1343.12,1117.05,3.91,...,129.62,2387.97,8066.62,9.4007,0.02,329,2212,100.00,10.48,6.4213
3,1,4,42.0000,0.8400,100.0,445.00,548.70,1341.24,1118.03,3.91,...,129.80,2388.02,8076.05,9.3369,0.02,328,2212,100.00,10.54,6.4176
4,1,5,25.0063,0.6207,60.0,462.54,536.10,1255.23,1033.59,7.05,...,164.11,2028.08,7865.80,10.8366,0.02,305,1915,84.93,14.03,8.6754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61244,249,251,9.9998,0.2500,100.0,489.05,605.33,1516.36,1315.28,10.52,...,380.16,2388.73,8185.69,8.4541,0.03,372,2319,100.00,29.11,17.5234
61245,249,252,0.0028,0.0015,100.0,518.67,643.42,1598.92,1426.77,14.62,...,535.02,2388.46,8185.47,8.2221,0.03,396,2388,100.00,39.38,23.7151
61246,249,253,0.0029,0.0000,100.0,518.67,643.68,1607.72,1430.56,14.62,...,535.41,2388.48,8193.94,8.2525,0.03,395,2388,100.00,39.78,23.8270
61247,249,254,35.0046,0.8400,100.0,449.44,555.77,1381.29,1148.18,5.48,...,187.92,2388.83,8125.64,9.0515,0.02,337,2223,100.00,15.26,9.0774
