In [None]:
###################################################################
#NASA Turbofan Engine Degradation reliability and failure analysis#
###################################################################
#
# File structure and SQL update

In [42]:
import pandas as pd
from sqlalchemy import create_engine, text

In [43]:
# create connection
engine = create_engine("postgresql://postgres:Project1-NASA@localhost:5432/Project1-NASA")

engine.connect()

<sqlalchemy.engine.base.Connection at 0x255abfb7850>

In [44]:
# create column names per readme.txt
column_names = (
    ["engine_id", "cycle"] + [f"op_setting_{num}" for num in range(1,4)] + [f"sensor_{num}" for num in range(1,22)]
)

df = pd.DataFrame()

# create a dataframe and populate with first file of data (r"\s+" is a regular expression for any amount of white space)
# The code below has been commented due to it just being test 
# df = pd.read_csv("train_FD001.txt", sep=r"\s+", header=None, names=column_names)

# df.head(5)

In [45]:
# Add additional columns to determine file once combined
df["dataset_id"] = 'FD001'
df["dataset_split"] = "train"

In [46]:
# Send to sql
df.to_sql("engine_readings", engine, if_exists='append', index=False)

0

In [47]:
# Repeat for all files 
files = [
    ('test_FD001.txt', "FD001", 'test'),
    ('test_FD002.txt', "FD002", 'test'),
    ('test_FD003.txt', "FD003", 'test'),
    ('test_FD004.txt', "FD004", 'test'),
    ('train_FD001.txt', "FD001", 'train'),
    ('train_FD002.txt', "FD002", 'train'),
    ('train_FD003.txt', "FD003", 'train'),
    ('train_FD004.txt', "FD004", 'train'),
]

for file_name, id, split in files:
    df = pd.read_csv(file_name, sep=r"\s+", header=None, names=column_names)
    
    df["dataset_id"] = id
    df["dataset_split"] = split

    df.to_sql("engine_readings", engine, if_exists='append', index=False)

    print(f"Loaded {file_name}")

Loaded test_FD001.txt
Loaded test_FD002.txt
Loaded test_FD003.txt
Loaded test_FD004.txt
Loaded train_FD001.txt
Loaded train_FD002.txt
Loaded train_FD003.txt
Loaded train_FD004.txt


In [48]:
# connecting to postgresql and retrieving the data that has been stored to it
with engine.connect() as connection:
    result = connection.execute(text('SELECT * FROM engine_readings LIMIT 10'))
    for row in result:
        print(row)

(1, 1, 0.0023, 0.0003, 100.0, 518.67, 643.02, 1585.29, 1398.21, 14.62, 21.61, 553.9, 2388.04, 9050.17, 1.3, 47.2, 521.72, 2388.03, 8125.55, 8.4052, 0.03, 392.0, 2388.0, 100.0, 38.86, 23.3735, 'FD001', 'test')
(1, 2, -0.0027, -0.0003, 100.0, 518.67, 641.71, 1588.45, 1395.42, 14.62, 21.61, 554.85, 2388.01, 9054.42, 1.3, 47.5, 522.16, 2388.06, 8139.62, 8.3803, 0.03, 393.0, 2388.0, 100.0, 39.02, 23.3916, 'FD001', 'test')
(1, 3, 0.0003, 0.0001, 100.0, 518.67, 642.46, 1586.94, 1401.34, 14.62, 21.61, 554.11, 2388.05, 9056.96, 1.3, 47.5, 521.97, 2388.03, 8130.1, 8.4441, 0.03, 393.0, 2388.0, 100.0, 39.08, 23.4166, 'FD001', 'test')
(1, 4, 0.0042, 0.0, 100.0, 518.67, 642.44, 1584.12, 1406.42, 14.62, 21.61, 554.07, 2388.03, 9045.29, 1.3, 47.28, 521.38, 2388.05, 8132.9, 8.3917, 0.03, 391.0, 2388.0, 100.0, 39.0, 23.3737, 'FD001', 'test')
(1, 5, 0.0014, 0.0, 100.0, 518.67, 642.51, 1587.19, 1401.92, 14.62, 21.61, 554.16, 2388.01, 9044.55, 1.3, 47.31, 522.15, 2388.03, 8129.54, 8.4031, 0.03, 390.0, 2388

In [49]:
# group and count to check if all the data has been updated
with engine.connect() as connection:
    result = connection.execute(text('''SELECT dataset_id, dataset_split, COUNT(*) AS row_count FROM engine_readings 
                                     GROUP BY dataset_id, dataset_split 
                                     ORDER BY dataset_id, dataset_split'''))
    for row in result:
        print(row)

('FD001', 'test', 13096)
('FD001', 'train', 20631)
('FD002', 'test', 33991)
('FD002', 'train', 53759)
('FD003', 'test', 16596)
('FD003', 'train', 24720)
('FD004', 'test', 41214)
('FD004', 'train', 61249)
