In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

import matplotlib.pyplot as plt
import networkx as nx

In [12]:
# Column names for the dataset.
# The raw .txt files carry no header row (they are read with header=None),
# so the names are declared here and handed to pd.read_csv via `names=`.

COLS = [
    # identifier columns
    "unit",
    "cycle",
    # the three "setting" columns
    *(f"setting{i}" for i in (1, 2, 3)),
    # the remaining 21 measurement columns
    "T2", "T24", "T30", "T50", "P2", "P15", "P30",
    "Nf", "Nc", "EPR", "Ps30", "Phi", "NRf", "NRc",
    "BPR", "farB", "htBleed", "Nf_dmd", "PCNf_Rdmd", "W31", "W32",
]

In [None]:
# Importing the train dataset
# `sep=r"\s+"` is the supported spelling of the deprecated
# `delim_whitespace=True`; both split fields on runs of whitespace.

df = pd.read_csv(
    "data/train_FD001.txt",
    sep=r"\s+",               # handles multiple consecutive spaces
    header=None,              # the .txt file has no header row
    names=COLS,               # assign our own list of column names
    usecols=range(len(COLS))  # ignore the empty trailing columns
)

df.head()

  df = pd.read_csv(


Unnamed: 0,unit,cycle,setting1,setting2,setting3,T2,T24,T30,T50,P2,...,Phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNf_Rdmd,W31,W32
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [14]:
# Importing the test dataset
# Read with the same options as the train file. `sep=r"\s+"` is the
# supported spelling of the deprecated `delim_whitespace=True`.

df_test = pd.read_csv(
    "data/test_FD001.txt",
    sep=r"\s+",               # handles multiple consecutive spaces
    header=None,              # the .txt file has no header row
    names=COLS,               # assign our own list of column names
    usecols=range(len(COLS))  # ignore the empty trailing columns
)

df_test.head()

  df_test = pd.read_csv(


Unnamed: 0,unit,cycle,setting1,setting2,setting3,T2,T24,T30,T50,P2,...,Phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNf_Rdmd,W31,W32
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413


In [15]:
def add_rul(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a ``RUL`` column appended.

    For every row, ``RUL`` is the largest ``cycle`` value found for that
    row's ``unit`` minus the row's own ``cycle``; the last recorded cycle
    of each unit therefore gets ``RUL == 0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``unit`` and ``cycle``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows, row order and index as ``df`` plus
        the ``RUL`` column. ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``unit`` or ``cycle`` is missing from ``df``.
    """
    # transform("max") broadcasts each unit's maximum back onto its rows, so
    # no temporary `max_cycle` column or merge is needed and the caller's
    # index is kept intact.
    last_cycle = df.groupby("unit")["cycle"].transform("max")
    return df.assign(RUL=last_cycle - df["cycle"])

# Attach the RUL column to both the train and the test frames.
# NOTE(review): add_rul counts cycles up to each unit's last *recorded*
# cycle. If the test trajectories stop before failure, this is not the true
# remaining useful life; confirm whether ground-truth RUL values for the
# test set are supplied separately.
df, df_test = add_rul(df), add_rul(df_test)

df.head()

Unnamed: 0,unit,cycle,setting1,setting2,setting3,T2,T24,T30,T50,P2,...,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNf_Rdmd,W31,W32,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,187
