In [None]:
import zipfile
import os

# Where the uploaded archive lives and the folder it gets unpacked into.
zip_path = "/content/NASA_Turbofan_Jet_Engine_Data_Set.zip"  # change name if needed
extract_path = "/content/cmapss"

# Create the destination first; exist_ok=True keeps a re-run of this cell from failing
# when the folder is already there.
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_path, mode="r") as archive:
    archive.extractall(path=extract_path)

# Display the top-level entries that now exist in the extraction folder.
os.listdir(extract_path)

['CMaps']

In [None]:
import pandas as pd

# Column names (NASA standard): 2 identifier columns, 3 operating settings, 21 sensors.
columns = (
    ["unit", "cycle"] +
    [f"op_setting_{i}" for i in range(1, 4)] +
    [f"sensor_{i}" for i in range(1, 22)]
)

# Parse on runs of whitespace instead of a single space: with sep=" " every extra or
# trailing blank becomes an empty column that then has to be sliced away, and any
# doubled separator inside a row would silently shift values into the wrong column.
# names= labels the columns at read time; index_col=False makes sure that, should a
# row carry more fields than names, the surplus is dropped rather than the leading
# columns being promoted to the index.
df = pd.read_csv(
    "/content/cmapss/CMaps/train_FD001.txt",
    sep=r"\s+",
    header=None,
    names=columns,
    index_col=False,
)

df.head()

Unnamed: 0,unit,cycle,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [None]:
# Calculate max cycle per engine (one row per unit); kept as its own table so it can
# still be inspected or reused after this cell.
max_cycle = df.groupby("unit")["cycle"].max().rename("max_cycle").reset_index()

# Attach the per-engine maximum to every row by looking it up from the unit id.
# Assigning the column (instead of `df = df.merge(...)`) keeps this cell idempotent:
# a second run simply overwrites "max_cycle", whereas a repeated merge would produce
# max_cycle_x / max_cycle_y and make the next line fail with a KeyError.
df["max_cycle"] = df["unit"].map(max_cycle.set_index("unit")["max_cycle"])

# RUL = max_cycle - current_cycle
df["RUL"] = df["max_cycle"] - df["cycle"]

df[["unit", "cycle", "max_cycle", "RUL"]].head()

Unnamed: 0,unit,cycle,max_cycle,RUL
0,1,1,192,191
1,1,2,192,190
2,1,3,192,189
3,1,4,192,188
4,1,5,192,187


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Inputs are all columns whose name begins with "sensor_"; the target is the RUL column.
features = [name for name in df.columns if name.startswith("sensor_")]
X = df[features]
y = df["RUL"]

# Hyper-parameters gathered in one place: 100 trees, a fixed seed so repeated fits
# use the same random state, and n_jobs=-1 to parallelise the work.
forest_params = {
    "n_estimators": 100,
    "random_state": 42,
    "n_jobs": -1,
}
model = RandomForestRegressor(**forest_params)

# Fit on the full frame; no hold-out split is made in this cell.
model.fit(X, y)

In [None]:
# Smoke test: run the fitted model on the first row, kept as a one-row DataFrame so
# the input stays two-dimensional.
sample = X.head(1)
pred_rul = model.predict(sample)

# int() drops the fractional part of the estimate before it is printed.
rul_cycles = int(pred_rul[0])
print("Predicted RUL (cycles):", rul_cycles)

Predicted RUL (cycles): 182


In [None]:
import joblib

# Serialise the fitted model to a file in the current working directory so it can be
# loaded later without retraining; the call's return value is shown as the cell output.
model_file = "rul_model.pkl"
joblib.dump(model, model_file)

['rul_model.pkl']