In [4]:

!pip install -q pandas numpy scikit-learn


import pandas as pd
from sklearn.preprocessing import StandardScaler

url = "https://huggingface.co/datasets/nominal-io/nasa-turbofan-degradation/resolve/main/NASA_turbofan_train_FD001.csv"
df = pd.read_csv(url)


df.to_csv("turbofan_full.csv", index=False)
print(" Saved raw dataset as turbofan_full.csv")

for col in df.columns:
    if df[col].dtype == "object":
        df[col].fillna(df[col].mode()[0], inplace=True)
    else:
        df[col].fillna(df[col].median(), inplace=True)


exclude = ['engine', 'cycle']
num_cols = [c for c in df.select_dtypes(['int64', 'float64']).columns if c not in exclude]

scaler = StandardScaler()
df_scaled = df.copy()
df_scaled[num_cols] = scaler.fit_transform(df[num_cols])


df_scaled.to_csv("turbofan_preprocessed.csv", index=False)
print("Saved preprocessed dataset as turbofan_preprocessed.csv")


print(" Sample preprocessed data:")
print(df_scaled.head())



✅ Saved raw dataset as turbofan_full.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)


✅ Saved preprocessed dataset as turbofan_preprocessed.csv
📊 Sample preprocessed data:
   Unnamed: 0  engine  cycle  setting_1  setting_2  setting_3  \
0   -1.731967       1      1  -0.315980  -1.372953        0.0   
1   -1.731799       1      2   0.872722  -1.031720        0.0   
2   -1.731631       1      3  -1.961874   1.015677        0.0   
3   -1.731463       1      4   0.324090  -0.008022        0.0   
4   -1.731295       1      5  -0.864611  -0.690488        0.0   

   (Fan inlet temperature) (◦R)  (LPC outlet temperature) (◦R)  \
0                           0.0                      -1.721725   
1                           0.0                      -1.061780   
2                           0.0                      -0.661813   
3                           0.0                      -0.661813   
4                           0.0                      -0.621816   

   (HPC outlet temperature) (◦R)  (LPT outlet temperature) (◦R)  ...  \
0                      -0.134255                      

In [5]:
from google.colab import drive
drive.mount('/content/drive')
# Save to a specific folder in your Drive
df_scaled.to_csv('/content/drive/MyDrive/turbofan_preprocessed.csv', index=False)


Mounted at /content/drive


In [6]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd

# URL of the original dataset CSV file
url = "https://huggingface.co/datasets/nominal-io/nasa-turbofan-degradation/resolve/main/NASA_turbofan_train_FD001.csv"

# Read the dataset from the URL
df = pd.read_csv(url)

# Path where you want to save it in your Google Drive
save_path = '/content/drive/MyDrive/nasa_turbofan_original.csv'

# Save the dataframe as CSV to Google Drive
df.to_csv(save_path, index=False)

print(f"✅ Dataset saved to Google Drive at: {save_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Dataset saved to Google Drive at: /content/drive/MyDrive/nasa_turbofan_original.csv
