In [3]:
import pandas as pd
import numpy as np

In [4]:
# Read the raw machine-failure records from disk and preview the first rows.
source_csv = 'machine failure.csv'
df = pd.read_csv(source_csv)
preview_rows = df.head()
print(preview_rows)

   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Machine failure  TWF  \
0                    1551         42.8                0                0    0   
1                    1408         46.3                3                0    0   
2                    1498         49.4                5                0    0   
3                    1433         39.5                7                0    0   
4                    1408         40.0                9                0    0   

   HDF  PWF  OSF  RNF  
0    0    0    0    0  
1    0    0    0    0  
2    0  

In [5]:
# Count null entries per column so gaps in the raw data are visible up front.
null_mask = df.isnull()
missing_values = null_mask.sum()
print("Missing values in each column:", missing_values, sep="\n")

Missing values in each column:
UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Machine failure            0
TWF                        0
HDF                        0
PWF                        0
OSF                        0
RNF                        0
dtype: int64


In [6]:
# Add a 0-based running row counter; the file's row order is used as the time axis.
row_count = df.shape[0]
df['time_index'] = np.arange(row_count)

In [7]:
# Trailing 5-row moving averages of the main sensor readings.
# A full window is required, so the first four rows of each new column are NaN.
rolling_sources = {
    'rolling_avg_temp': 'Air temperature [K]',
    'rolling_avg_rot_speed': 'Rotational speed [rpm]',
    'rolling_avg_torque': 'Torque [Nm]',
}
for rolling_column, sensor_column in rolling_sources.items():
    df[rolling_column] = df[sensor_column].rolling(window=5).mean()

In [8]:
# Exponentially weighted means of the main sensor readings.
# All three share one configuration: span of 5, recursive form (adjust=False).
ema_settings = dict(span=5, adjust=False)
air_temp_ewm = df['Air temperature [K]'].ewm(**ema_settings)
rot_speed_ewm = df['Rotational speed [rpm]'].ewm(**ema_settings)
torque_ewm = df['Torque [Nm]'].ewm(**ema_settings)
df['ema_temp'] = air_temp_ewm.mean()
df['ema_rot_speed'] = rot_speed_ewm.mean()
df['ema_torque'] = torque_ewm.mean()

In [9]:
# Previous-row value of each key sensor reading.
# Row 0 has no predecessor, so it is NaN in every lag column.
lag_pairs = [
    ('lag_temp', 'Air temperature [K]'),
    ('lag_rot_speed', 'Rotational speed [rpm]'),
    ('lag_torque', 'Torque [Nm]'),
]
for lag_column, sensor_column in lag_pairs:
    df[lag_column] = df[sensor_column].shift(1)

In [10]:
# Interaction term: element-wise product of air temperature and torque.
air_temperature = df['Air temperature [K]']
torque = df['Torque [Nm]']
df['temp_torque_interaction'] = air_temperature.mul(torque)

In [11]:
# Running totals of each sensor reading, accumulated in row order.
cumsum_targets = ('cumsum_temp', 'cumsum_rot_speed', 'cumsum_torque')
cumsum_inputs = ('Air temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]')
for total_column, sensor_column in zip(cumsum_targets, cumsum_inputs):
    df[total_column] = df[sensor_column].cumsum()

In [12]:
# Fractional change of each sensor reading relative to the previous row.
# The first row has nothing to compare against and is NaN.
roc_sources = {
    'roc_temp': 'Air temperature [K]',
    'roc_rot_speed': 'Rotational speed [rpm]',
    'roc_torque': 'Torque [Nm]',
}
for roc_column in roc_sources:
    sensor_series = df[roc_sources[roc_column]]
    df[roc_column] = sensor_series.pct_change()

In [13]:
# Flag air-temperature readings that lie more than three standard deviations
# away from the column mean (strictly above the upper or below the lower bound).
air_temp = df['Air temperature [K]']
temp_mean = air_temp.mean()
temp_band = 3 * air_temp.std()
above_upper = air_temp > temp_mean + temp_band
below_lower = air_temp < temp_mean - temp_band
df['temp_outlier'] = above_upper | below_lower

In [14]:
# Second-degree (squared) terms of the three main sensor readings.
squared_sources = [
    ('temp_squared', 'Air temperature [K]'),
    ('rot_speed_squared', 'Rotational speed [rpm]'),
    ('torque_squared', 'Torque [Nm]'),
]
for squared_column, sensor_column in squared_sources:
    df[squared_column] = df[sensor_column].pow(2)

In [15]:
# Quartile bins of each sensor reading; labels=False stores the integer bin
# code instead of an interval label.
quartile_sources = {
    'binned_temp': 'Air temperature [K]',
    'binned_rot_speed': 'Rotational speed [rpm]',
    'binned_torque': 'Torque [Nm]',
}
for binned_column, sensor_column in quartile_sources.items():
    df[binned_column] = pd.qcut(df[sensor_column], q=4, labels=False)

In [16]:
# Share of failures over the 10 rows preceding each row.
# The one-row shift keeps a row's own label out of its feature value; rows
# without ten earlier observations are NaN.
trailing_failure_share = df['Machine failure'].rolling(window=10).mean()
df['failure_rate'] = trailing_failure_share.shift(1)

In [17]:
# Create time since last failure feature
# Number of rows elapsed since the most recent *earlier* failure. A plain
# cumsum().shift(1) only counts how many failures have happened so far and
# never resets, so it does not measure elapsed time.
is_failure = df['Machine failure'].astype(bool).to_numpy()
row_number = np.arange(len(df))
# Row position of the latest failure seen up to and including each row;
# -1 is the sentinel for "no failure yet".
latest_failure_row = pd.Series(np.where(is_failure, row_number, -1), index=df.index).cummax()
# Shift by one row so a row never sees its own label (no target leakage).
previous_failure_row = latest_failure_row.shift(1, fill_value=-1)
# Rows with no earlier failure count from the start of the data (row 0 -> 1).
df['time_since_last_failure'] = row_number - previous_failure_row

In [18]:
# One-hot encode the 'Type' column: the first category is dropped, the
# remaining indicator columns are appended, and the original column is removed.
type_indicators = pd.get_dummies(df['Type'], prefix='Type', drop_first=True)
df = pd.concat([df.drop(columns=['Type']), type_indicators], axis=1)

In [19]:
# Preview the first rows of the frame now that the engineered columns exist.
print("Updated DataFrame with new features:", df.head(), sep="\n")

Updated DataFrame with new features:
   UDI Product ID  Air temperature [K]  Process temperature [K]  \
0    1     M14860                298.1                    308.6   
1    2     L47181                298.2                    308.7   
2    3     L47182                298.1                    308.5   
3    4     L47183                298.2                    308.6   
4    5     L47184                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Machine failure  TWF  \
0                    1551         42.8                0                0    0   
1                    1408         46.3                3                0    0   
2                    1498         49.4                5                0    0   
3                    1433         39.5                7                0    0   
4                    1408         40.0                9                0    0   

   HDF  ...  temp_squared  rot_speed_squared  torque_squared  binned_temp

In [20]:
# Re-count nulls per column: the rolling, lag and rate-of-change features
# leave NaN in their leading rows.
null_mask_after = df.isnull()
missing_values_after = null_mask_after.sum()
print("Missing values after feature engineering:", missing_values_after, sep="\n")


Missing values after feature engineering:
UDI                         0
Product ID                  0
Air temperature [K]         0
Process temperature [K]     0
Rotational speed [rpm]      0
Torque [Nm]                 0
Tool wear [min]             0
Machine failure             0
TWF                         0
HDF                         0
PWF                         0
OSF                         0
RNF                         0
time_index                  0
rolling_avg_temp            4
rolling_avg_rot_speed       4
rolling_avg_torque          4
ema_temp                    0
ema_rot_speed               0
ema_torque                  0
lag_temp                    1
lag_rot_speed               1
lag_torque                  1
temp_torque_interaction     0
cumsum_temp                 0
cumsum_rot_speed            0
cumsum_torque               0
roc_temp                    1
roc_rot_speed               1
roc_torque                  1
temp_outlier                0
temp_squared                

In [22]:
# Persist the engineered frame as CSV, leaving the row index out of the file.
engineered_csv = 'machine_failure_engineered_v2.csv'
df.to_csv(engineered_csv, index=False)