Failure Detection Feature Engineering

In [None]:
import pandas as pd

# --- Configuration -----------------------------------------------------------
# Input dataset and output location for the labelled dataset.
file_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Final_Merged_Dataset.csv"
updated_file_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Final_Merged_Dataset_with_ToolChanges_and_Failures.csv"

# Fixed tool change threshold (adjustable): change in bore size that indicates a tool change.
tool_change_threshold = 0.05


def add_tool_change_and_failure_features(data, threshold=0.05):
    """Return a copy of ``data`` sorted per station with tool-change and failure columns added.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``StationName``, ``ResultEntryTimestamp3``,
        ``ResultDataValue3``, ``MinValue``, ``MaxValue`` and ``WeldReclaim``.
        The input frame is not modified.
    threshold : float, default 0.05
        Absolute change in ``ResultDataValue3`` between consecutive rows of the
        same station above which the row is flagged as a tool change.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``StationName`` then ``ResultEntryTimestamp3``, with
        ``ResultEntryTimestamp3`` converted to datetime (unparseable values
        become NaT) and these columns added:

        * ``BoreSizeChange`` - absolute difference from the previous row of the station.
        * ``ToolChange`` - 1 where ``BoreSizeChange`` exceeds ``threshold``, else 0.
        * ``TimeSinceLastChange`` - hours elapsed since the most recent row of the
          same station flagged as a tool change (0 on the flagged row itself;
          NaN before the station's first flagged row or when a timestamp is missing).
        * ``OutOfTolerance`` - 1 where the value lies outside ``[MinValue, MaxValue]``.
        * ``Failure_BoreSizeChange`` - 1 where a tool change coincides with an
          out-of-tolerance value.
        * ``Failure_WeldReclaim`` - 1 where ``WeldReclaim`` is not null.
        * ``Failure_Label`` - 1 where either failure indicator is 1.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Convert the timestamp column to datetime for sorting; ``assign`` returns a
    # new frame so the caller's data is left untouched.
    out = data.assign(
        ResultEntryTimestamp3=pd.to_datetime(data['ResultEntryTimestamp3'], errors='coerce')
    )

    # Step 1: bore size change per station (rows must be in time order for diff).
    out = out.sort_values(by=['StationName', 'ResultEntryTimestamp3'])
    out['BoreSizeChange'] = out.groupby('StationName')['ResultDataValue3'].diff().abs()

    # Steps 2-3: flag tool changes. A NaN change (first row of a station) compares
    # False and is therefore not flagged.
    is_tool_change = out['BoreSizeChange'] > threshold
    out['ToolChange'] = is_tool_change.astype(int)

    # Step 4: hours since the last tool change in the same station.
    # Keep the timestamp only on flagged rows, carry it forward within each
    # station, then measure the distance from every row to that carried value.
    # (A plain per-station timestamp diff would only give the gap to the
    # previous measurement, not to the previous tool change.)
    change_times = out['ResultEntryTimestamp3'].where(is_tool_change)
    last_change_time = change_times.groupby(out['StationName']).ffill()
    out['TimeSinceLastChange'] = (
        (out['ResultEntryTimestamp3'] - last_change_time).dt.total_seconds() / 3600
    )

    # Step 5: failures from bore size changes (threshold exceeded AND out of tolerance).
    below_min = out['ResultDataValue3'] < out['MinValue']
    above_max = out['ResultDataValue3'] > out['MaxValue']
    out['OutOfTolerance'] = (below_min | above_max).astype(int)
    out['Failure_BoreSizeChange'] = (is_tool_change & (out['OutOfTolerance'] == 1)).astype(int)

    # Step 6: failure indicator from weld reclaim - 1 if WeldReclaim is not NaN.
    # NOTE(review): any non-null value counts as a failure, including placeholder
    # values such as 0 or "" if the source data uses them - confirm.
    out['Failure_WeldReclaim'] = out['WeldReclaim'].notna().astype(int)

    # Step 7: final failure label (bore size failure OR weld reclaim failure).
    out['Failure_Label'] = (
        (out['Failure_BoreSizeChange'] == 1) | (out['Failure_WeldReclaim'] == 1)
    ).astype(int)

    return out


# Driver: load, engineer features, save. ``__name__`` is "__main__" inside a
# Jupyter kernel, so this runs when the cell is executed but not when the
# function above is imported elsewhere.
if __name__ == "__main__":
    # Load dataset
    df = pd.read_csv(file_path)

    # Add tool change & failure labels
    df = add_tool_change_and_failure_features(df, tool_change_threshold)

    # Save the dataset with tool change & failure labels
    df.to_csv(updated_file_path, index=False)

    # Display success message
    print(f"Dataset with tool change & failure labels saved to: {updated_file_path}")

    # Display first few rows for verification
    print(df.head(10))


Dataset with tool change & failure labels saved to: G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Final_Merged_Dataset_with_ToolChanges_and_Failures.csv
       AssemblyID NamePostfix  ... Failure_WeldReclaim Failure_Label
183705   D0678557    Doghouse  ...                   0             0
183706   D0678557    Doghouse  ...                   0             0
183707   D0678557    Doghouse  ...                   0             0
183651   D0678556    Doghouse  ...                   0             0
183652   D0678556    Doghouse  ...                   0             0
183653   D0678556    Doghouse  ...                   0             0
183571   D0678561    Doghouse  ...                   0             0
183572   D0678561    Doghouse  ...                   0             0
183573   D0678561    Doghouse  ...                   0             0
164872   D0678738    Doghouse  ...                   0             0

[10 rows x 27 columns]
