In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load dataset
df = pd.read_csv("Dataset/1 Predictive Maintenance.csv")

# Load dataset
df = pd.read_csv('Dataset/Predictive Maintenance.csv')

# Drop duplicates
df.drop_duplicates(inplace=True)

# Handle missing values (impute with median for numerical, mode for categorical)
for col in df.columns:
    if df[col].dtype == 'object':
        df[col].fillna(df[col].mode()[0], inplace=True)
    else:
        df[col].fillna(df[col].median(), inplace=True)

# Convert relevant columns to numeric (if necessary)
numeric_columns = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Remove irrelevant observations (if necessary, based on business logic)
df = df[df['Process temperature [K]'] > 290]

# Handle outliers using the IQR method (only for numeric columns)
Q1 = df[numeric_columns].quantile(0.25)
Q3 = df[numeric_columns].quantile(0.75)
IQR = Q3 - Q1

# Align index to avoid misalignment errors
Q1, Q3 = Q1.align(Q3, axis=0, copy=False)

# Define acceptable range and filter rows
mask = ~((df[numeric_columns] < (Q1 - 1.5 * IQR)) | (df[numeric_columns] > (Q3 + 1.5 * IQR)))
df = df[mask.all(axis=1)]

# Apply Min-Max Normalization
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Save the cleaned and normalized dataset
df.to_csv("Dataset/2 Cleaned_Predictive_Maintenance.csv", index=False)

print("Data cleaning and normalization complete. File saved as 'Cleaned_Predictive_Maintenance.csv'.")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting value

KeyError: 'process temperature [K]'