In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import RandomOverSampler

# Preprocessing step: Min-Max scale the numeric sensor columns of the cleaned
# dataset, balance the target classes by random over-sampling, and write the
# result to a new CSV.

# Paths and the target column are named once so that the read, the write and
# the final status message always refer to the same values.
input_path = "Dataset/2 Cleaned_Predictive_Maintenance.csv"
output_path = "Dataset/3 Balanced_Predictive_Maintenance.csv"
target_column = 'machine failure'

# Load the cleaned dataset
df = pd.read_csv(input_path)

# Columns that are rescaled; every other column is passed through unchanged.
numeric_columns = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]

# Apply Min-Max normalization in place (MinMaxScaler's default range is [0, 1]).
# The scaler is fitted on the full dataset loaded above.
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Split into features (X) and target (y)
X = df.drop(columns=[target_column])  # Features
y = df[target_column]  # Target variable

# Apply random over-sampling; random_state is fixed so reruns give the same
# resampled rows.
# NOTE(review): over-sampling works by repeating existing rows. If a train/test
# split is performed later on the saved file, copies of the same row can end up
# on both sides of the split -- confirm how the output is consumed downstream.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Combine the resampled features and target into a single DataFrame, keeping
# the original feature column names and appending the target as the last column.
df_resampled = pd.DataFrame(X_resampled, columns=X.columns)
df_resampled[target_column] = y_resampled  # Add target column back

# Save the balanced dataset (without the pandas index column)
df_resampled.to_csv(output_path, index=False)

# Report the path that was actually written; the previous message named
# 'Balanced_Predictive_Maintenance.csv', which does not match the saved file.
print(f"Normalization and balancing complete. File saved as '{output_path}'.")
