# In [1]:
import pandas as pd
import numpy as np

# Pin the global NumPy RNG so the generated dataset is reproducible.
np.random.seed(42)

n_samples = 5000  # rows in the generated dataset
machine_ids = ["Machine_" + str(idx) for idx in range(1, 21)]  # Machine_1 .. Machine_20

# Feature draws. Their order (machine, temperature, run time) fixes which
# values each column receives under the seed above, so do not reorder them.
machine_id = np.random.choice(machine_ids, size=n_samples)
temperature = np.random.uniform(low=50, high=100, size=n_samples)  # degrees C, 50-100
run_time = np.random.uniform(low=10, high=200, size=n_samples)  # hours, 10-200

# Target Downtime_Flag: 1 when both temperature > 80 and run time > 150,
# otherwise 0.
downtime_flag = np.logical_and(temperature > 80, run_time > 150).astype(int)

# Assemble the columns into a single table.
synthetic_data = pd.DataFrame(
    {
        "Machine_ID": machine_id,
        "Temperature": temperature,
        "Run_Time": run_time,
        "Downtime_Flag": downtime_flag,
    }
)

# Persist to CSV without the row index column.
file_path = "synthetic_manufacturing_data.csv"
synthetic_data.to_csv(file_path, index=False)