# Since we don't have a Cost Optimizer dataset, I am creating a random demo dataset.
# However, I have maintained the relationships between the columns.

In [8]:
import pandas as pd
import numpy as np

def generate_synthetic_maintenance_data(n_samples=2000, seed=42):
    """Generate a synthetic maintenance-cost dataset with correlated columns.

    Each row describes one simulated asset: its sensor condition, a randomly
    chosen repair timing, and the resulting repair cost.

    Relationships built into the data:
      * ``anomaly_level`` rises as ``sensor_health`` falls (plus noise).
      * ``critical_sensors`` / ``warning_sensors`` are Poisson counts whose
        rate grows as ``sensor_health`` falls; each is capped at 3 and
        ``good_sensors`` is the remainder of 15 sensors.
      * ``cost`` grows with sensor counts, anomaly level and health drop, and
        with the premium/downtime attached to ``repair_day``.
      * ``predicted_rul`` is drawn independently and does not influence any
        other column.

    Args:
        n_samples: Number of rows to generate. Values <= 0 yield an empty
            frame that still carries the full column set.
        seed: Seed for the private random generator. The default (42)
            reproduces the same rows as seeding NumPy's global generator
            with 42. Pass ``None`` for non-reproducible data.

    Returns:
        pandas.DataFrame with columns ``repair_day``, ``warning_sensors``,
        ``critical_sensors``, ``good_sensors``, ``predicted_rul``,
        ``sensor_health``, ``anomaly_level`` and ``cost``.
    """
    # A private legacy RandomState yields the same stream as
    # np.random.seed(seed) followed by np.random.* calls, but leaves the
    # process-wide generator untouched for the caller.
    rng = np.random.RandomState(seed)

    columns = [
        'repair_day', 'warning_sensors', 'critical_sensors', 'good_sensors',
        'predicted_rul', 'sensor_health', 'anomaly_level', 'cost',
    ]

    # Repair day selection with biases (strongest towards before_10_days).
    # The raw weights do not sum to 1, so they are normalised once here.
    repair_options = ['before_10_days', 'today', 'end_cycle', 'after_10_days']
    weights = [0.80, 0.60, 0.55, 0.10]
    weight_total = sum(weights)
    repair_probs = [w / weight_total for w in weights]

    # (timing adjustment, downtime cost) for each repair option.
    repair_costs = {
        'before_10_days': (-50000, 0),        # Cheaper preventive maintenance
        'today': (0, 0),
        'end_cycle': (200000, 500000),        # Emergency premium + 5 lakh downtime
        'after_10_days': (500000, 5000000),   # Major emergency + 50 lakh downtime
    }

    base_cost = 1000000  # 10 lakh base

    data = []

    # NOTE: the order of the random draws below is what makes the output
    # reproducible for a given seed; do not reorder them.
    for _ in range(n_samples):
        predicted_rul = rng.randint(10, 131)  # 10 to 130 days

        # Sensor health: roughly N(80, 10), bounded to 60-99%
        base_health = rng.normal(80, 10)
        sensor_health = max(60, min(99, base_health))

        # Anomaly level inversely correlated with sensor health, bounded 0-25
        anomaly_level = max(0, min(25, (100 - sensor_health) * 0.4 + rng.normal(0, 3)))

        # Poor health -> more critical/warning sensors.
        # With health bounded to 60-99 this factor spans 0.025 to 1.
        health_factor = (100 - sensor_health) / 40

        critical_sensors = min(3, int(rng.poisson(health_factor * 1.5)))
        warning_sensors = min(3, int(rng.poisson(health_factor * 2)))
        good_sensors = 15 - critical_sensors - warning_sensors

        repair_day = rng.choice(repair_options, p=repair_probs)

        # Cost factors
        critical_factor = critical_sensors * 150000  # 1.5 lakh per critical sensor
        warning_factor = warning_sensors * 80000     # 80k per warning sensor
        anomaly_factor = anomaly_level * 20000       # 20k per % anomaly
        health_factor_cost = (100 - sensor_health) * 10000  # 10k per % health drop

        # Timing adjustment depends only on the chosen repair day
        # (predicted_rul is not part of the cost).
        rul_factor, downtime_cost = repair_costs[repair_day]

        total_cost = (base_cost + critical_factor + warning_factor +
                      anomaly_factor + health_factor_cost + rul_factor + downtime_cost)

        # Add some random variation (1 lakh standard deviation)
        total_cost += rng.normal(0, 100000)

        # Keep cost within 10-30 lakh. The after_10_days downtime alone
        # exceeds the upper bound, so those rows effectively always land on
        # the 30 lakh cap.
        total_cost = max(1000000, min(3000000, total_cost))

        data.append({
            'repair_day': repair_day,
            'warning_sensors': warning_sensors,
            'critical_sensors': critical_sensors,
            'good_sensors': good_sensors,
            'predicted_rul': predicted_rul,
            'sensor_health': round(sensor_health, 2),
            'anomaly_level': round(anomaly_level, 2),
            'cost': int(total_cost)
        })

    # Explicit columns keep the schema stable even when no rows are generated.
    return pd.DataFrame(data, columns=columns)




In [9]:
# Build the 2,000-row synthetic maintenance dataset
df = generate_synthetic_maintenance_data(n_samples=2000)

In [10]:



# Save to CSV. The path is defined once so the file that is written and the
# confirmation message cannot drift apart.
output_path = 'synthetic_maintenance_data.csv'
df.to_csv(output_path, index=False)
print(f"\nDataset saved as '{output_path}'")


Dataset saved as 'synthetic_maintenance_data.csv'


In [11]:
# Display the generated DataFrame as this cell's output
df

Unnamed: 0,repair_day,warning_sensors,critical_sensors,good_sensors,predicted_rul,sensor_health,anomaly_level,cost
0,today,0,2,13,112,74.50,11.75,1701445
1,before_10_days,0,0,15,12,75.88,8.24,1410281
2,today,2,0,13,31,60.00,17.81,1930452
3,end_cycle,0,0,15,130,79.65,8.51,2022026
4,end_cycle,1,0,14,69,60.00,19.36,2680188
...,...,...,...,...,...,...,...,...
1995,today,2,2,11,102,70.07,17.32,2185712
1996,end_cycle,0,0,15,112,90.52,6.47,1854390
1997,end_cycle,1,2,12,71,79.19,9.59,2464111
1998,today,0,0,15,61,87.48,5.97,1395433
