## Génération du dataset synthétique

#### Import des librairies 

In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

#### Paramètres et génération des événements

In [3]:
# Synthetic maintenance-event generator.
# For every press, failures are drawn one after another over a fixed
# `days`-long window ending "now"; each failure yields one record with its
# repair time, cost and scrap count. The records are collected in `events`
# and turned into the DataFrame `df`.

SEED = 42  # fixed seed: a fresh kernel run regenerates the same random draws
n_machines = 5
days = 180  # 6 months of history
events = []

# Both generators are used below (np.random.randint and random.choice),
# so both must be seeded for the dataset to be reproducible.
np.random.seed(SEED)
random.seed(SEED)

machines = [f"Press_{i}" for i in range(1, n_machines+1)]

# Capture "now" once: the simulated window is then exactly `days` long and
# the loop bounds do not drift while the cell is running.
end_date = datetime.today()
start_date = end_date - timedelta(days=days)

for machine in machines:
    current_date = start_date
    while current_date < end_date:
        # Time until the next failure: 3 to 19 days (upper bound is exclusive)
        time_to_failure = np.random.randint(3, 20)
        failure_date = current_date + timedelta(days=time_to_failure)

        # Failures that would fall after the end of the window are discarded
        if failure_date >= end_date:
            break

        # Repair duration (simulated MTTR): 1 to 7 hours
        repair_time = np.random.randint(1, 8)

        # Repair cost: duration times a randomly picked hourly rate (EUR/h, parts included)
        cost = repair_time * random.choice([50, 75, 100])

        # Scrap units caused by the failure: 5 to 49
        scrap = np.random.randint(5, 50)

        events.append({
            "machine": machine,
            "failure_date": failure_date.date(),
            "repair_time_h": repair_time,
            # Downtime is modelled as equal to the repair time
            "downtime_h": repair_time,
            "cost_eur": cost,
            "scrap_units": scrap
        })

        # The next failure countdown starts the day after this failure
        current_date = failure_date + timedelta(days=1)

# Build the DataFrame once from the list of records
df = pd.DataFrame(events)


#### Sauvegarde CSV

In [4]:
# Write the synthetic events to disk; index=False keeps the pandas row
# index out of the CSV file.
csv_name = "maintenance_events.csv"
df.to_csv(csv_name, index=False)

# Confirm the export, then preview the first ten rows as plain text.
print(f"✅ Dataset synthétique généré : {csv_name}")
preview = df.head(10)
print(preview)

✅ Dataset synthétique généré : maintenance_events.csv
   machine failure_date  repair_time_h  downtime_h  cost_eur  scrap_units
0  Press_1   2025-03-22              7           7       700           42
1  Press_1   2025-04-06              2           2       100           24
2  Press_1   2025-04-25              7           7       350           44
3  Press_1   2025-05-14              5           5       250            7
4  Press_1   2025-05-24              7           7       350           21
5  Press_1   2025-06-01              7           7       700           40
6  Press_1   2025-06-15              1           1       100           31
7  Press_1   2025-06-26              3           3       150           10
8  Press_1   2025-07-02              6           6       300           22
9  Press_1   2025-07-17              4           4       300           37


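Nous obtenons donc un fichier `maintenance_events.csv` contenant les colonnes suivantes :

- `machine` : identifiant de la presse (`Press_1` à `Press_5`)

- `failure_date` : date de la panne

- `repair_time_h` : durée de réparation, en heures

- `downtime_h` : durée d'arrêt, en heures (ici égale à la durée de réparation)

- `cost_eur` : coût de la réparation, en euros

- `scrap_units` : nombre de pièces rebutées à cause de la panne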