<a href="https://colab.research.google.com/github/Mehdi007bond/Predictive_maintenance_Project/blob/main/Predictive_maintenance_Project_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# I started by generating the dataset so i can work


In [2]:
import pandas as pd
import numpy as np
import datetime

# --- 1. Configuration Parameters ---
N_MACHINES = 5
DAYS_PER_MACHINE = 60
SAMPLES_PER_HOUR = 4
N_SAMPLES_PER_DAY = 24 * SAMPLES_PER_HOUR

# Failure simulation parameters
FAILURE_WINDOW_HOURS = 12  # How many hours before failure do symptoms appear?
FAILURE_HORIZON_HOURS = 6  # How many hours out do we want to predict?

# Sensor base levels and noise
BASE_TEMP = 25.0
TEMP_NOISE = 1.5
TEMP_DEGRADE_RATE = 0.1  # Temp increase per sample in failure window

BASE_VIB = 0.5
VIB_NOISE = 0.1
VIB_DEGRADE_RATE = 0.02

BASE_CUR = 5.0
CUR_NOISE = 0.2
CUR_DEGRADE_RATE = 0.05

BASE_TORQUE = 40.0
TORQUE_NOISE = 1.0
TORQUE_DEGRADE_RATE = -0.08 # Torque might decrease as efficiency drops

# --- 2. Function to Generate Data for One Machine ---
def generate_machine_data(machine_id, start_date):
    print(f"Generating data for Machine ID: {machine_id}...")

    total_samples = DAYS_PER_MACHINE * N_SAMPLES_PER_DAY
    timestamps = [start_date + datetime.timedelta(hours=i/SAMPLES_PER_HOUR) for i in range(total_samples)]

    # Initialize sensor data with base levels and noise
    temp = np.random.normal(loc=BASE_TEMP, scale=TEMP_NOISE, size=total_samples)
    vib = np.random.normal(loc=BASE_VIB, scale=VIB_NOISE, size=total_samples)
    curr = np.random.normal(loc=BASE_CUR, scale=CUR_NOISE, size=total_samples)
    torque = np.random.normal(loc=BASE_TORQUE, scale=TORQUE_NOISE, size=total_samples)

    # --- Simulate Failures ---
    # Schedule a failure in the last few days
    failure_sample = total_samples - np.random.randint(N_SAMPLES_PER_DAY, N_SAMPLES_PER_DAY * 5)

    # Define the window where degradation occurs
    degradation_start_sample = failure_sample - (FAILURE_WINDOW_HOURS * SAMPLES_PER_HOUR)

    # Apply degradation trend
    for i in range(degradation_start_sample, failure_sample):
        # Calculate degradation factor (0 to 1 linear ramp)
        progress = (i - degradation_start_sample) / (failure_sample - degradation_start_sample)

        temp[i] += TEMP_DEGRADE_RATE * progress * 20  # Accelerating degradation
        vib[i] += VIB_DEGRADE_RATE * progress * 15
        curr[i] += CUR_DEGRADE_RATE * progress * 10
        torque[i] += TORQUE_DEGRADE_RATE * progress * 10

    # --- Create Target Variables ---
    # 1. Binary 'failure' event (at the exact sample)
    failure = np.zeros(total_samples, dtype=int)
    failure[failure_sample] = 1

    # 2. Target Label: 'failure_imminent' (Classification)
    # Flag '1' for all samples within the prediction horizon
    failure_imminent = np.zeros(total_samples, dtype=int)
    prediction_window_start = failure_sample - (FAILURE_HORIZON_HOURS * SAMPLES_PER_HOUR)
    failure_imminent[prediction_window_start:failure_sample] = 1

    # 3. Target Label: 'RUL' (Regression)
    rul = np.full(total_samples, fill_value=9999) # Fill with a high value
    for i in range(failure_sample):
        rul[i] = (failure_sample - i) / SAMPLES_PER_HOUR # RUL in hours
    rul[failure_sample:] = 0 # RUL is 0 at and after failure

    # Assemble DataFrame
    df = pd.DataFrame({
        'timestamp': timestamps,
        'machine_id': machine_id,
        'temperature': temp,
        'vibration': vib,
        'current': curr,
        'torque': torque,
        'failure': failure,
        'failure_imminent': failure_imminent,
        'RUL_hours': rul
    })

    return df

# --- 3. Generate and Combine Data for All Machines ---
print("Starting dataset generation...")
all_data_frames = []
start_date = datetime.datetime(2024, 1, 1)

for i in range(N_MACHINES):
    machine_df = generate_machine_data(machine_id=i+1, start_date=start_date)
    all_data_frames.append(machine_df)

# Combine all machines into a single dataset
full_dataset = pd.concat(all_data_frames).reset_index(drop=True)

# Add some final noise/anomalies
full_dataset['temperature'] = full_dataset['temperature'].round(2)
full_dataset['vibration'] = full_dataset['vibration'].round(4)
full_dataset['current'] = full_dataset['current'].round(3)
full_dataset['torque'] = full_dataset['torque'].round(2)

print("\nDataset generation complete!")
print(f"Total samples generated: {len(full_dataset)}")

# Save to file
output_filename = "synthetic_maintenance_data.csv"
full_dataset.to_csv(output_filename, index=False)
print(f"Dataset saved to '{output_filename}'")

# Display a sample
print("\n--- Dataset Head ---")
display(full_dataset.head())

print("\n--- Dataset Tail (showing potential failure) ---")
print(full_dataset[full_dataset['machine_id']==1].tail())

print("\n--- Target Variable Distribution (failure_imminent) ---")
print(full_dataset['failure_imminent'].value_counts(normalize=True))

Starting dataset generation...
Generating data for Machine ID: 1...
Generating data for Machine ID: 2...
Generating data for Machine ID: 3...
Generating data for Machine ID: 4...
Generating data for Machine ID: 5...

Dataset generation complete!
Total samples generated: 28800
Dataset saved to 'synthetic_maintenance_data.csv'

--- Dataset Head ---


Unnamed: 0,timestamp,machine_id,temperature,vibration,current,torque,failure,failure_imminent,RUL_hours
0,2024-01-01 00:00:00,1,25.91,0.5336,5.076,40.31,0,0,1360
1,2024-01-01 00:15:00,1,25.28,0.4537,5.156,38.82,0,0,1359
2,2024-01-01 00:30:00,1,27.01,0.4236,5.284,41.67,0,0,1359
3,2024-01-01 00:45:00,1,26.91,0.4515,4.878,40.82,0,0,1359
4,2024-01-01 01:00:00,1,23.18,0.6138,4.969,40.68,0,0,1359



--- Dataset Tail (showing potential failure) ---
               timestamp  machine_id  temperature  vibration  current  torque  \
5755 2024-02-29 22:45:00           1        25.01     0.6170    5.157   40.02   
5756 2024-02-29 23:00:00           1        26.00     0.3170    4.579   39.24   
5757 2024-02-29 23:15:00           1        23.17     0.4816    4.996   40.15   
5758 2024-02-29 23:30:00           1        25.12     0.5047    4.983   38.38   
5759 2024-02-29 23:45:00           1        24.44     0.4767    4.981   38.74   

      failure  failure_imminent  RUL_hours  
5755        0                 0          0  
5756        0                 0          0  
5757        0                 0          0  
5758        0                 0          0  
5759        0                 0          0  

--- Target Variable Distribution (failure_imminent) ---
failure_imminent
0    0.995833
1    0.004167
Name: proportion, dtype: float64
