### Mean & standard deviation of the original data

In [5]:
import pandas as pd
import numpy as np

# Load the original sensor measurements from the Excel export into a DataFrame.
data = pd.read_excel("/Users/wardmestdagh/Documents/HIRB/THESIS/DOCUMENTEN PURATOS/SENSOR DATA/sensordata (09-2022_06-2023).xlsx")

# Columns for which summary statistics are reported.
features = [
    "RMSVibration.mean",
    "Temperature.mean",
    "SpeedPeak.mean",
    "Kurtosis.mean",
    "MaximumAbsoluteVibration.mean",
    "RMSAcceleration.mean",
    "Skewness.mean",
]

# Report the mean and the population standard deviation (ddof=0, the same
# convention np.std applies) of each feature, one blank-line-separated
# paragraph per feature.
for feature in features:
    column = data[feature]
    mean_value = column.mean()
    std_value = column.std(ddof=0)

    report_lines = (
        ("Feature:", feature),
        ("Mean:", mean_value),
        ("Standard Deviation:", std_value),
    )
    for label, value in report_lines:
        print(label, value)
    print()
    
    

Feature: RMSVibration.mean
Mean: 0.06833735048410561
Standard Deviation: 3.1925745149741926

Feature: Temperature.mean
Mean: 29.454999538602593
Standard Deviation: 10.842024123756904

Feature: SpeedPeak.mean
Mean: 32.931833661203065
Standard Deviation: 188.90170785792773

Feature: Kurtosis.mean
Mean: -0.30518701184578706
Standard Deviation: 1.756716371743166

Feature: MaximumAbsoluteVibration.mean
Mean: 0.18519815104428922
Standard Deviation: 1.2684597188373608

Feature: RMSAcceleration.mean
Mean: 0.06054332737825015
Standard Deviation: 0.09436486890417255

Feature: Skewness.mean
Mean: 0.00035740454610618205
Standard Deviation: 0.1649944870177848



### Synthetic data

In [8]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Parameters ---------------------------------------------------------------
start_date = datetime(2022, 9, 1)
end_date = datetime(2023, 6, 30)
# Number of whole hours in [start_date, end_date); end_date itself is excluded.
num_hours = int((end_date - start_date).total_seconds() / 3600)

# (mean, standard deviation) of every feature, copied from the statistics
# printed for the original sensor data. Keeping each pair together in one table
# prevents mixing up values between features (SpeedPeak.mean was previously
# drawn with Temperature's standard deviation as its mean).
# NOTE: the insertion order also fixes the order in which the seeded random
# draws are made, so reordering this dict changes the generated data.
FEATURE_STATS = {
    'RMSVibration.mean': (0.06833735048410561, 3.1925745149741926),
    'Temperature.mean': (29.454999538602593, 10.842024123756904),
    'SpeedPeak.mean': (32.931833661203065, 188.90170785792773),
    'Kurtosis.mean': (-0.30518701184578706, 1.756716371743166),
    'MaximumAbsoluteVibration.mean': (0.18519815104428922, 1.2684597188373608),
    'RMSAcceleration.mean': (0.06054332737825015, 0.09436486890417255),
    'Skewness.mean': (0.00035740454610618205, 0.1649944870177848),
}
feature_columns = list(FEATURE_STATS)

# Number of preceding readings inspected when labelling an instance.
WINDOW = 5
# Determined by trial and error: the value that produced the most usable data file.
threshold = 0.62


def breakdown_likelihoods(values, window):
    """Return the breakdown likelihood of every row that has `window` predecessors.

    For row ``i`` (``i >= window``) the `window` rows directly before it are
    taken, each feature is centred on its mean within that window, the absolute
    deviations are mapped to ``1 - exp(-|deviation|)``, and the result is
    averaged per feature and then across features.

    Parameters
    ----------
    values : array-like of shape (n_rows, n_features)
        Sensor readings in chronological order. Must not contain NaN; this
        NumPy implementation does not skip missing values.
    window : int
        Number of preceding rows used for each likelihood.

    Returns
    -------
    numpy.ndarray of shape (max(n_rows - window, 0),)
        Element ``j`` is the likelihood for row ``j + window``.
    """
    values = np.asarray(values, dtype=float)
    if values.shape[0] <= window:
        # No row has a complete window of predecessors.
        return np.empty(0, dtype=float)

    # Shape (n_rows - window + 1, n_features, window); the final window has no
    # following row to label, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(values, window, axis=0)[:-1]
    deviations = np.abs(windows - windows.mean(axis=2, keepdims=True))
    likelihoods = 1 - np.exp(-deviations)
    # Average inside each window per feature first, then across the features.
    return likelihoods.mean(axis=2).mean(axis=1)


# --- Synthetic sensor readings --------------------------------------------------
# One timestamp per hour.
timestamps = [start_date + timedelta(hours=i) for i in range(num_hours)]

# Legacy global seed kept so the generated data set stays reproducible.
np.random.seed(0)

# For each feature: draw num_hours values from a normal distribution with the
# same mean and standard deviation as that feature in the original data.
synthetic_readings = {
    name: np.random.normal(loc=mean, scale=std, size=num_hours)
    for name, (mean, std) in FEATURE_STATS.items()
}
sensor_data = pd.DataFrame({'ReadableDate': timestamps, **synthetic_readings})

# --- Breakdown labels -----------------------------------------------------------
# An instance is labelled as a breakdown when the likelihood derived from the
# deviations of its WINDOW preceding readings exceeds the threshold.
likelihood_per_row = breakdown_likelihoods(
    sensor_data[feature_columns].to_numpy(), WINDOW
)
breakdown = np.zeros(num_hours, dtype=int)
breakdown[WINDOW:] = threshold < likelihood_per_row
sensor_data['Breakdown'] = breakdown

# --- Export ---------------------------------------------------------------------
# The first WINDOW rows have no complete history and are therefore dropped.
sensor_data = sensor_data.iloc[WINDOW:]
sensor_data.to_csv('synthetic_dataset.csv', index=False)
