In [1]:
import pandas as pd
import datetime
import random


# --- Simulation parameters ---------------------------------------------------
duration_minutes = 600
sampling_rate_hz = 10
num_samples = duration_minutes * 60 * sampling_rate_hz

# Fixed seed so that "Restart Kernel -> Run All" regenerates the same dataset.
RANDOM_SEED = 42
# Chance, drawn independently per metric, that a sample is overwritten with a
# value from that metric's "anomalous" range.
ANOMALY_PROBABILITY = 0.1

random.seed(RANDOM_SEED)

# Initialize lists to store data
timestamps = []
cpu_temperatures = []
cpu_usages = []
cpu_loads = []
memory_usages = []
battery_levels = []
cpu_powers = []

# Read the wall clock exactly once. Calling now() inside the loop would add the
# loop's own execution time to every timestamp, so consecutive samples would
# not be spaced exactly 1 / sampling_rate_hz seconds apart.
start_time = datetime.datetime.now()

# Simulating data collection without real-time delays
for i in range(num_samples):
    # Synthetic timestamp: start time plus the sample's offset on the sampling grid
    timestamps.append(start_time + datetime.timedelta(seconds=i / sampling_rate_hz))

    # Baseline readings, each drawn uniformly from its nominal range
    cpu_temperatures.append(random.uniform(30, 90))
    cpu_usages.append(random.uniform(0, 100))
    cpu_loads.append(random.uniform(0, 3))
    memory_usages.append(random.uniform(0, 100))
    battery_levels.append(random.uniform(0, 100))
    cpu_powers.append(random.uniform(0, 100))

    # Introducing synthetic anomalies: each metric is overwritten independently.
    # NOTE(review): apart from cpu_temperature (90-105), the anomalous ranges lie
    # inside the corresponding baseline ranges, so these samples are extreme
    # but not out of range.
    if random.random() < ANOMALY_PROBABILITY:
        cpu_usages[-1] = random.uniform(90, 100)
    if random.random() < ANOMALY_PROBABILITY:
        cpu_temperatures[-1] = random.uniform(90, 105)
    if random.random() < ANOMALY_PROBABILITY:
        memory_usages[-1] = random.uniform(95, 100)
    if random.random() < ANOMALY_PROBABILITY:
        battery_levels[-1] = random.uniform(0, 10)
    if random.random() < ANOMALY_PROBABILITY:
        cpu_powers[-1] = random.uniform(50, 100)

# Column name -> list of per-sample values; all lists have num_samples entries.
data = {
    'timestamp': timestamps,
    'cpu_temperature': cpu_temperatures,
    'cpu_usage': cpu_usages,
    'cpu_load': cpu_loads,
    'memory_usage': memory_usages,
    'battery_level': battery_levels,
    'cpu_power': cpu_powers
}
df_real = pd.DataFrame(data)

# Written relative to the current working directory, without the index column.
df_real.to_csv('synthetic_hardware_monitor_data.csv', index=False)

print("Data generation complete.")


Data generation complete.


In [2]:


# Loading the data.
# Read from the same relative path the generation cell writes to, instead of a
# hardcoded absolute '/content/...' path that only resolves when the working
# directory happens to be /content.
df = pd.read_csv('synthetic_hardware_monitor_data.csv')

# Defining a simple anomaly detection function using IQR
def detect_anomalies(series, iqr_multiplier=1.5):
    """Flag values that fall outside the interquartile-range (IQR) fences.

    The fences are ``Q1 - iqr_multiplier * IQR`` and ``Q3 + iqr_multiplier * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of ``series`` and
    ``IQR = Q3 - Q1``. Values exactly on a fence are not flagged.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to inspect.
    iqr_multiplier : float, default 1.5
        Width of the fences in units of IQR. Larger values flag fewer points.

    Returns
    -------
    pandas.Series
        Boolean mask with the same index as ``series``; True marks a value
        strictly below the lower fence or strictly above the upper fence.

    Raises
    ------
    ValueError
        If ``iqr_multiplier`` is negative.
    """
    if iqr_multiplier < 0:
        raise ValueError("iqr_multiplier must be non-negative")

    q1 = series.quantile(0.25)  # First quartile
    q3 = series.quantile(0.75)  # Third quartile
    iqr = q3 - q1  # Interquartile range
    lower_bound = q1 - iqr_multiplier * iqr  # Lower fence
    upper_bound = q3 + iqr_multiplier * iqr  # Upper fence
    return (series < lower_bound) | (series > upper_bound)


# Each monitored metric and the column that receives its boolean outlier mask.
flag_column_by_metric = {
    'cpu_temperature': 'cpu_temperature_anomaly',
    'cpu_usage': 'cpu_usage_anomaly',
    'memory_usage': 'memory_usage_anomaly',
}

# Add one flag column per metric to the loaded frame.
for metric, flag_column in flag_column_by_metric.items():
    df[flag_column] = detect_anomalies(df[metric])

# Preview the first rows: the timestamp, then every metric beside its flag.
preview_columns = ['timestamp']
for metric, flag_column in flag_column_by_metric.items():
    preview_columns.extend([metric, flag_column])
print(df[preview_columns].head())

# Save the annotated frame (index column omitted).
df.to_csv('synthetic_data_with_anomalies.csv', index=False)

# Summing a boolean mask counts its True entries, i.e. the flagged rows.
# NOTE(review): the output recorded for this cell reports 0 for all three counts.
cpu_temp_anomalies, cpu_usage_anomalies, memory_usage_anomalies = (
    df[flag_column].sum() for flag_column in flag_column_by_metric.values()
)

print(f"CPU Temperature Anomalies Detected: {cpu_temp_anomalies}")
print(f"CPU Usage Anomalies Detected: {cpu_usage_anomalies}")
print(f"Memory Usage Anomalies Detected: {memory_usage_anomalies}")


                    timestamp  cpu_temperature  cpu_temperature_anomaly  \
0  2024-11-30 10:07:09.683893        74.574711                    False   
1  2024-11-30 10:07:09.783916        69.069559                    False   
2  2024-11-30 10:07:09.883925        79.987026                    False   
3  2024-11-30 10:07:09.983932        80.915932                    False   
4  2024-11-30 10:07:10.083938        84.271266                    False   

   cpu_usage  cpu_usage_anomaly  memory_usage  memory_usage_anomaly  
0  56.617181              False     80.078925                 False  
1  97.562872              False     97.920369                 False  
2  56.980606              False     86.367742                 False  
3  69.937202              False     80.709217                 False  
4  62.354936              False     97.456278                 False  
CPU Temperature Anomalies Detected: 0
CPU Usage Anomalies Detected: 0
Memory Usage Anomalies Detected: 0
