<a href="https://colab.research.google.com/github/RinayGajjar/AI4SEE/blob/main/AI4SEE_Task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import datetime
import random
import numpy as np

# Settings
target_size_gb = 1  # Target size in GB
row_size_bytes = 100  # Estimated average size of one row in bytes
num_rows = (target_size_gb * 1024**3) // row_size_bytes  # Total number of rows needed
sampling_rate_hz = 10   # 10 samples per second
start_time = datetime.datetime.now()

# Generate synthetic data in chunks to avoid memory overload
chunk_size = 1_000_000  # Number of rows per chunk
num_chunks = num_rows // chunk_size

# Function to generate a chunk of data
def generate_chunk(start_index, chunk_size):
    timestamps = []
    cpu_temperatures = []
    cpu_usages = []
    cpu_loads = []
    memory_usages = []
    battery_levels = []
    cpu_powers = []

    for i in range(chunk_size):
        current_time = start_time + datetime.timedelta(seconds=(start_index + i) / sampling_rate_hz)
        timestamps.append(current_time)

        # Generate random normal data
        cpu_temp = max(20, min(85, random.gauss(50, 10)))  # CPU temperature (20-85°C)
        cpu_usage = max(0, min(100, random.gauss(50, 20)))  # CPU usage (%)
        cpu_load = random.uniform(0, 2.5)  # 1-minute load average
        memory_usage = random.uniform(30, 90)  # Memory usage (%)
        battery_level = random.uniform(20, 100)  # Battery level (%)
        cpu_power = random.uniform(10, 50)  # CPU power consumption (W)

        # Introduce anomalies randomly
        if random.random() < 0.1:
            cpu_usage = random.uniform(90, 100)  # High CPU usage
        if random.random() < 0.05:
            cpu_temp = random.uniform(85, 105)  # High CPU temperature
        if random.random() < 0.05:
            memory_usage = random.uniform(90, 100)  # High memory usage
        if random.random() < 0.02:
            battery_level = random.uniform(0, 10)  # Low battery level
        if random.random() < 0.03:
            cpu_power = random.uniform(50, 100)  # High CPU power

        # Append to lists
        cpu_temperatures.append(cpu_temp)
        cpu_usages.append(cpu_usage)
        cpu_loads.append(cpu_load)
        memory_usages.append(memory_usage)
        battery_levels.append(battery_level)
        cpu_powers.append(cpu_power)

    # Create a DataFrame for the chunk
    chunk_data = {
        'timestamp': timestamps,
        'cpu_temperature': cpu_temperatures,
        'cpu_usage': cpu_usages,
        'cpu_load': cpu_loads,
        'memory_usage': memory_usages,
        'battery_level': battery_levels,
        'cpu_power': cpu_powers,
    }
    return pd.DataFrame(chunk_data)

# Generate and save the dataset in chunks
output_file = 'synthetic_hardware_monitor_1gb.csv'
with open(output_file, 'w') as f:
    for chunk_index in range(num_chunks):
        print(f"Generating chunk {chunk_index + 1}/{num_chunks}...")
        start_row = chunk_index * chunk_size
        chunk_df = generate_chunk(start_row, chunk_size)

        # Save chunk to file
        chunk_df.to_csv(f, index=False, header=(chunk_index == 0))  # Write header only for the first chunk

print(f"1GB dataset generated and saved to {output_file}.")


Generating chunk 1/10...
Generating chunk 2/10...
Generating chunk 3/10...
Generating chunk 4/10...
Generating chunk 5/10...
Generating chunk 6/10...
Generating chunk 7/10...
Generating chunk 8/10...
Generating chunk 9/10...
Generating chunk 10/10...
1GB dataset generated and saved to synthetic_hardware_monitor_1gb.csv.
