# Simulate Sensor Data

In [0]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [0]:
def generate_sensor_data(start_time, periods, machine_ids, anomaly_chance=0.01, inject_anomalies=False, freq_mins=5):
    """Simulate temperature, vibration and pressure readings for several machines.

    Every machine gets its own randomly drawn baseline for the three signals;
    each reading is that baseline plus Gaussian noise. When anomaly injection
    is enabled, a row is turned into an anomaly with probability
    ``anomaly_chance``: temperature and vibration are pushed up and pressure
    is pushed down.

    Args:
        start_time: ``datetime`` of the first reading of every machine.
        periods: Number of readings to generate per machine.
        machine_ids: Iterable of machine identifiers.
        anomaly_chance: Per-row probability of an anomaly. Only used when
            ``inject_anomalies`` is true.
        inject_anomalies: Whether anomalous rows may be generated at all.
        freq_mins: Minutes between consecutive readings. Defaults to 5, the
            spacing that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        A ``pandas.DataFrame`` with the columns ``timestamp``, ``machine_id``,
        ``temperature``, ``vibration``, ``pressure`` and ``inliner``.
        ``inliner`` is 1 for a normal row and -1 for an injected anomaly
        (the column name is kept as-is because it is part of the output schema).

    Raises:
        ValueError: If ``freq_mins`` is not positive.
    """
    if freq_mins <= 0:
        raise ValueError("freq_mins must be positive")

    step = timedelta(minutes=freq_mins)
    rows = []
    for machine_id in machine_ids:
        # Per-machine operating point; drawn once so each machine has a
        # stable level that its readings fluctuate around.
        base_temp = random.uniform(20, 35)
        base_vib = random.uniform(1.0, 2.0)
        base_pres = random.uniform(30, 35)

        for i in range(periods):
            temp = base_temp + np.random.normal(0, 1)
            vib = base_vib + np.random.normal(0, 0.2)
            pres = base_pres + np.random.normal(0, 0.5)
            label = 1
            # The probability draw is skipped entirely when injection is off,
            # so disabling anomalies does not consume random numbers.
            if inject_anomalies and random.random() < anomaly_chance:
                temp += random.uniform(10, 20)
                vib += random.uniform(1, 2)
                pres -= random.uniform(2, 4)
                label = -1
            rows.append((start_time + i * step, machine_id, temp, vib, pres, label))

    return pd.DataFrame(rows, columns=['timestamp', 'machine_id', 'temperature', 'vibration', 'pressure', 'inliner'])

## Simulate normal training data

In [0]:
# Target namespace for the tables written below: <catalog>.<schema>.<table_name>
catalog, schema = "workspace", "genai_demo"

In [0]:
# Parameters for the anomaly-free training set
n_days = 1
freq_mins = 10
n_points = int(n_days * 24 * 60 / freq_mins)
machine_ids = list(range(1, 48))
start_time = datetime.now() - timedelta(days=n_days)

# NOTE(review): n_points is sized for one reading every `freq_mins` minutes,
# but the call below does not pass a spacing and generate_sensor_data spaces
# rows 5 minutes apart, so the generated rows span half of the n_days window.
# Confirm which of the two is intended.

# Dataset 1: normal readings only
table_name = "sensor_data_normal"
df_normal = generate_sensor_data(
    start_time=start_time,
    periods=n_points,
    machine_ids=machine_ids,
    inject_anomalies=False,
)

# Replace the Delta table with the freshly simulated data.
(
    spark.createDataFrame(df_normal)
    .write.mode("overwrite")
    .format("delta")
    .option("mergeSchema", "true")
    .saveAsTable(f"{catalog}.{schema}.{table_name}")
)

## Simulate test data (with injected anomalies)

In [0]:
# Parameters for the test set: one day of readings starting now.
n_days = 1
n_points = int(n_days * 24 * 60 / 5)  # one reading every 5 minutes
machine_ids = list(range(1, 6))
start_time = datetime.now()

# Dataset 2: readings with injected anomalies
table_name = "sensor_data_test"
df_test = generate_sensor_data(
    start_time=start_time,
    periods=n_points,
    machine_ids=machine_ids,
    anomaly_chance=0.01,
    inject_anomalies=True,
)

# Replace the Delta table with the freshly simulated data.
(
    spark.createDataFrame(df_test)
    .write.mode("overwrite")
    .format("delta")
    .option("mergeSchema", "true")
    .saveAsTable(f"{catalog}.{schema}.{table_name}")
)