**CREATING SAMPLE DATASET**

In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed both generators: `random` drives the violation roll, `np.random` the values.
random.seed(42)
np.random.seed(42)

# Shipments to simulate, and per-sensor-model metadata: the measured quantity,
# its unit, and the (low, high) interval that baseline readings are drawn from.
shipments = [101, 102, 103, 104, 105]
sensors = {
    "ThermoProbe T-100": {"type": "Temperature", "unit": "C", "range": (2, 8)},
    "TempTrack X5": {"type": "Surface Temp", "unit": "C", "range": (2, 10)},
    "HumidSensor H-200": {"type": "Humidity", "unit": "%", "range": (30, 50)},
    "ShockLog S-50": {"type": "Shock", "unit": "g", "range": (0, 5)},
    "GPS ColdTrack G-12": {"type": "GPS", "unit": "km deviation", "range": (0, 5)},
    "BatteryMonitor B-10": {"type": "Battery", "unit": "V", "range": (3.2, 5)},
}

# Interval an injected violation is drawn from, keyed by sensor type.
# NOTE(review): "Surface Temp" has no entry, so those readings are never
# replaced by a violation even when the roll succeeds -- confirm this is intended.
violation_ranges = {
    "Temperature": (9, 12),   # spike high
    "Humidity": (55, 70),     # too humid
    "Shock": (6, 12),         # high g
    "GPS": (6, 12),           # deviation
    "Battery": (2.5, 3.1),    # low battery
}

columns = [
    "reading_id", "shipment_id", "sensor_model", "sensor_type", "timestamp", "value", "unit"
]

# Generate sample dataset: 10 readings, 5 minutes apart, per sensor per shipment.
data = []
start_time = datetime(2025, 1, 1, 6, 0, 0)
reading_id = 1

for shipment in shipments:
    for sensor, details in sensors.items():
        for step in range(10):
            timestamp = start_time + timedelta(minutes=5 * step)

            # Baseline reading: uniformly distributed over the sensor's range.
            value = np.random.uniform(*details["range"])

            # The roll is drawn for every reading (keeps the random stream fixed);
            # the value is only overwritten when the type has a violation interval.
            spike = violation_ranges.get(details["type"]) if random.random() < 0.15 else None
            if spike is not None:
                value = np.random.uniform(*spike)

            data.append([
                reading_id, shipment, sensor, details["type"], timestamp, round(value, 2), details["unit"]
            ])
            reading_id += 1

# Create DataFrame
df = pd.DataFrame(data, columns=columns)




In [13]:
# Preview the first 30 readings. A bare last expression lets Jupyter use the
# DataFrame's rich repr instead of the wrapped plain-text output of print().
df.head(30)

    reading_id  shipment_id       sensor_model   sensor_type  \
0            1          101  ThermoProbe T-100   Temperature   
1            2          101  ThermoProbe T-100   Temperature   
2            3          101  ThermoProbe T-100   Temperature   
3            4          101  ThermoProbe T-100   Temperature   
4            5          101  ThermoProbe T-100   Temperature   
5            6          101  ThermoProbe T-100   Temperature   
6            7          101  ThermoProbe T-100   Temperature   
7            8          101  ThermoProbe T-100   Temperature   
8            9          101  ThermoProbe T-100   Temperature   
9           10          101  ThermoProbe T-100   Temperature   
10          11          101       TempTrack X5  Surface Temp   
11          12          101       TempTrack X5  Surface Temp   
12          13          101       TempTrack X5  Surface Temp   
13          14          101       TempTrack X5  Surface Temp   
14          15          101       TempTr