We are working with an edge device (radar board) that includes a 10-bit Analog-to-Digital Converter (ADC) and three different sensors. Each sensor operates at a different sampling interval and voltage range.

Sensor Specifications

Temperature Sensor

Sampling interval: Every 2 seconds

Voltage range: 2 V to 4 V

Moisture Sensor

Sampling interval: Every 4 hours

Voltage range: 1.2 V to 3 V

Light Sensor

Sampling interval: Every 5 seconds

Voltage range: 0 V to 5 V

ADC Details

ADC resolution: 10-bit

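ADC output range: 0 to 1023 (digital counts), mapped linearly onto a 0 V to 5 V input span (the code below uses a reference voltage VREF = 5.0 V)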

Task 1:
Generate a 24-hour edge-level dataset for all three sensors

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# ADC configuration
ADC_BITS = 10
ADC_MAX = (2 ** ADC_BITS) - 1  # 1023, the largest 10-bit code
VREF = 5.0  # volts; the input level that maps to ADC_MAX

# The 24-hour window starts at 00:00:00 of the day the cell is run
today = datetime.now().date()
start_time = datetime.combine(today, datetime.min.time())
end_time = start_time + timedelta(hours=24)

# Accumulates one [timestamp, sensor, voltage, adc_value] row per sample
data = []


def voltage_to_adc(voltage):
    """Convert an input voltage to a digital ADC count.

    The voltage is scaled linearly against VREF and truncated to an
    integer. The result saturates at the converter limits, so the
    returned code always lies in 0..ADC_MAX even when the input is
    below 0 V or above VREF.

    Args:
        voltage: Input voltage in volts.

    Returns:
        Integer ADC code between 0 and ADC_MAX inclusive.
    """
    counts = int((voltage / VREF) * ADC_MAX)
    # Clamp instead of emitting codes that do not fit in ADC_BITS bits.
    return max(0, min(ADC_MAX, counts))

# --------------------------------
# Sensor schedule
# (name, sampling interval, min voltage, max voltage)
# --------------------------------
SENSOR_SPECS = [
    ("temperature", timedelta(seconds=2), 2.0, 4.0),  # every 2 sec
    ("moisture", timedelta(hours=4), 1.2, 3.0),       # every 4 hours
    ("light", timedelta(seconds=5), 0.0, 5.0),        # every 5 sec
]

# --------------------------------
# Sample every sensor across the 24-hour window
# --------------------------------
for sensor_name, interval, v_min, v_max in SENSOR_SPECS:
    current_time = start_time
    while current_time < end_time:
        # Each reading is drawn uniformly from the sensor's voltage range
        voltage = np.random.uniform(v_min, v_max)
        data.append([
            current_time,
            sensor_name,
            round(voltage, 3),
            voltage_to_adc(voltage)
        ])
        current_time += interval

# Create DataFrame
df = pd.DataFrame(
    data,
    columns=["timestamp", "sensor", "voltage", "adc_value"]
)

# Sort chronologically. A stable sort keeps sensors that share a timestamp
# in generation order (temperature, moisture, light) instead of an
# arbitrary one.
df.sort_values("timestamp", kind="stable", inplace=True)

# Export to Excel
df.to_excel("edge_sensor_24h_dataset.xlsx", index=False)

print("Excel dataset generated successfully.")
print(df.head())


Excel dataset generated successfully.
                timestamp       sensor  voltage  adc_value
0     2026-01-17 00:00:00  temperature    3.466        709
43206 2026-01-17 00:00:00        light    1.832        374
43200 2026-01-17 00:00:00     moisture    1.687        345
1     2026-01-17 00:00:02  temperature    2.210        452
2     2026-01-17 00:00:04  temperature    3.432        702


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# -----------------------------
# Configuration
# -----------------------------
START_DATETIME = datetime(year=2026, month=1, day=17, hour=3)
END_DATETIME = START_DATETIME + timedelta(days=1)

# Sampling period of each sensor, in seconds
SENSORS = dict(
    temperature=2,
    light=5,
    moisture=4 * 3600,
)

# -----------------------------
# Generate data
# -----------------------------
# Length of the window in whole seconds; sample offsets are stepped over it
window_seconds = int((END_DATETIME - START_DATETIME).total_seconds())

rows = []
for name, period in SENSORS.items():
    for offset in range(0, window_seconds, period):
        stamp = START_DATETIME + timedelta(seconds=offset)
        reading = round(np.random.uniform(10, 100), 2)
        rows.append([stamp, name, reading])

df = pd.DataFrame(rows, columns=["timestamp", "sensor", "value"])

# Order by time and renumber the index from zero
ordered = df.sort_values("timestamp")
df = ordered.reset_index(drop=True)

# Write the dataset to disk
df.to_csv("node_3_to_3.csv", index=False)

earliest = df["timestamp"].min()
latest = df["timestamp"].max()

print("node_3_to_3.csv generated")
print("Total rows:", len(df))
print("Start:", earliest)
print("End:", latest)


node_3_to_3.csv generated
Total rows: 60486
Start: 2026-01-17 03:00:00
End: 2026-01-18 02:59:58


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

ADC_MAX = 1023  # largest code of the 10-bit converter
VREF = 5.0      # volts; the input level that maps to ADC_MAX


def voltage_to_adc(v):
    """Convert an input voltage to a digital ADC count.

    The voltage is scaled linearly against VREF and truncated to an
    integer. The result saturates at the converter limits, so the
    returned code always lies in 0..ADC_MAX even when the input is
    below 0 V or above VREF.

    Args:
        v: Input voltage in volts.

    Returns:
        Integer ADC code between 0 and ADC_MAX inclusive.
    """
    counts = int((v / VREF) * ADC_MAX)
    # Clamp instead of emitting codes outside the converter's range.
    return max(0, min(ADC_MAX, counts))

def generate_node_csv(start_hour, file_name):
    """Generate 24 hours of simulated sensor readings and save them as CSV.

    The window starts at ``start_hour`` hours after midnight of the day
    the function is called and covers the following 24 hours. Every
    reading is a uniformly random voltage within the sensor's range,
    stored together with its ADC code from ``voltage_to_adc``.

    Args:
        start_hour: Offset in hours from today's midnight at which the
            window begins.
        file_name: Path of the CSV file to write.

    Returns:
        None. The dataset is written to ``file_name`` and a one-line
        summary is printed.
    """
    today = datetime.now().date()
    start_time = datetime.combine(today, datetime.min.time()) + timedelta(hours=start_hour)
    end_time = start_time + timedelta(hours=24)

    # (sensor name, sampling interval, min voltage, max voltage)
    sensor_specs = (
        ("temperature", timedelta(seconds=2), 2, 4),
        ("moisture", timedelta(hours=4), 1.2, 3),
        ("light", timedelta(seconds=5), 0, 5),
    )

    rows = []
    for sensor, interval, v_min, v_max in sensor_specs:
        t = start_time
        while t < end_time:
            v = np.random.uniform(v_min, v_max)
            rows.append([t, sensor, v, voltage_to_adc(v)])
            t += interval

    df = pd.DataFrame(
        rows,
        columns=["timestamp", "sensor", "voltage", "adc_value"]
    )

    # A stable sort keeps sensors that share a timestamp in generation
    # order, so the row order of the file is reproducible.
    df.sort_values("timestamp", kind="stable", inplace=True)
    df.to_csv(file_name, index=False)
    print(f"{file_name} generated | Records: {len(df)}")


# Build one CSV per node; each entry is (window start hour, output file)
for start_hour, csv_name in (
    (12, "node1_12_to_12.csv"),
    (3, "node2_3_to_3.csv"),
    (4, "node3_4_to_4.csv"),
):
    generate_node_csv(start_hour, csv_name)

node1_12_to_12.csv generated | Records: 60486
node2_3_to_3.csv generated | Records: 60486
node3_4_to_4.csv generated | Records: 60486


In [None]:
import pandas as pd
import os
from google.colab import files
from datetime import time

# ----------------------------------------
# Merge target and uploaded inputs
# ----------------------------------------
# CSV file that accumulates the merged records
MASTER_FILE = "master_dataset.csv"

# Ask for the CSV files to merge (ANY CSV) through the Colab upload dialog
uploaded_files = files.upload()

# ----------------------------------------
# Define MASTER time window (12:00–12:00)
# ----------------------------------------
MASTER_START = time(12, 0, 0)
MASTER_END = time(12, 0, 0)


def filter_to_master_window(df, start=None, end=None):
    """Keep only the records whose time of day lies inside a daily window.

    The window is half-open: ``start`` is included, ``end`` is excluded.
    Three shapes are handled:

    * ``start == end``: the window spans a full 24 hours, so every row
      is kept (this is the case for the default 12:00 -> 12:00 window).
    * ``start < end``: a same-day window, e.g. 09:00 -> 17:00.
    * ``start > end``: a window that wraps past midnight,
      e.g. 22:00 -> 06:00.

    Args:
        df: DataFrame with a datetime64 ``timestamp`` column.
        start: Window start as ``datetime.time``; defaults to MASTER_START.
        end: Window end as ``datetime.time``; defaults to MASTER_END.

    Returns:
        A new DataFrame (a copy, safe to add columns to) holding the rows
        inside the window.
    """
    start = MASTER_START if start is None else start
    end = MASTER_END if end is None else end

    # Equal bounds describe a whole day: nothing to filter out.
    if start == end:
        return df.copy()

    t = df["timestamp"].dt.time

    if start < end:
        # Same-day window: both conditions must hold.
        mask = (t >= start) & (t < end)
    else:
        # Window wraps past midnight: either side of midnight qualifies.
        mask = (t >= start) | (t < end)

    return df[mask].copy()

# ----------------------------------------
# Process uploaded CSVs
# ----------------------------------------
REQUIRED_COLS = {"timestamp", "sensor", "voltage", "adc_value"}

for file_name in uploaded_files:

    print(f"\nProcessing file: {file_name}")

    # Read CSV. Timestamps are parsed only after the schema check: asking
    # read_csv to parse a missing "timestamp" column raises instead of
    # letting the file be skipped.
    df = pd.read_csv(file_name)

    # Validate schema
    if not REQUIRED_COLS.issubset(df.columns):
        print(f"Skipped {file_name} (invalid format)")
        continue

    # Rows whose timestamp cannot be parsed cannot be placed in time
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    bad_rows = int(df["timestamp"].isna().sum())
    if bad_rows:
        print(f"Dropped {bad_rows} rows with unparseable timestamps")
        df = df.dropna(subset=["timestamp"])

    # ----------------------------------------
    # Filter uploaded data to master window
    # ----------------------------------------
    # Work on a copy so later column assignments never touch a slice
    df = filter_to_master_window(df).copy()
    print("Records after window alignment:", len(df))

    # ----------------------------------------
    # Load or create master dataset
    # ----------------------------------------
    if os.path.exists(MASTER_FILE):
        master_df = pd.read_csv(MASTER_FILE, parse_dates=["timestamp"])
        combined_df = pd.concat([master_df, df], ignore_index=True)
    else:
        combined_df = df

    # ----------------------------------------
    # Overlap key (canonical)
    # ----------------------------------------
    # Rebuilt for every row, master rows included. A master row that
    # lacks the key would carry NaN, and all NaN keys count as duplicates
    # of one another, which would silently discard those rows.
    combined_df["overlap_key"] = (
        combined_df["timestamp"].astype(str) + "_" +
        combined_df["sensor"]
    )

    # ----------------------------------------
    # Remove overlap (same timestamp + sensor)
    # ----------------------------------------
    # keep="first" lets records already in the master win over new ones
    combined_df = combined_df.drop_duplicates(
        subset="overlap_key",
        keep="first"
    )

    # Sort and save
    combined_df = combined_df.sort_values("timestamp", kind="stable")
    combined_df.to_csv(MASTER_FILE, index=False)

    print("Merged successfully")
    print("Total records in master:", len(combined_df))

# ----------------------------------------
# Final summary
# ----------------------------------------
# The master file is missing when no master existed beforehand and every
# uploaded file was skipped; report that instead of failing on the read.
if os.path.exists(MASTER_FILE):
    master_df = pd.read_csv(MASTER_FILE, parse_dates=["timestamp"])

    print("\n========== FINAL MASTER DATASET ==========")
    print("Total records (no overlap):", len(master_df))
    print("Total unique time points  :", master_df["overlap_key"].nunique())

    display(master_df.tail(10))

    # Download master dataset
    files.download(MASTER_FILE)
else:
    print(f"\nNo {MASTER_FILE} found - nothing to summarise or download.")


Saving node2_3_to_3.csv to node2_3_to_3 (2).csv

Processing file: node2_3_to_3 (2).csv
Records after window alignment: 60486
Merged successfully
Total records in master: 83173

Total records (no overlap): 83173
Total unique time points  : 83172


Unnamed: 0,timestamp,sensor,voltage,adc_value,unique_key,date,overlap_key
83163,2026-01-18 11:59:45,light,4.566563,934,,,2026-01-18 11:59:45_light
83164,2026-01-18 11:59:46,temperature,3.796374,776,,,2026-01-18 11:59:46_temperature
83165,2026-01-18 11:59:48,temperature,2.121605,434,,,2026-01-18 11:59:48_temperature
83166,2026-01-18 11:59:50,temperature,3.313929,678,,,2026-01-18 11:59:50_temperature
83167,2026-01-18 11:59:50,light,3.768613,771,,,2026-01-18 11:59:50_light
83168,2026-01-18 11:59:52,temperature,2.440564,499,,,2026-01-18 11:59:52_temperature
83169,2026-01-18 11:59:54,temperature,2.024932,414,,,2026-01-18 11:59:54_temperature
83170,2026-01-18 11:59:55,light,3.289263,672,,,2026-01-18 11:59:55_light
83171,2026-01-18 11:59:56,temperature,2.771312,567,,,2026-01-18 11:59:56_temperature
83172,2026-01-18 11:59:58,temperature,2.555699,522,,,2026-01-18 11:59:58_temperature


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>