# <b>IoT Sensor Data ETL</b>

In [11]:
import random
import time
import sqlite3
from datetime import datetime, timedelta

## <b>Extract Data</b>

In [12]:
def simulate_large_sensor_data(sensor_ids, num_records, seed=None, start_time=None):
    """Generate synthetic sensor readings.

    Produces ``num_records`` rounds of readings; each round contains one
    reading per sensor, in the order the sensors are given.

    Args:
        sensor_ids: Iterable of sensor identifiers. It is materialized once,
            so one-shot iterators (e.g. generators) are safe to pass.
        num_records: Number of readings to generate *per sensor*.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the output is reproducible and the global random state is
            left untouched. When ``None``, the module-level ``random``
            functions are used.
        start_time: Optional ``datetime`` marking the start of the 24-hour
            window timestamps are drawn from. Defaults to one day before
            ``datetime.now()``.

    Returns:
        A list of ``(sensor_id, timestamp, temperature, humidity)`` tuples,
        where ``timestamp`` is a ``'%Y-%m-%d %H:%M:%S'`` string,
        ``temperature`` lies in [20.0, 30.0] and ``humidity`` in [30.0, 60.0],
        both rounded to two decimals.
    """
    # Materialize once: the inner loop walks the sensors on every round, which
    # would silently yield nothing after round one for a one-shot iterator.
    sensor_ids = list(sensor_ids)

    # `random.Random` exposes the same randint/uniform API as the module.
    rng = random if seed is None else random.Random(seed)

    if start_time is None:
        start_time = datetime.now() - timedelta(days=1)  # Start 1 day ago

    data = []
    for _ in range(num_records):
        for sensor_id in sensor_ids:
            # randint is inclusive on both ends: offset is 0..86400 seconds.
            timestamp = start_time + timedelta(seconds=rng.randint(0, 86400))
            temperature = round(rng.uniform(20.0, 30.0), 2)
            humidity = round(rng.uniform(30.0, 60.0), 2)
            data.append((sensor_id, timestamp.strftime('%Y-%m-%d %H:%M:%S'), temperature, humidity))
    return data

## <b>Transform Data</b>

In [13]:
def transform_data(data):
    """Reduce raw readings to one averaged row per sensor.

    Args:
        data: Iterable of records indexed as ``record[0]`` = sensor id,
            ``record[2]`` = temperature, ``record[3]`` = humidity
            (``record[1]`` is not read).

    Returns:
        A list of ``(sensor_id, avg_temperature, avg_humidity)`` tuples, one
        per distinct sensor id, ordered by first appearance in ``data``.
    """
    # sensor id -> [temperature sum, humidity sum, number of readings]
    running = {}
    for row in data:
        key, temp, hum = row[0], row[2], row[3]
        acc = running.get(key)
        if acc is None:
            acc = running[key] = [0, 0, 0]
        acc[0] += temp
        acc[1] += hum
        acc[2] += 1

    return [
        (key, temp_sum / n, hum_sum / n)
        for key, (temp_sum, hum_sum, n) in running.items()
    ]

## <b>Load Data</b>

In [14]:
def load_data_to_db(db_name, table_name, transformed_data):
    """Append per-sensor averages to a SQLite table, creating it if needed.

    Every row written by one call shares a single load timestamp taken from
    ``datetime.now()`` and formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Args:
        db_name: Path of the SQLite database file, passed to
            ``sqlite3.connect``.
        table_name: Name of the destination table. It is interpolated
            directly into the SQL text (identifiers cannot be bound as
            parameters), so it must come from trusted code, never from
            user input.
        transformed_data: Iterable of records indexed as ``record[0]`` =
            sensor id, ``record[1]`` = average temperature, ``record[2]`` =
            average humidity.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
        IndexError: If a record has fewer than three elements; raised before
            the database is touched.
    """
    # One load timestamp shared by every row of this batch.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Build all rows up front so a malformed record fails fast.
    rows = [(record[0], timestamp, record[1], record[2]) for record in transformed_data]

    conn = sqlite3.connect(db_name)
    try:
        # The connection context manager commits on success and rolls back on
        # an exception; it does NOT close the connection, hence the finally.
        with conn:
            conn.execute(f"""
                CREATE TABLE IF NOT EXISTS {table_name} (
                    sensor_id INTEGER,
                    timestamp TEXT,
                    avg_temperature REAL,
                    avg_humidity REAL
                )
            """)
            conn.executemany(f"""
                INSERT INTO {table_name} (sensor_id, timestamp, avg_temperature, avg_humidity)
                VALUES (?, ?, ?, ?)
            """, rows)
    finally:
        conn.close()

## <b>Main Function</b>

In [15]:
# Driver cell: run the full extract -> transform -> load pipeline once.
sensor_ids = range(1, 101)  # Simulate 100 sensors (IDs 1 through 100)
num_records = 1000  # Readings generated per sensor (100 sensors x 1000 = 100,000 records)

# Extract: one reading per sensor for each of the num_records rounds.
large_sensor_data = simulate_large_sensor_data(sensor_ids, num_records)
print(f"Generated {len(large_sensor_data)} records")

# Transform: collapse the raw readings to one averaged row per sensor.
transformed_large_sensor_data = transform_data(large_sensor_data)
for record in transformed_large_sensor_data:
    print(f"Sensor ID: {record[0]}, Average Temperature: {record[1]:.2f}°C, Average Humidity: {record[2]:.2f}%")

# Load: rows are appended (INSERT only), so re-running this cell adds another
# timestamped batch of per-sensor averages to the same table.
load_data_to_db('sensor_data.db', 'sensor_readings', transformed_large_sensor_data)

Generated 100000 records
Sensor ID: 1, Average Temperature: 24.93°C, Average Humidity: 45.17%
Sensor ID: 2, Average Temperature: 25.02°C, Average Humidity: 45.10%
Sensor ID: 3, Average Temperature: 25.03°C, Average Humidity: 44.99%
Sensor ID: 4, Average Temperature: 25.00°C, Average Humidity: 45.34%
Sensor ID: 5, Average Temperature: 24.85°C, Average Humidity: 44.99%
Sensor ID: 6, Average Temperature: 25.08°C, Average Humidity: 44.87%
Sensor ID: 7, Average Temperature: 24.94°C, Average Humidity: 44.95%
Sensor ID: 8, Average Temperature: 24.88°C, Average Humidity: 44.98%
Sensor ID: 9, Average Temperature: 25.03°C, Average Humidity: 44.78%
Sensor ID: 10, Average Temperature: 25.12°C, Average Humidity: 45.60%
Sensor ID: 11, Average Temperature: 25.07°C, Average Humidity: 44.99%
Sensor ID: 12, Average Temperature: 24.91°C, Average Humidity: 45.17%
Sensor ID: 13, Average Temperature: 25.09°C, Average Humidity: 44.84%
Sensor ID: 14, Average Temperature: 24.92°C, Average Humidity: 45.15%
Sens