# IoT Environmental Monitoring Data Generation

This notebook generates simulated data for environmental monitoring sensors including:
- Air Quality (CO2 and PM2.5)
- Temperature & Humidity
- Soil Moisture
- Water Quality (pH and turbidity)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)

In [2]:
# Generate timestamps for 24 hours of data at 5-minute intervals
timestamps = [datetime.now() - timedelta(minutes=i*5) for i in range(288)]
timestamps.reverse()  # Make timestamps go forward in time

# Initialize data dictionary
data = {
    'timestamp': timestamps,
    
    # Air Quality Sensors
    'co2_ppm': np.random.normal(400, 50, len(timestamps)),  # CO2 in parts per million
    'pm25_ugm3': np.random.normal(15, 5, len(timestamps)),  # PM2.5 in µg/m³
    
    # Temperature & Humidity Sensors
    'temperature_c': np.random.normal(25, 3, len(timestamps)),  # Temperature in Celsius
    'humidity_pct': np.random.normal(60, 10, len(timestamps)),  # Humidity percentage
    
    # Soil Moisture Sensors
    'soil_moisture_pct': np.random.normal(35, 5, len(timestamps)),  # Soil moisture percentage
    
    # Water Quality Sensors
    'water_ph': np.random.normal(7, 0.5, len(timestamps)),  # pH level
    'water_turbidity_ntu': np.random.normal(5, 1, len(timestamps)),  # Turbidity in NTU
}

In [3]:
# Create DataFrame
df = pd.DataFrame(data)

# Apply constraints to make data more realistic
df['co2_ppm'] = df['co2_ppm'].clip(350, 800)  # Typical outdoor CO2 range
df['pm25_ugm3'] = df['pm25_ugm3'].clip(0, 50)  # Realistic PM2.5 range
df['humidity_pct'] = df['humidity_pct'].clip(0, 100)  # Valid humidity range
df['soil_moisture_pct'] = df['soil_moisture_pct'].clip(0, 100)  # Valid moisture range
df['water_ph'] = df['water_ph'].clip(0, 14)  # Valid pH range
df['water_turbidity_ntu'] = df['water_turbidity_ntu'].clip(0, 20)  # Typical turbidity range

# Add some daily patterns to temperature
time_of_day = np.array([(t.hour + t.minute/60) for t in df['timestamp']])
temperature_variation = 2 * np.sin(2 * np.pi * (time_of_day - 14) / 24)  # Peak at 2 PM
df['temperature_c'] += temperature_variation

In [4]:
# Display the first few records
print("Dataset preview:")
df.head()

Dataset preview:


Unnamed: 0,timestamp,co2_ppm,pm25_ugm3,temperature_c,humidity_pct,soil_moisture_pct,water_ph,water_turbidity_ntu
0,2025-04-30 23:24:04.430936,424.835708,16.404959,26.965484,53.872113,31.805191,7.331441,5.367287
1,2025-04-30 23:29:04.430936,393.086785,11.886502,28.53703,56.122984,28.384551,7.586737,6.838184
2,2025-04-30 23:34:04.430936,432.384427,13.959389,21.753887,62.858654,43.210076,7.090511,4.776534
3,2025-04-30 23:39:04.430936,476.151493,12.534995,29.585553,63.344568,40.049085,6.351584,4.650683
4,2025-04-30 23:44:04.430936,388.292331,12.053176,27.133875,66.585443,31.559248,7.199844,4.98058


In [5]:
# Display summary statistics
print("Dataset summary statistics:")
df.describe()

Dataset summary statistics:


Unnamed: 0,timestamp,co2_ppm,pm25_ugm3,temperature_c,humidity_pct,soil_moisture_pct,water_ph,water_turbidity_ntu
count,288,288.0,288.0,288.0,288.0,288.0,288.0,288.0
mean,2025-05-01 11:21:34.430918400,403.335855,14.87625,25.125628,61.490568,35.220668,7.050106,4.991904
min,2025-04-30 23:24:04.430936,350.0,2.641777,15.106499,31.037446,22.044789,5.539325,2.059611
25%,2025-05-01 05:22:49.430936064,364.533002,11.526932,22.725243,55.166387,31.649114,6.730115,4.349056
50%,2025-05-01 11:21:34.430936064,402.594514,14.900205,25.14078,61.970604,35.186688,7.0385,4.973536
75%,2025-05-01 17:20:19.430936064,430.758703,18.115837,27.399124,67.632357,38.427358,7.371222,5.662269
max,2025-05-01 23:19:04.429935,592.636575,30.394404,34.102298,85.269324,47.898547,8.596554,8.137749
std,,43.7804,4.805645,3.393865,9.781277,5.054829,0.498191,0.981531


In [6]:
# Save to CSV and JSON
df.to_csv('iot_data.csv', index=False)
df.to_json('iot_data.json', orient='records')
print("Data saved to iot_data.csv and iot_data.json")

Data saved to iot_data.csv
