In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Configuration ---
num_records = 20  # Adjust this number as needed for your simulation
# --- End Configuration ---

# Example for Smart Environmental Monitoring - Matching groupmate's structure
data = []

for i in range(num_records):
    # Generate random timestamp
    timestamp = datetime.now() - timedelta(minutes=np.random.randint(0, 1440))

    # Generate a unique/random sensor ID (keep this as it's good practice,
    # although your groupmate's output didn't explicitly show it as a column name,
    # it would be needed in a real system to identify readings)
    sensor_id = f"ENV-SENSOR-{np.random.randint(1000, 9999)}"

    # Simulate environmental data with plausible ranges - Matching groupmate's columns
    temperature_c = round(np.random.uniform(10.0, 40.0), 1)  # Temperature in Celsius
    humidity_percent = np.random.randint(20, 90)          # Humidity in percentage
    co2_ppm = np.random.randint(400, 2000)                # CO2 level in parts per million
    pm25_ug_m3 = round(np.random.uniform(5.0, 75.0), 1)   # PM2.5 in µg/m³ (e.g., 5-15 Good, 15-40 Moderate, 40-65 Unhealthy)
    soil_moisture_percent = np.random.randint(10, 80)    # Soil Moisture in percentage
    ph = round(np.random.uniform(5.5, 8.5), 1)           # pH (e.g., 5.5-7.0 slightly acidic, 7.0 neutral, 7.0-8.5 slightly alkaline)
    turbidity_ntu = round(np.random.uniform(1.0, 50.0), 1)# Turbidity in NTU (e.g., <5 typically good drinking water, higher indicates more suspended solids)
    # Assuming 'Contamination' is a generic pollutant level, perhaps 0-100 scale
    contamination_ppm = round(np.random.uniform(0.0, 10.0), 2) # Example contaminant level

    record = {
        # Note: Column names might need exact matching with groupmate's later if combining data
        # Using descriptive names for now, you might adjust based on your group's final decision
        "Timestamp": timestamp,
        "Sensor_ID": sensor_id, # Added sensor ID for realism, group can decide if needed in final CSV
        "Temperature (°C)": temperature_c,
        "Humidity (%)": humidity_percent,
        "CO2 (ppm)": co2_ppm,
        "PM2.5 (µg/m³)": pm25_ug_m3,
        "Soil Moisture (%)": soil_moisture_percent,
        "pH": ph,
        "Turbidity (NTU)": turbidity_ntu,
        "Contamination (ppm)": contamination_ppm
    }
    data.append(record)

# Convert to DataFrame
df = pd.DataFrame(data)

# Sort by timestamp (optional, but good for time-series)
df = df.sort_values(by="Timestamp").reset_index(drop=True)

# Save dataset
csv_filename = "environmental_data_group_consistent.csv" # Use a name indicating group agreement
json_filename = "environmental_data_group_consistent.json" # Optional

df.to_csv(csv_filename, index=False)
df.to_json(json_filename, orient="records", indent=4)

# Display first few rows and info to confirm
print("Generated Group-Consistent Data Head:")
print(df.head())

print("\nDataFrame Info:")
df.info()

print(f"\nSuccessfully generated {num_records} records with group-consistent parameters.")
print(f"Data saved to {csv_filename} and {json_filename}.")

Generated Group-Consistent Data Head:
                   Timestamp        Sensor_ID  Temperature (°C)  Humidity (%)  \
0 2025-05-08 21:48:35.718864  ENV-SENSOR-5697              30.9            47   
1 2025-05-08 23:09:35.718386  ENV-SENSOR-1224              28.9            53   
2 2025-05-08 23:44:35.716489  ENV-SENSOR-3181              14.4            60   
3 2025-05-09 00:11:35.718930  ENV-SENSOR-6687              38.3            63   
4 2025-05-09 01:00:35.719101  ENV-SENSOR-5454              22.9            55   

   CO2 (ppm)  PM2.5 (µg/m³)  Soil Moisture (%)   pH  Turbidity (NTU)  \
0        626           65.5                 19  6.6             28.9   
1        506           46.4                 42  7.8             39.6   
2       1590           60.4                 23  7.3              4.1   
3       1112           61.9                 39  6.6             14.7   
4       1724           15.9                 12  8.3             25.7   

   Contamination (ppm)  
0                