In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Generating synthetic dataset: configuration

In [54]:
N = 2000                              # Number of samples
interval_minutes = 10                 # Sensor frequency (every 10 minutes)
start_time = datetime(2024, 1, 1, 0, 0)

# Environmental baseline values
soil_moisture = 45.0                 # % VWC
temperature = 24.0                   # °C
humidity = 55.0                      # %
rainfall_forecast = 0.0              # mm (24h)

# Optimal thresholds (You can adjust for your greenhouse)
OPTIMAL_MIN_MOISTURE = 40
OPTIMAL_MAX_MOISTURE = 60
RAINFALL_THRESHOLD = 5               # If forecasted rain < 5 mm → irrigation needed

# Helper functions
def evaporation(temp, humidity):
    """Simple evaporation model: hotter + drier = more evaporation."""
    return max(0, 0.15 + 0.02 * (temp - 20) - 0.01 * (humidity - 50))

def rainfall_effect(rain):
    """Rain increases soil moisture slightly."""
    return 0.5 * rain

# Generating synthetic dataset: Data generation loop

In [55]:
timestamps = []
soil_list = []
temp_list = []
hum_list = []
rain_list = []
irrigation_list = []

current_time = start_time

for i in range(N):

    # 1) GENERATE TIMESTAMP
    timestamps.append(current_time)
    current_time += timedelta(minutes=interval_minutes)

    # 2) UPDATE ENV VARs
    temperature += np.random.uniform(-0.2, 0.2)
    humidity += np.random.uniform(-0.3, 0.3)

    # Daily rainfall forecast randomness
    if i % int(24*60/interval_minutes) == 0:
        rainfall_forecast = max(0, np.random.normal(3, 2))
    
    # Soil moisture dynamic 
    soil_moisture = soil_moisture \
                     - evaporation(temperature, humidity) \
                     + rainfall_effect(rainfall_forecast) \
                     + np.random.uniform(-0.1, 0.1)  # tiny noise

    soil_moisture = np.clip(soil_moisture, 10, 80)

    # 3) CONTROL LOGIC
    if soil_moisture < OPTIMAL_MIN_MOISTURE and rainfall_forecast < RAINFALL_THRESHOLD:
        irrigation = "HIGH"
    elif OPTIMAL_MIN_MOISTURE <= soil_moisture <= OPTIMAL_MAX_MOISTURE:
        irrigation = "MEDIUM"
    else:
        irrigation = "LOW"

    # Append data
    soil_list.append(soil_moisture)
    temp_list.append(temperature)
    hum_list.append(humidity)
    rain_list.append(rainfall_forecast)
    irrigation_list.append(irrigation)
       
print(soil_list)
print(temp_list)
print(hum_list)
print(rain_list)
print(irrigation_list)

[np.float64(44.80198834023034), np.float64(44.63744435232573), np.float64(44.391094325909854), np.float64(44.12777518232976), np.float64(44.03196345790838), np.float64(43.857450250593004), np.float64(43.67374127025936), np.float64(43.49383058495847), np.float64(43.33579050341335), np.float64(43.17862691157089), np.float64(42.927144820235995), np.float64(42.68843973169322), np.float64(42.548549712098406), np.float64(42.45023622854613), np.float64(42.27660856854718), np.float64(42.00676508264544), np.float64(41.893105948833785), np.float64(41.65764814353518), np.float64(41.50644126236371), np.float64(41.30770401373223), np.float64(41.20668938967745), np.float64(41.033526557809324), np.float64(40.7930203401412), np.float64(40.540094129936456), np.float64(40.346996183340536), np.float64(40.09746460639992), np.float64(39.81729833808857), np.float64(39.62149374269689), np.float64(39.37992336326832), np.float64(39.25659232294196), np.float64(39.07667294961979), np.float64(38.79278644059874), 

# Create dataframe and save it to csv

In [56]:
df = pd.DataFrame({
    "timestamp": timestamps,
    "soil_moisture": soil_list,
    "temperature": temp_list,
    "humidity": hum_list,
    "rainfall_forecast": rain_list,
    "irrigation_action": irrigation_list
})

# Save to CSV
df.to_csv("synthetic_greenhouse_dataset.csv", index=False)

df.head()

Unnamed: 0,timestamp,soil_moisture,temperature,humidity,rainfall_forecast,irrigation_action
0,2024-01-01 00:00:00,44.801988,23.820097,55.143749,0.0,MEDIUM
1,2024-01-01 00:10:00,44.637444,23.742953,55.316715,0.0,MEDIUM
2,2024-01-01 00:20:00,44.391094,23.673008,55.512332,0.0,MEDIUM
3,2024-01-01 00:30:00,44.127775,23.621291,55.235793,0.0,MEDIUM
4,2024-01-01 00:40:00,44.031963,23.754281,55.296063,0.0,MEDIUM
