# In [3]:
import pandas as pd
import numpy as np

# --- Configuration -----------------------------------------------------------
num_samples = 4000  # row count of the synthetic dataset
random_seed = 42
# Seed NumPy's global RNG once so every draw below is reproducible.
np.random.seed(random_seed)


def _uniform(low, high):
    """Draw ``num_samples`` floats uniformly from the interval [low, high)."""
    return np.random.uniform(low, high, num_samples)


def _choice(options):
    """Draw ``num_samples`` labels from ``options`` with equal probability."""
    return np.random.choice(options, num_samples)


# --- Synthetic features ------------------------------------------------------
# NOTE: the entries are evaluated top to bottom and all share the global RNG,
# so reordering them changes the generated values for a given seed.
data = {
    # Current field / weather readings
    "Soil_Moisture": _uniform(10, 100),       # %
    "Temperature": _uniform(10, 40),          # °C
    "Humidity": _uniform(20, 90),             # %
    # Rain probabilities for increasing look-ahead windows
    "Rain_Forecast_2H": _uniform(0, 1),       # next 2 hours
    "Rain_Forecast_8H": _uniform(0, 1),       # next 8 hours
    "Rain_Forecast_16H": _uniform(0, 1),      # next 16 hours
    "Rain_Forecast_1D": _uniform(0, 1),       # next day
    "Rain_Volume": _uniform(0, 2),            # 0-2 scale, 2 = heavy
    "Wind_Speed": _uniform(0, 15),            # m/s
    "Solar_Radiation": _uniform(100, 1000),   # W/m²
    # Soil description
    "Soil_Type": _choice(["Sandy", "Clay", "Loamy"]),
    "Soil_Perm": _uniform(0.1, 1.0),          # permeability, 0-1 scale
    "Soil_Depth": _uniform(10, 100),          # cm
    # Crop description
    "Crop_Type": _choice(["Rice", "Wheat", "Barley", "Legumes", "Sugarcane"]),
    "Growth_Stage": _choice(["Seedling", "Vegetative", "Flowering", "Maturity"]),
    # Irrigation history and water balance
    "Previous_Watering": _uniform(0, 50),     # mm previously applied
    "Time_Since_Irrigation": _uniform(0, 7),  # days since last irrigation
    "Irrigation_Efficiency": _uniform(0.6, 1.0),  # efficiency of the irrigation system
    "Water_Deficit": _uniform(0, 30),         # mm
    "Evapotranspiration": _uniform(0, 8),     # mm/day
    "Cumulative_Rain_Impact": _uniform(0, 50),  # mm
    # Calendar context
    "Time_of_Day": _choice(["Morning", "Afternoon", "Evening"]),
    "Season": _choice(["Spring", "Summer", "Autumn", "Winter"]),
}

# Assemble the generated feature arrays into one table (one column per key).
dataset = pd.DataFrame(data)

# Replace each text-valued column with small integer codes. The categories are
# inferred from the column's distinct labels in sorted order, so the code of a
# label is its position in that sorted list.
categorical_columns = ["Soil_Type", "Crop_Type", "Growth_Stage", "Time_of_Day", "Season"]
for column in categorical_columns:
    encoded = pd.Categorical(dataset[column])
    dataset[column] = encoded.codes

# Build the target column: watering duration in minutes, as a linear blend of
# the drivers below, limited to a realistic 5-60 minute window.
#
# Fix: the soil-moisture term used to be `+0.5 * Soil_Moisture`, which made
# wetter soil produce LONGER watering, contradicting the stated intent. The
# term is now driven by dryness (100 - moisture %), so higher soil moisture
# shortens the duration. Soil_Moisture is generated in the 10-100 % range, so
# the dryness term stays non-negative.
dataset['Watering_Duration'] = (
    0.5 * (100 - dataset['Soil_Moisture'])  # Drier soil needs longer watering; higher moisture reduces it
    - 0.2 * dataset['Rain_Forecast_1D'] * 100  # Higher rain probability (scaled to %) reduces duration
    + 0.1 * dataset['Temperature']  # Higher temperature increases duration
    - 0.05 * dataset['Humidity']  # Higher humidity slightly reduces duration
    + 0.3 * dataset['Evapotranspiration']  # Higher ET increases watering needs
    + 15  # Base duration
).clip(lower=5, upper=60)  # Keeping within realistic limits of 5 to 60 minutes

# Rounding the time to whole minutes (values remain floats, e.g. 23.0)
dataset['Watering_Duration'] = dataset['Watering_Duration'].round()

# Persist the finished table as CSV in the current working directory; the row
# index is omitted so the file contains only the feature and target columns.
dataset.to_csv(path_or_buf="precise_irrigation_dataset.csv", index=False)

# Confirm completion to the user.
print("Dataset created and saved as 'precise_irrigation_dataset.csv'.")


# Output: Dataset created and saved as 'precise_irrigation_dataset.csv'.
