In [1]:
# Cell 1: Import Libraries
import pandas as pd
import numpy as np
import os
from datetime import timedelta

print("Libraries imported successfully.")

Libraries imported successfully.


In [2]:
# Cell 2: Configuration
# Define where to save the simulated data
output_dir = '../data/simulated/'
os.makedirs(output_dir, exist_ok=True)

# Define the crops we are focusing on
CROPS = ['Black Pepper', 'Arecanut', 'Banana']

print(f"Configuration set. Output will be saved to: {output_dir}")

Configuration set. Output will be saved to: ../data/simulated/


In [3]:
# Cell 3: Simulate Environmental Time-Series Data
print("Simulating environmental data...")

# Simulate 1 year of hourly data for 15 different plots
dates = pd.date_range(start='2024-01-01', periods=365*24, freq='h')
all_plots_env_data = []

for plot_id in range(15):
    # Base seasonal patterns for a tropical climate (like in parts of India)
    temp_base = 28  # Average temperature
    temp_season_amplitude = 5 # Seasonal variation
    temp_season = temp_base - temp_season_amplitude * np.cos(2 * np.pi * (dates.dayofyear - 120) / 365) # Peak in May

    humidity_base = 80 # Average humidity
    humidity_season_amplitude = 15
    humidity_season = humidity_base + humidity_season_amplitude * np.sin(2 * np.pi * (dates.dayofyear - 180) / 365) # Peak in monsoon (July)

    # Add daily noise and random variations per plot
    temp_daily_noise = np.random.normal(0, 0.8, len(dates))
    humidity_daily_noise = np.random.normal(0, 3, len(dates))
    
    temp = temp_season + temp_daily_noise + np.random.uniform(-0.5, 0.5)
    humidity = np.clip(humidity_season + humidity_daily_noise + np.random.uniform(-3, 3), 40, 100)
    
    # Soil moisture is influenced by humidity/rain and has its own drying trend
    soil_moisture = np.interp(humidity, (40, 100), (35, 90)) + np.random.normal(0, 5)
    
    # pH is relatively stable with minor fluctuations
    ph = 6.2 + np.random.uniform(-0.4, 0.4) + np.random.normal(0, 0.05, len(dates))
    
    df = pd.DataFrame({
        'timestamp': dates,
        'plot_id': plot_id,
        'temperature': temp.round(2),
        'humidity': humidity.round(2),
        'soil_moisture': np.clip(soil_moisture, 0, 100).round(2),
        'soil_ph': np.clip(ph, 4, 9).round(2)
    })
    all_plots_env_data.append(df)

environmental_df = pd.concat(all_plots_env_data)
environmental_df.to_csv(os.path.join(output_dir, 'environmental_data.csv'), index=False)

print(f"Generated environmental data for {environmental_df['plot_id'].nunique()} plots.")
print(environmental_df.head())

Simulating environmental data...
Generated environmental data for 15 plots.
            timestamp  plot_id  temperature  humidity  soil_moisture  soil_ph
0 2024-01-01 00:00:00        0        30.67     74.29          58.80     5.75
1 2024-01-01 01:00:00        0        30.92     80.56          64.54     5.90
2 2024-01-01 02:00:00        0        30.22     77.35          61.61     5.88
3 2024-01-01 03:00:00        0        29.81     85.52          69.10     5.72
4 2024-01-01 04:00:00        0        31.43     81.97          65.84     5.79


In [5]:
# Cell 4: Simulate Microbial and Soil Nutrient Data
print("\nSimulating soil and microbial data...")

# One entry per plot, representing a soil test
soil_data_list = []
for plot_id in range(15):
    # Assign a primary crop and a mix
    primary_crop = np.random.choice(CROPS)
    companions = [c for c in CROPS if c != primary_crop]
    crop_mix = f"{primary_crop} + {np.random.choice(companions)}"

    # Simulate microbe counts and nutrient levels
    # Let's create a simple correlation: higher organic carbon -> more microbes
    organic_carbon = np.random.uniform(0.7, 3.0)
    microbe_a_count = int(1e5 * organic_carbon + np.random.normal(0, 5e4))
    microbe_b_count = int(8e4 * organic_carbon + np.random.normal(0, 4e4))
    
    # Simulate yield based on a pseudo-formula for realism
    # Good conditions: high N, P, K, and balanced pH
    nitrogen = np.random.uniform(280, 550)
    phosphorus = np.random.uniform(25, 70)
    potassium = np.random.uniform(170, 380)
    
    yield_base = 1000
    yield_factor_n = (nitrogen / 400)
    yield_factor_p = (phosphorus / 50)
    yield_factor_k = (potassium / 250)
    
    # The target variable for our DNN model
    final_yield = yield_base * yield_factor_n * yield_factor_p * yield_factor_k + np.random.normal(0, 150)
    
    soil_data_list.append({
        'plot_id': plot_id,
        'primary_crop': primary_crop,
        'crop_mix': crop_mix,
        'microbe_trichoderma_cfu_g': np.clip(microbe_a_count, 1e4, 1e7),
        'microbe_pseudomonas_cfu_g': np.clip(microbe_b_count, 8e3, 8e6),
        'organic_carbon_percent': round(organic_carbon, 2),
        'nitrogen_kg_ha': round(nitrogen, 2),
        'phosphorus_kg_ha': round(phosphorus, 2),
        'potassium_kg_ha': round(potassium, 2),
        'yield_kg_ha': np.clip(final_yield, 400, 5000).round(2)
    })

soil_df = pd.DataFrame(soil_data_list)
soil_df.to_csv(os.path.join(output_dir, 'soil_microbe_data.csv'), index=False)

print(f"Generated soil/microbial data for {len(soil_df)} plots.")
print(soil_df.head())


Simulating soil and microbial data...
Generated soil/microbial data for 15 plots.
   plot_id  primary_crop                 crop_mix  microbe_trichoderma_cfu_g  \
0        0  Black Pepper  Black Pepper + Arecanut                   237940.0   
1        1        Banana        Banana + Arecanut                    38712.0   
2        2      Arecanut  Arecanut + Black Pepper                   166993.0   
3        3  Black Pepper    Black Pepper + Banana                   164362.0   
4        4  Black Pepper  Black Pepper + Arecanut                   240067.0   

   microbe_pseudomonas_cfu_g  organic_carbon_percent  nitrogen_kg_ha  \
0                   128595.0                    2.30          471.73   
1                    38878.0                    1.09          355.07   
2                    74996.0                    1.64          325.03   
3                   115056.0                    1.24          430.80   
4                   147384.0                    2.21          399.96   

   