# Soil Bioinformatics ML - Simulation Demo

This notebook demonstrates the workflow of our soil sensor simulation and ML prediction system.

In [None]:
import sys
import os
sys.path.append('../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_pipeline.sensor_simulator import SensorSimulator
from preprocessing.data_processor import DataProcessor
from training.model_trainer import ModelTrainer

## Generate Simulated Sensor Data

In [None]:
# The same YAML config is reused by the processor and trainer cells further down.
config_path = "../configs/simulation_config.yaml"
simulator = SensorSimulator(config_path)

# Simulate one full day of sensor readings.
simulated_hours = 24
data = simulator.generate_sensor_data(duration_hours=simulated_hours)

# Trailing expression so the notebook renders a preview of the first rows
# (presumably a pandas DataFrame, given the .head() call and column indexing below).
data.head()

## Visualize Sensor Data

In [None]:
# One stacked panel per sensor channel: (data column, panel title, y-axis label).
panels = [
    ('co2_ppm', 'CO2 Levels', 'PPM'),
    ('ph', 'pH Levels', 'pH'),
    ('moisture_percent', 'Soil Moisture', '%'),
]

plt.figure(figsize=(15, 10))

# Every channel is drawn against the shared 'timestamp' column, top to bottom.
for row, (column, title, unit) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plt.plot(data['timestamp'], data[column])
    plt.title(title)
    plt.ylabel(unit)

# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()

## Process Data and Train Model

In [None]:
# The raw simulation output is written to disk first because the processor
# takes a CSV path rather than the in-memory data.
raw_training_csv = "../data/raw/training_data.csv"
simulator.save_data(data, raw_training_csv)

# Turn the saved readings into model inputs (sequences) and their targets (labels).
processor = DataProcessor(config_path)
sequences, labels = processor.process_sensor_data(raw_training_csv)

# Quick sanity check on how much training data was produced.
sequence_count = len(sequences)
print(f"Created {sequence_count} sequences of shape {sequences.shape}")

In [None]:
# Fit the model on the sequences/labels produced by the processing cell.
num_epochs = 50
trainer = ModelTrainer(config_path)
trainer.train(sequences, labels, epochs=num_epochs)

# Persist the trained model so it can be reused outside this notebook.
model_checkpoint_path = "../models/time_series/model.pth"
trainer.save_model(model_checkpoint_path)

## Make Predictions

In [None]:
# Generate a fresh 2-hour trace to evaluate the trained model on.
test_data = simulator.generate_sensor_data(duration_hours=2)
simulator.save_data(test_data, "../data/raw/test_data.csv")

# Process test data; the labels are discarded because only the inputs are needed here.
test_sequences, _ = processor.process_sensor_data("../data/raw/test_data.csv")

# One prediction per input sequence. inverse_transform_co2 presumably maps the
# model output back to the original CO2 scale (confirm against DataProcessor);
# [0][0] extracts the single value from its 2-D result.
predictions = [
    processor.inverse_transform_co2(trainer.predict(sequence))[0][0]
    for sequence in test_sequences
]

# Align the predictions with the tail of the series instead of hard-coding the
# window length (previously `[12:]`). The offset now follows whatever sequence
# length the config produces, and is identical to the old slice when it is 12.
# NOTE(review): assumes each sequence predicts the reading right after its
# window, i.e. the predictions cover the last len(predictions) timestamps.
prediction_start = len(test_data) - len(predictions)
prediction_times = test_data['timestamp'].iloc[prediction_start:]

# Plot results
plt.figure(figsize=(10, 6))
plt.plot(prediction_times, predictions, label='Predicted CO2')
plt.plot(test_data['timestamp'], test_data['co2_ppm'], label='Actual CO2')
plt.title('CO2 Predictions vs Actual Values')
plt.xlabel('Time')
plt.ylabel('CO2 (ppm)')
plt.legend()
plt.show()