# Crop-Water-Soil MTL Platform — Quickstart Demo

This notebook demonstrates the end-to-end multi-task learning (MTL) pipeline on synthetic data: data loading → preprocessing → training → evaluation.

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

import numpy as np
from src.data.ingestion import DataIngestion
from src.data.preprocessing import DataPreprocessor
from src.data.feature_engineering import FeatureEngineer

## 1. Load Data (Synthetic)

In [None]:
# Point the ingestion helper at ../data and load every dataset with
# use_synthetic=True (presumably generated rather than read from disk --
# confirm in DataIngestion.load_all).
ingestion = DataIngestion(data_root="../data")
data = ingestion.load_all(use_synthetic=True)

# Print one "<name>: <shape>" line per loaded dataset.
for name, table in data.items():
    print(f"{name}: {table.shape}")

## 2. Preprocess & Feature Engineering

In [None]:
# Run the preprocessing pipeline on the loaded data. It returns three values;
# only the first one is kept here, the other two are discarded.
preprocessor = DataPreprocessor(sequence_length=24)
merged, _, _ = preprocessor.preprocess_pipeline(data)
# Apply feature engineering; `merged` is rebound to the transformed result.
engineer = FeatureEngineer()
merged = engineer.transform(merged)
print(f"Merged shape: {merged.shape}")

## 3. Create Sequences & Train

In [None]:
# Target column(s) for each task, keyed by task name.
target_cols = {
    "crop": "ndvi",
    "soil": ["nitrogen", "phosphorus", "potassium", "organic_carbon"],
    "water": "groundwater_level",
}

# Columns kept out of the feature set: region_id / time_step plus every
# target column named in target_cols.
exclude = [
    "region_id", "time_step", "ndvi", "organic_carbon", "groundwater_level",
    "nitrogen", "phosphorus", "potassium",
]

# Use at most the first 64 numeric columns that are not excluded.
numeric_columns = merged.select_dtypes(include=[np.number]).columns
feature_cols = [name for name in numeric_columns if name not in exclude][:64]

# Build the model inputs and the per-task targets, then report their shapes.
X, targets = preprocessor.create_sequences(merged, feature_cols, target_cols)
print(f"X: {X.shape}, crop: {targets['crop'].shape}, soil: {targets['soil'].shape}, water: {targets['water'].shape}")

In [None]:
import torch
import yaml
from src.models.mtl_model import CropWaterSoilMTL
from src.training.trainer import Trainer

# Load the model/training configuration. The encoding is given explicitly so
# the file is decoded the same way regardless of the platform's default.
with open("../configs/model.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Soil targets are used as-is when already 2-D; otherwise the array is
# repeated into four columns (fallback kept from the original notebook).
soil_np = targets["soil"]
if soil_np.ndim != 2:
    soil_np = np.column_stack([soil_np] * 4)

# Convert targets to float32 tensors; crop and water are reshaped to a
# single column.
targets_t = {
    "crop": torch.tensor(targets["crop"].reshape(-1, 1).astype(np.float32)),
    "soil": torch.tensor(soil_np.astype(np.float32)),
    "water": torch.tensor(targets["water"].reshape(-1, 1).astype(np.float32)),
}
X_t = torch.tensor(X, dtype=torch.float32)

# Map the YAML encoder section onto the keyword names passed to the model
# (note that the YAML key "type" becomes "encoder_type").
enc = config["model"]["shared_encoder"]
heads = config["model"]["heads"]
encoder_config = {
    "hidden_dim": enc["hidden_dim"],
    "num_layers": enc["num_layers"],
    "dropout": enc["dropout"],
    "encoder_type": enc["type"],
    "bidirectional": enc["bidirectional"],
}
model = CropWaterSoilMTL(
    input_dim=X.shape[2],  # size of the last axis of X
    encoder_config=encoder_config,
    crop_config=heads["crop"],
    soil_config=heads["soil"],
    water_config=heads["water"],
)

# Train on the full tensors built above.
trainer = Trainer(model=model, config=config)
history = trainer.fit(X_t, targets_t)
print("Training complete.")

## 4. Quick Evaluation

In [None]:
# Per-task RMSE computed on the same tensors that were passed to
# trainer.fit above, so this is an in-sample sanity check rather than a
# held-out evaluation.
model.eval()

# Run inference on whichever device the model's parameters are on, in case
# the trainer moved the model off the CPU.
# NOTE(review): assumes the model exposes .parameters() like a torch.nn.Module.
first_param = next(model.parameters(), None)
device = first_param.device if first_param is not None else torch.device("cpu")
with torch.no_grad():
    pred = model(X_t.to(device))

for task in ["crop", "soil", "water"]:
    # .numpy() only works on CPU tensors, so bring predictions back first.
    p = pred[task].detach().cpu().numpy()
    t = targets_t[task].numpy()
    # Guard against silent broadcasting: a (N,) prediction against a (N, 1)
    # target would otherwise produce an (N, N) difference and a wrong RMSE.
    if p.shape != t.shape and p.size == t.size:
        p = p.reshape(t.shape)
    rmse = np.sqrt(np.mean((p - t) ** 2))
    print(f"{task} RMSE: {rmse:.4f}")