In [27]:
# Install required packages
# If using system Python, this will use --break-system-packages flag
import sys
import subprocess

# Check if we're in a virtual environment
in_venv = hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)

packages = ["streamlit", "tensorflow", "pandas", "numpy", "matplotlib", "seaborn", "scikit-learn", "scipy"]

for package in packages:
    cmd = [sys.executable, "-m", "pip", "install", package, "--quiet"]
    if not in_venv:
        cmd.append("--break-system-packages")
    subprocess.check_call(cmd)

from dataclasses import dataclass
from typing import Tuple, List

import os
import math
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses, metrics

In [28]:
@dataclass
class Config:
    # длина окна по времени (сколько точек траектории подаём на вход)
    seq_len: int = 20

    # число признаков на одну точку (сейчас: lat, lon)
    n_features: int = 2

    batch_size: int = 64
    learning_rate: float = 1e-3
    epochs: int = 10

    # horizon_steps — на сколько шагов вперёд смотрим:
    # войдёт ли траектория в AOI в ближайшие horizon_steps точек
    horizon_steps: int = 10

    # Максимальное количество образцов для обучения (None = использовать все)
    max_samples: int = 1000000  # 1M samples для начала

    # Размер буфера для перемешивания (меньше = меньше памяти)
    shuffle_buffer_size: int = 100000  # 100K samples

    # путь до корня Geolife (папка, где лежат подпапки пользователей и .plt файлы)
    geolife_root: str = "../geo-date/Data" 

    # AOI — прямоугольник (min_lat, min_lon, max_lat, max_lon)
    # Здесь пример - прямоугольник в Пекине (примерные координаты).
    aoi_rect: Tuple[float, float, float, float] = (
        39.90, 116.38, 39.92, 116.42
    )


CFG = Config()

In [29]:
def point_in_rect(lat: float, lon: float, rect: Tuple[float, float, float, float]) -> bool:
    """
    Проверяет, лежит ли точка (lat, lon) внутри прямоугольника rect.

    rect = (min_lat, min_lon, max_lat, max_lon)
    """
    min_lat, min_lon, max_lat, max_lon = rect
    return (min_lat <= lat <= max_lat) and (min_lon <= lon <= max_lon)


def read_geolife_plt_file(path: str) -> np.ndarray:
    """
    Читает один .plt файл Geolife и возвращает массив координат.

    Формат строк в Geolife .plt (после 6 строк заголовка):
    lat, lon, 0, altitude, days, date, time

    Здесь мы берём только lat и lon.

    Возвращает:
      numpy array формы (num_points, 2) → [lat, lon]
    """
    coords: List[Tuple[float, float]] = []

    with open(path, "r") as f:
        lines = f.readlines()

    # первые 6 строк — заголовок, пропускаем
    for line in lines[6:]:
        parts = line.strip().split(",")
        if len(parts) < 2:
            continue
        try:
            lat = float(parts[0])
            lon = float(parts[1])
            coords.append((lat, lon))
        except ValueError:
            # если строка битая, просто пропускаем
            continue

    if not coords:
        return np.empty((0, 2), dtype="float32")

    return np.array(coords, dtype="float32")


def load_geolife_trajectories(root_dir: str) -> List[np.ndarray]:
    """
    Обходит папку Geolife и собирает траектории.

    Структура Geolife обычно такая:
    root_dir/
      000/
        Trajectory/
          *.plt
      001/
        Trajectory/
          *.plt
      ...

    На выходе получаем список траекторий, каждая — массив (num_points, 2).
    """
    trajectories: List[np.ndarray] = []

    for user_id in os.listdir(root_dir):
        user_path = os.path.join(root_dir, user_id)
        traj_dir = os.path.join(user_path, "Trajectory")
        if not os.path.isdir(traj_dir):
            continue

        for fname in os.listdir(traj_dir):
            if not fname.endswith(".plt"):
                continue
            path = os.path.join(traj_dir, fname)
            coords = read_geolife_plt_file(path)
            if coords.shape[0] >= CFG.seq_len + CFG.horizon_steps + 1:
                trajectories.append(coords)

    print(f"Loaded {len(trajectories)} trajectories from Geolife.")
    return trajectories


def build_windows_and_labels(
    trajectories: List[np.ndarray],
    seq_len: int,
    horizon_steps: int,
    aoi_rect: Tuple[float, float, float, float],
    max_samples: int = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Из списка траекторий строим обучающие окна и метки.

    Для каждой траектории:
      траектория = [p0, p1, ..., pN], p_i = (lat, lon)

    Берём окна:
      X_window = p_{i-seq_len} ... p_{i-1}
    и смотрим вперёд:
      future_segment = p_i ... p_{i + horizon_steps - 1}

    Если future_segment хотя бы в одной точке заходит в AOI → y = 1
    иначе → y = 0

    Возвращает:
      X: (num_samples, seq_len, 2)
      y: (num_samples,)
    """
    X_list: List[np.ndarray] = []
    y_list: List[float] = []

    for traj in trajectories:
        num_points = traj.shape[0]
        # i — индекс текущего "начала" временного окна (будем предсказывать с этого момента вперёд)
        # окно берём [i-seq_len, i), будущее [i, i+horizon_steps)
        for i in range(seq_len, num_points - horizon_steps):
            # Ограничиваем количество образцов, если указано
            if max_samples is not None and len(X_list) >= max_samples:
                break
            
            past_window = traj[i - seq_len:i]              # (seq_len, 2)
            future_segment = traj[i:i + horizon_steps]     # (horizon_steps, 2)

            # метка: войдёт ли объект в AOI в ближайшее будущее
            will_enter = any(
                point_in_rect(float(lat), float(lon), aoi_rect)
                for (lat, lon) in future_segment
            )
            label = 1.0 if will_enter else 0.0

            X_list.append(past_window)
            y_list.append(label)
        
        # Прерываем внешний цикл, если достигли лимита
        if max_samples is not None and len(X_list) >= max_samples:
            break

    if not X_list:
        raise RuntimeError("No training samples built. "
                           "Check that trajectories are long enough and root path is correct.")

    X = np.stack(X_list).astype("float32")   # (num_samples, seq_len, 2)
    y = np.array(y_list, dtype="float32")    # (num_samples,)

    print(f"Built windows: X shape={X.shape}, y shape={y.shape}")
    if max_samples is not None and len(X_list) >= max_samples:
        print(f"Limited to {max_samples} samples due to max_samples setting.")
    return X, y


def make_tf_datasets(
    X: np.ndarray,
    y: np.ndarray,
    validation_split: float = 0.2,
    shuffle_buffer_size: int = None
) -> Tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Разбивает массивы X, y на train/val и заворачивает в tf.data.Dataset.
    """
    num_samples = X.shape[0]
    split_idx = int(num_samples * (1.0 - validation_split))

    X_train, X_val = X[:split_idx], X[split_idx:]
    y_train, y_val = y[:split_idx], y[split_idx:]

    # Используем разумный размер буфера для перемешивания
    if shuffle_buffer_size is None:
        shuffle_buffer_size = min(CFG.shuffle_buffer_size, len(X_train))
    else:
        shuffle_buffer_size = min(shuffle_buffer_size, len(X_train))

    train_ds = (
        tf.data.Dataset.from_tensor_slices((X_train, y_train))
        .shuffle(buffer_size=shuffle_buffer_size)
        .batch(CFG.batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    val_ds = (
        tf.data.Dataset.from_tensor_slices((X_val, y_val))
        .batch(CFG.batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    return train_ds, val_ds

In [30]:
def build_cnn_gru_model(
    seq_len: int,
    n_features: int
) -> tf.keras.Model:
    """
    Модель:
      Input (seq_len, n_features)
        → Conv1D (ловит локальные паттерны движения)
        → MaxPool (упрощает последовательность)
        → GRU (запоминает историю)
        → Dense → Sigmoid (вероятность входа в AOI)
    """
    inputs = layers.Input(shape=(seq_len, n_features), name="trajectory_window")

    x = layers.Conv1D(
        filters=32,
        kernel_size=3,
        padding="same",
        activation="relu",
        name="conv1d_1"
    )(inputs)

    x = layers.MaxPooling1D(pool_size=2, name="maxpool_1")(x)

    x = layers.GRU(64, name="gru_1")(x)

    x = layers.Dense(64, activation="relu", name="dense_1")(x)
    x = layers.Dropout(0.3, name="dropout_1")(x)

    outputs = layers.Dense(1, activation="sigmoid", name="output")(x)

    model = models.Model(inputs=inputs, outputs=outputs, name="GeoFence_CNN_GRU")

    model.compile(
        optimizer=optimizers.Adam(learning_rate=CFG.learning_rate),
        loss=losses.BinaryCrossentropy(),
        metrics=[
            metrics.BinaryAccuracy(name="accuracy"),
            metrics.AUC(name="auc"),
        ],
    )

    return model

In [31]:
def train_model() -> tf.keras.Model:
    # 1) Загружаем траектории
    trajectories = load_geolife_trajectories(CFG.geolife_root)

    # 2) Строим окна и метки
    X, y = build_windows_and_labels(
        trajectories,
        seq_len=CFG.seq_len,
        horizon_steps=CFG.horizon_steps,
        aoi_rect=CFG.aoi_rect,
        max_samples=CFG.max_samples
    )

    # 3) Train/val split
    train_ds, val_ds = make_tf_datasets(
        X, y, 
        validation_split=0.2,
        shuffle_buffer_size=CFG.shuffle_buffer_size
    )

    # 4) Модель
    model = build_cnn_gru_model(seq_len=CFG.seq_len, n_features=CFG.n_features)
    model.summary()

    # ======= CALLBACKS =======
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        "best_geofence_model.keras",
        monitor="val_auc",
        mode="max",
        save_best_only=True,
        verbose=1,
    )

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=2,
        min_lr=1e-6,
        verbose=1
    )

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
    # ==========================

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=CFG.epochs,
        callbacks=[checkpoint_cb, reduce_lr, early_stop],
    )

    print("Training finished.")

    # ===== EXPORT FOR JAVA =====
    export_dir = "exported_geofence_model"
    model.export(export_dir)
    print("SavedModel exported successfully to:", export_dir)

    return model

In [32]:
def predict_one_window(
    model: tf.keras.Model,
    window: np.ndarray
) -> float:
    """
    Предсказание для одного окна траектории.

    window: numpy array формы (seq_len, n_features)
    Возвращает вероятность входа в AOI в ближайшие horizon_steps шагов.
    """
    assert window.shape == (CFG.seq_len, CFG.n_features), (
        f"Expected window shape {(CFG.seq_len, CFG.n_features)}, "
        f"got {window.shape}"
    )

    window_batch = np.expand_dims(window, axis=0)  # (1, seq_len, n_features)
    prob = model.predict(window_batch, verbose=0)[0, 0]
    return float(prob)

In [33]:
if __name__ == "__main__":
    model = train_model()

    # Пример окна
    example_window = np.random.rand(CFG.seq_len, CFG.n_features).astype("float32")
    prob = predict_one_window(model, example_window)
    print(f"Intrusion probability in next {CFG.horizon_steps} steps: {prob:.3f}")

Loaded 17678 trajectories from Geolife.
Built windows: X shape=(1000000, 20, 2), y shape=(1000000,)
Limited to 1000000 samples due to max_samples setting.


2025-12-04 14:29:42.743896: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 128000000 exceeds 10% of free system memory.
2025-12-04 14:29:42.903349: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 128000000 exceeds 10% of free system memory.


Epoch 1/10
[1m12489/12500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9951 - auc: 0.5418 - loss: 0.0284
Epoch 1: val_auc improved from None to 0.50000, saving model to best_geofence_model.keras
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 5ms/step - accuracy: 0.9954 - auc: 0.5410 - loss: 0.0301 - val_accuracy: 0.9934 - val_auc: 0.5000 - val_loss: 0.0398 - learning_rate: 0.0010
Epoch 2/10
[1m12494/12500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9963 - auc: 0.5285 - loss: 0.0252
Epoch 2: val_auc did not improve from 0.50000
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 5ms/step - accuracy: 0.9956 - auc: 0.5355 - loss: 0.0294 - val_accuracy: 0.9934 - val_auc: 0.5000 - val_loss: 0.0398 - learning_rate: 0.0010
Epoch 3/10
[1m12490/12500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9963 - auc: 0.5317 - loss: 0.0250
Epoch 3: val_auc did not i

INFO:tensorflow:Assets written to: exported_geofence_model/assets


Saved artifact at 'exported_geofence_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 20, 2), dtype=tf.float32, name='trajectory_window')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  132523504992528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523505003472: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523504995216: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523505001360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523504995408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523500893584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523500894928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523500889936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132523500893200: TensorSpec(shape=(), dtype=tf.resource, name=None)
SavedModel exported successfully to: exported_geofence_model
Intrusion probabili

In [34]:
lat_min, lon_min, lat_max, lon_max = CFG.aoi_rect

inside_count = 0

for traj in trajectories:
    if np.any(
        (traj[:,0] >= lat_min) &
        (traj[:,0] <= lat_max) &
        (traj[:,1] >= lon_min) &
        (traj[:,1] <= lon_max)
    ):
        inside_count += 1

print("Trajectories that EVER touch AOI:", inside_count)
print("Total trajectories:", len(trajectories))


NameError: name 'trajectories' is not defined

In [None]:
# ===============================
# 1. Установка пакетов (можно убрать, если всё уже стоит)
# ===============================
import sys
import subprocess

in_venv = hasattr(sys, "real_prefix") or (
    hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
)

packages = [
    "streamlit",
    "tensorflow",
    "pandas",
    "numpy",
    "matplotlib",
    "seaborn",
    "scikit-learn",
    "scipy",
]

for package in packages:
    cmd = [sys.executable, "-m", "pip", "install", package, "--quiet"]
    if not in_venv:
        cmd.append("--break-system-packages")
    subprocess.check_call(cmd)

# ===============================
# 2. Импорты и конфиг
# ===============================
from dataclasses import dataclass
from typing import Tuple, List

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses, metrics


@dataclass
class Config:
    # длина окна по времени (сколько точек траектории подаём на вход)
    seq_len: int = 20

    # число признаков на одну точку (сейчас: lat, lon)
    n_features: int = 2

    batch_size: int = 64
    learning_rate: float = 1e-3
    epochs: int = 10

    # horizon_steps — на сколько шагов вперёд смотрим:
    # войдёт ли траектория в AOI в ближайшие horizon_steps точек
    horizon_steps: int = 10

    # Максимальное количество образцов для обучения (None = использовать все)
    max_samples: int = 1_000_000  # 1M samples для начала

    # Размер буфера для перемешивания (меньше = меньше памяти)
    shuffle_buffer_size: int = 100_000  # 100K samples

    # путь до корня Geolife (папка, где лежат подпапки пользователей и .plt файлы)
    geolife_root: str = "../geo-date/Data"

    # AOI — прямоугольник (min_lat, min_lon, max_lat, max_lon)
    # Прямоугольник в Пекине (примерные координаты).
    aoi_rect: Tuple[float, float, float, float] = (39.90, 116.38, 39.92, 116.42)


CFG = Config()


# ===============================
# 3. Вспомогательные функции
# ===============================
def point_in_rect(lat: float, lon: float, rect: Tuple[float, float, float, float]) -> bool:
    """
    Проверяет, лежит ли точка (lat, lon) внутри прямоугольника rect.

    rect = (min_lat, min_lon, max_lat, max_lon)
    """
    min_lat, min_lon, max_lat, max_lon = rect
    return (min_lat <= lat <= max_lat) and (min_lon <= lon <= max_lon)


def read_geolife_plt_file(path: str) -> np.ndarray:
    """
    Читает один .plt файл Geolife и возвращает массив координат.

    Формат строк в Geolife .plt (после 6 строк заголовка):
    lat, lon, 0, altitude, days, date, time

    Здесь мы берём только lat и lon.

    Возвращает:
      numpy array формы (num_points, 2) → [lat, lon]
    """
    coords: List[Tuple[float, float]] = []

    with open(path, "r") as f:
        lines = f.readlines()

    # первые 6 строк — заголовок, пропускаем
    for line in lines[6:]:
        parts = line.strip().split(",")
        if len(parts) < 2:
            continue
        try:
            lat = float(parts[0])
            lon = float(parts[1])
            coords.append((lat, lon))
        except ValueError:
            # если строка битая, просто пропускаем
            continue

    if not coords:
        return np.empty((0, 2), dtype="float32")

    return np.array(coords, dtype="float32")


def load_geolife_trajectories(root_dir: str) -> List[np.ndarray]:
    """
    Обходит папку Geolife и собирает траектории.

    Структура Geolife обычно такая:
    root_dir/
      000/
        Trajectory/
          *.plt
      001/
        Trajectory/
          *.plt
      ...

    На выходе получаем список траекторий, каждая — массив (num_points, 2).
    """
    trajectories: List[np.ndarray] = []

    for user_id in os.listdir(root_dir):
        user_path = os.path.join(root_dir, user_id)
        traj_dir = os.path.join(user_path, "Trajectory")
        if not os.path.isdir(traj_dir):
            continue

        for fname in os.listdir(tr_dir := traj_dir):
            if not fname.endswith(".plt"):
                continue
            path = os.path.join(tr_dir, fname)
            coords = read_geolife_plt_file(path)
            if coords.shape[0] >= CFG.seq_len + CFG.horizon_steps + 1:
                trajectories.append(coords)

    print(f"Loaded {len(trajectories)} trajectories from Geolife.")
    return trajectories


def build_windows_and_labels(
    trajectories: List[np.ndarray],
    seq_len: int,
    horizon_steps: int,
    aoi_rect: Tuple[float, float, float, float],
    max_samples: int = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Из списка траекторий строим обучающие окна и метки.

    Для каждой траектории:
      траектория = [p0, p1, ..., pN], p_i = (lat, lon)

    Берём окна:
      X_window = p_{i-seq_len} ... p_{i-1}
    и смотрим вперёд:
      future_segment = p_i ... p_{i + horizon_steps - 1}

    Если future_segment хотя бы в одной точке заходит в AOI → y = 1
    иначе → y = 0

    Возвращает:
      X: (num_samples, seq_len, 2)
      y: (num_samples,)
    """
    X_list: List[np.ndarray] = []
    y_list: List[float] = []

    for traj in trajectories:
        num_points = traj.shape[0]
        for i in range(seq_len, num_points - horizon_steps):
            if max_samples is not None and len(X_list) >= max_samples:
                break

            past_window = traj[i - seq_len : i]  # (seq_len, 2)
            future_segment = traj[i : i + horizon_steps]  # (horizon_steps, 2)

            will_enter = any(
                point_in_rect(float(lat), float(lon), aoi_rect)
                for (lat, lon) in future_segment
            )
            label = 1.0 if will_enter else 0.0

            X_list.append(past_window)
            y_list.append(label)

        if max_samples is not None and len(X_list) >= max_samples:
            break

    if not X_list:
        raise RuntimeError(
            "No training samples built. "
            "Check that trajectories are long enough and AOI overlaps trajectories."
        )

    X = np.stack(X_list).astype("float32")  # (num_samples, seq_len, 2)
    y = np.array(y_list, dtype="float32")  # (num_samples,)

    print(f"Built windows: X shape={X.shape}, y shape={y.shape}")
    if max_samples is not None and len(X_list) >= max_samples:
        print(f"Limited to {max_samples} samples due to max_samples setting.")
    return X, y


def make_tf_datasets(
    X: np.ndarray,
    y: np.ndarray,
    validation_split: float = 0.2,
    shuffle_buffer_size: int = None,
) -> Tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Разбивает массивы X, y на train/val и заворачивает в tf.data.Dataset.
    """
    num_samples = X.shape[0]
    split_idx = int(num_samples * (1.0 - validation_split))

    X_train, X_val = X[:split_idx], X[split_idx:]
    y_train, y_val = y[:split_idx], y[split_idx:]

    if shuffle_buffer_size is None:
        shuffle_buffer_size = min(CFG.shuffle_buffer_size, len(X_train))
    else:
        shuffle_buffer_size = min(shuffle_buffer_size, len(X_train))

    train_ds = (
        tf.data.Dataset.from_tensor_slices((X_train, y_train))
        .shuffle(buffer_size=shuffle_buffer_size)
        .batch(CFG.batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    val_ds = (
        tf.data.Dataset.from_tensor_slices((X_val, y_val))
        .batch(CFG.batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    return train_ds, val_ds


def build_cnn_gru_model(seq_len: int, n_features: int) -> tf.keras.Model:
    """
    Модель:
      Input (seq_len, n_features)
        → Conv1D (ловит локальные паттерны движения)
        → MaxPool (упрощает последовательность)
        → GRU (запоминает историю)
        → Dense → Sigmoid (вероятность входа в AOI)
    """
    inputs = layers.Input(shape=(seq_len, n_features), name="trajectory_window")

    x = layers.Conv1D(
        filters=32,
        kernel_size=3,
        padding="same",
        activation="relu",
        name="conv1d_1",
    )(inputs)

    x = layers.MaxPooling1D(pool_size=2, name="maxpool_1")(x)

    x = layers.GRU(64, name="gru_1")(x)

    x = layers.Dense(64, activation="relu", name="dense_1")(x)
    x = layers.Dropout(0.3, name="dropout_1")(x)

    outputs = layers.Dense(1, activation="sigmoid", name="output")(x)

    model = models.Model(inputs=inputs, outputs=outputs, name="GeoFence_CNN_GRU")

    model.compile(
        optimizer=optimizers.Adam(learning_rate=CFG.learning_rate),
        loss=losses.BinaryCrossentropy(),
        metrics=[
            metrics.BinaryAccuracy(name="accuracy"),
            metrics.AUC(name="auc"),
        ],
    )

    return model


# ===============================
# 4. Аналитика датасета (AOI + баланс классов)
# ===============================
def analyze_dataset(trajectories: List[np.ndarray], X: np.ndarray, y: np.ndarray) -> None:
    """
    Печатает:
      - сколько траекторий вообще пересекают AOI
      - сколько положительных/отрицательных меток
    """
    lat_min, lon_min, lat_max, lon_max = CFG.aoi_rect

    inside_traj_count = 0
    for traj in trajectories:
        if np.any(
            (traj[:, 0] >= lat_min)
            & (traj[:, 0] <= lat_max)
            & (traj[:, 1] >= lon_min)
            & (traj[:, 1] <= lon_max)
        ):
            inside_traj_count += 1

    total_traj = len(trajectories)
    pos = float(y.sum())
    total = len(y)
    neg = total - pos
    ratio = pos / total if total > 0 else 0.0

    print("\n=== DATASET ANALYSIS ===")
    print(f"Trajectories that EVER touch AOI: {inside_traj_count} / {total_traj}")
    print(f"Labels: positives={pos:.0f}, negatives={neg:.0f}, ratio={ratio:.4f}")
    print("========================\n")


# ===============================
# 5. Тренировка
# ===============================
def train_model(trajectories: List[np.ndarray]) -> tf.keras.Model:
    # 1) Строим окна и метки
    X, y = build_windows_and_labels(
        trajectories,
        seq_len=CFG.seq_len,
        horizon_steps=CFG.horizon_steps,
        aoi_rect=CFG.aoi_rect,
        max_samples=CFG.max_samples,
    )

    # 2) Анализ датасета (важно: баланс классов и покрытие AOI)
    analyze_dataset(trajectories, X, y)

    # 3) Train/val split
    train_ds, val_ds = make_tf_datasets(
        X,
        y,
        validation_split=0.2,
        shuffle_buffer_size=CFG.shuffle_buffer_size,
    )

    # 4) Модель
    model = build_cnn_gru_model(
        seq_len=CFG.seq_len,
        n_features=CFG.n_features,
    )
    model.summary()

    # ======= CALLBACKS =======
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        "best_geofence_model.keras",
        monitor="val_auc",
        mode="max",
        save_best_only=True,
        verbose=1,
    )

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=2,
        min_lr=1e-6,
        verbose=1,
    )

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
        verbose=1,
    )
    # ==========================

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=CFG.epochs,
        callbacks=[checkpoint_cb, reduce_lr, early_stop],
    )

    print("Training finished.")

    # ===== EXPORT FOR JAVA =====
    export_dir = "exported_geofence_model"
    model.export(export_dir)
    print("SavedModel exported successfully to:", export_dir)

    return model


# ===============================
# 6. Инференс для одного окна
# ===============================
def predict_one_window(model: tf.keras.Model, window: np.ndarray) -> float:
    """
    Предсказание для одного окна траектории.

    window: numpy array формы (seq_len, n_features)
    Возвращает вероятность входа в AOI в ближайшие horizon_steps шагов.
    """
    assert window.shape == (CFG.seq_len, CFG.n_features), (
        f"Expected window shape {(CFG.seq_len, CFG.n_features)}, "
        f"got {window.shape}"
    )

    window_batch = np.expand_dims(window, axis=0)  # (1, seq_len, n_features)
    prob = model.predict(window_batch, verbose=0)[0, 0]
    return float(prob)


# ===============================
# 7. Точка входа
# ===============================
if __name__ == "__main__":
    # 1) Загружаем траектории ОДИН раз
    trajectories = load_geolife_trajectories(CFG.geolife_root)

    # 2) Тренируем модель
    model = train_model(trajectories)

    # 3) Тестовое предсказание на рандомном окне (просто sanity-check)
    example_window = np.random.rand(CFG.seq_len, CFG.n_features).astype("float32")
    prob = predict_one_window(model, example_window)
    print(
        f"Intrusion probability in next {CFG.horizon_steps} steps (random window): {prob:.3f}"
    )
