In [None]:
import pandas as pd
from dateutil import parser
# Load the cleaned dataset; the path is relative to the notebook's working directory.
# NOTE(review): presumably a 100k-row sample, judging by the file name - confirm.
result_df = pd.read_csv("../../data/clean_data.100k.csv")

## 4. Модель GNN (GraphSAGE).

Модель GraphSAGE (PyG) с обучением и инференсом.
Включены как node-level head (предсказывает churn_rate), так и edge-level head (предсказывает transition_count / flow).
Код предполагает, что у нас в наличии:

- `node_df` — DataFrame агрегированных нод (индекс = node_id), в котором есть колонка text_embedding (np.array) и колонка-таргет churn_rate.

- `edge_df` — DataFrame агрегированных рёбер, индекс = (src_node, dst_node), содержит числовые edge-фичи и колонку transition_count.


In [None]:
# gnn_training.py
# GraphSAGE for node-level (churn_rate) and edge-level (transition_count) regression
# Requires: torch, torch_geometric, scikit-learn, numpy, pandas

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv, global_mean_pool
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# -----------------------------
# Helper: build PyG Data (if not already built)
# -----------------------------
def build_pyg_data_from_dfs(node_df: pd.DataFrame, edge_df: pd.DataFrame,
                            node_target_col='churn_rate', edge_target_col='transition_count'):
    """
    Convert aggregated node/edge DataFrames into a PyG ``Data`` object.

    Parameters
    ----------
    node_df : DataFrame whose index holds the node ids (must be unique).
        May contain a ``text_embedding`` column (one array per row); every
        other column except ``node_target_col`` becomes a node feature.
    edge_df : DataFrame indexed by a 2-level MultiIndex ``(src_node, dst_node)``
        or carrying ``src_node`` / ``dst_node`` columns. Every other column
        except ``edge_target_col`` becomes an edge attribute.
    node_target_col : name of the node regression target column.
    edge_target_col : name of the edge regression target column (optional in
        ``edge_df``).

    Returns
    -------
    (data, scaler, num_cols)
        data     - ``Data(x, edge_index, edge_attr, y)`` with the extra
                   attributes ``edge_y`` (edge targets or ``None``) and
                   ``node_id_map`` (node id -> row index).
        scaler   - the ``StandardScaler`` used on the non-embedding node
                   features (left unfitted if there are no such features).
        num_cols - names of the scaled node feature columns, in order.

    Raises
    ------
    ValueError : duplicate node ids, or edge endpoints cannot be located.
    KeyError   : an edge references a node id missing from ``node_df.index``.
    """
    # Map node_id -> positional row index
    node_ids = list(node_df.index)
    node_id_map = {nid: i for i, nid in enumerate(node_ids)}
    if len(node_id_map) != len(node_ids):
        # Duplicates would silently bind every edge to the last duplicate row.
        raise ValueError("node_df index must contain unique node ids")

    def _node_index(node_id):
        # Same exception type as a plain dict lookup, but with a usable message.
        try:
            return node_id_map[node_id]
        except KeyError:
            raise KeyError(
                f"edge endpoint {node_id!r} is not present in node_df.index"
            ) from None

    # Split the embedding column off from the remaining node columns
    if 'text_embedding' in node_df.columns:
        emb = np.vstack(node_df['text_embedding'].values).astype(float)
        node_df_wo_emb = node_df.drop(columns=['text_embedding'])
    else:
        emb = np.zeros((len(node_df), 0))
        node_df_wo_emb = node_df.copy()

    # Encode object/categorical columns as integer category codes
    node_df_enc = node_df_wo_emb.copy()
    for c in node_df_enc.columns:
        if node_df_enc[c].dtype == object or str(node_df_enc[c].dtype).startswith('category'):
            node_df_enc[c] = node_df_enc[c].astype('category').cat.codes

    # Feature columns = everything except the target; NaN -> 0
    num_cols = [c for c in node_df_enc.columns if c != node_target_col]
    X_num = node_df_enc[num_cols].astype(float).fillna(0).values

    # Standardise the non-embedding features (the embedding is used as is)
    scaler = StandardScaler()
    if X_num.size > 0:
        X_num = scaler.fit_transform(X_num)
    X = np.concatenate([X_num, emb], axis=1) if emb.size else X_num

    x = torch.tensor(X, dtype=torch.float)

    # Node target as a column vector [N, 1]
    y_node = torch.tensor(node_df[node_target_col].astype(float).values, dtype=torch.float).unsqueeze(1)

    # Edge endpoints: MultiIndex (src, dst) first, then src_node/dst_node columns
    if isinstance(edge_df.index, pd.MultiIndex) and edge_df.index.nlevels == 2:
        srcs = [_node_index(s) for s, _ in edge_df.index]
        dsts = [_node_index(d) for _, d in edge_df.index]
    elif {'src_node', 'dst_node'}.issubset(edge_df.columns):
        srcs = [_node_index(s) for s in edge_df['src_node'].values]
        dsts = [_node_index(d) for d in edge_df['dst_node'].values]
    else:
        raise ValueError("edge_df index must be MultiIndex (src,dst) or contain src_node/dst_node columns")

    edge_index = torch.tensor([srcs, dsts], dtype=torch.long)

    # Edge attributes: drop endpoint/target columns, encode categoricals, NaN -> 0
    edge_df_local = edge_df.copy()
    for c in ['src_node', 'dst_node', edge_target_col]:
        if c in edge_df_local.columns:
            edge_df_local = edge_df_local.drop(columns=[c])
    for c in edge_df_local.columns:
        if edge_df_local[c].dtype == object or str(edge_df_local[c].dtype).startswith('category'):
            edge_df_local[c] = edge_df_local[c].astype('category').cat.codes
    edge_attr = torch.tensor(edge_df_local.fillna(0).astype(float).values, dtype=torch.float)

    # Edge target (for training the edge head), column vector [E, 1]
    if edge_target_col in edge_df.columns:
        edge_y = torch.tensor(edge_df[edge_target_col].astype(float).values, dtype=torch.float).unsqueeze(1)
    else:
        edge_y = None

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y_node)
    data.edge_y = edge_y  # custom attribute for edge targets
    data.node_id_map = node_id_map

    return data, scaler, num_cols

# -----------------------------
# Model: GraphSAGE encoder + node regression head + edge regression head
# -----------------------------
class GraphSAGENet(torch.nn.Module):
    """
    Stack of SAGEConv layers shared by two regression heads.

    * ``node_mlp`` maps each node embedding to one scalar.
    * ``edge_mlp`` maps ``src_emb || dst_emb || edge_attr`` to one scalar.
    """

    def __init__(self, in_channels, hidden_channels=64, num_layers=2, edge_dim=0):
        super().__init__()

        # First layer consumes the raw features, the rest are hidden -> hidden.
        layer_inputs = [in_channels] + [hidden_channels] * (num_layers - 1)
        self.convs = torch.nn.ModuleList(
            [SAGEConv(width, hidden_channels) for width in layer_inputs]
        )

        # Node head: hidden -> hidden/2 -> 1
        half_width = hidden_channels // 2
        self.node_mlp = nn.Sequential(
            nn.Linear(hidden_channels, half_width),
            nn.ReLU(),
            nn.Linear(half_width, 1),
        )

        # Edge head: (2 * hidden + edge_dim) -> hidden -> 1
        self.edge_dim = edge_dim
        self.edge_mlp = nn.Sequential(
            nn.Linear(2 * hidden_channels + edge_dim, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, 1),
        )

    def forward(self, x, edge_index, edge_attr=None):
        """
        Return ``(node_pred, edge_pred, node_embeddings)``.

        ``edge_pred`` is ``None`` when ``edge_attr`` is not supplied.
        """
        emb = x
        for layer in self.convs:
            # ReLU follows every convolution, including the last one.
            emb = F.relu(layer(emb, edge_index))

        node_pred = self.node_mlp(emb)
        if edge_attr is None:
            return node_pred, None, emb

        sources, targets = edge_index[0], edge_index[1]
        pair_features = torch.cat([emb[sources], emb[targets], edge_attr], dim=1)
        return node_pred, self.edge_mlp(pair_features), emb

# -----------------------------
# Training loop
# -----------------------------
def train_model(data: Data,
                lr=1e-3,
                epochs=200,
                val_ratio=0.1,
                test_ratio=0.1,
                hidden=64,
                device=torch.device('cpu')):
    """
    Train a 2-layer GraphSAGENet full-batch on ``data``.

    The node loss (MSE) is computed on the training nodes only. If
    ``data.edge_y`` is present, an edge MSE over *all* edges is added to both
    the training loss and the validation loss (edges are not split).

    Parameters
    ----------
    data : PyG ``Data`` with ``x``, ``edge_index``, ``y`` and optionally
        ``edge_attr`` / ``edge_y``. The input object is cloned, not modified.
    lr, epochs, hidden : optimiser learning rate, number of epochs, hidden width.
    val_ratio, test_ratio : fractions of the nodes held out for validation/test.
    device : device the clone of ``data`` and the model are moved to.

    Returns
    -------
    (model, (train_mask, val_mask, test_mask))
        ``model`` carries the weights of the epoch with the lowest validation
        loss; the masks are boolean tensors of length ``num_nodes`` on ``device``.
    """
    # Work on a private copy living on the target device
    data = data.clone()
    data = data.to(device)
    N = data.num_nodes

    # Train/val/test split on nodes (node-level supervised)
    idx = np.arange(N)
    idx_train, idx_tmp = train_test_split(idx, test_size=(val_ratio+test_ratio), random_state=42)
    relative_val = val_ratio / (val_ratio + test_ratio)
    idx_val, idx_test = train_test_split(idx_tmp, test_size=(1-relative_val), random_state=42)

    train_mask = torch.zeros(N, dtype=torch.bool, device=device)
    val_mask = torch.zeros(N, dtype=torch.bool, device=device)
    test_mask = torch.zeros(N, dtype=torch.bool, device=device)
    train_mask[idx_train] = True
    val_mask[idx_val] = True
    test_mask[idx_test] = True

    # If edge target exists, train on all edges (no edge split here for simplicity)
    edge_has_target = getattr(data, 'edge_y', None) is not None
    if edge_has_target:
        edge_y = data.edge_y.to(device)
    else:
        edge_y = None

    model = GraphSAGENet(in_channels=data.x.size(1),
                         hidden_channels=hidden,
                         num_layers=2,
                         edge_dim=(data.edge_attr.size(1) if data.edge_attr is not None else 0)).to(device)

    opt = torch.optim.Adam(model.parameters(), lr=lr)
    best = {'val_loss': float('inf'), 'model_state': None}

    for epoch in range(1, epochs+1):
        model.train()
        opt.zero_grad()
        node_pred, edge_pred, _ = model(data.x, data.edge_index, data.edge_attr)
        # compute node loss only on train_mask
        loss_node = F.mse_loss(node_pred[train_mask], data.y[train_mask])

        if edge_has_target and edge_pred is not None:
            loss_edge = F.mse_loss(edge_pred, edge_y)  # all edges
            loss = loss_node + loss_edge
        else:
            loss = loss_node

        loss.backward()
        opt.step()

        # validation
        model.eval()
        with torch.no_grad():
            node_pred_val, edge_pred_val, _ = model(data.x, data.edge_index, data.edge_attr)
            val_loss_node = F.mse_loss(node_pred_val[val_mask], data.y[val_mask]).item()
            if edge_has_target and edge_pred_val is not None:
                val_loss_edge = F.mse_loss(edge_pred_val, edge_y).item()
                val_loss = val_loss_node + val_loss_edge
            else:
                val_loss = val_loss_node

        if val_loss < best['val_loss']:
            best['val_loss'] = val_loss
            # state_dict() hands out references to the live parameters, which
            # the optimiser keeps updating in place; clone to take a snapshot.
            best['model_state'] = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

        if epoch % 50 == 0 or epoch == 1:
            print(f"Epoch {epoch:03d} train_loss={loss.item():.6f} val_loss={val_loss:.6f}")

    # load best (absent when no epoch ran or the validation loss was never finite)
    if best['model_state'] is not None:
        model.load_state_dict(best['model_state'])
    print("Training finished. Best val loss:", best['val_loss'])
    return model, (train_mask, val_mask, test_mask)

# -----------------------------
# Inference: add new node & new edges, get predictions
# -----------------------------
def append_node_and_edges_and_predict(model: "GraphSAGENet", data: "Data",
                                      new_node_features: np.ndarray,
                                      new_edges: list,
                                      device_map: dict = None,
                                      scaler=None):
    """
    Append one node (plus edges) to the graph and run the model on the result.

    Parameters
    ----------
    model : trained network; called as ``model(x, edge_index, edge_attr)`` and
        expected to return ``(node_pred, edge_pred, ...)``.
    data : graph object exposing ``x``, ``edge_index`` and ``edge_attr``
        (``edge_attr`` may be ``None``). It is not modified.
    new_node_features : 1D array with the same width as ``data.x``.
    new_edges : list of ``(src, dst, edge_attr_array)`` tuples. ``src``/``dst``
        may be the string ``'NEW'`` (the appended node), an integer row index
        (Python or NumPy integer), or a key of ``device_map``.
    device_map : optional mapping node id -> row index in ``data.x``.
    scaler : accepted for backward compatibility; currently unused.

    Returns
    -------
    (new_node_pred, edge_preds)
        Prediction for the appended node (float) and a list with one float per
        entry of ``new_edges``, in the same order.

    Raises
    ------
    ValueError : unknown node identifier, or edge attribute widths that do not
        agree with each other / with ``data.edge_attr``.
    """
    device = next(model.parameters()).device

    # The appended node takes the first free row index.
    new_idx = data.x.size(0)

    def resolve(ref):
        if isinstance(ref, str) and ref == 'NEW':
            return new_idx
        # NumPy integers (e.g. values taken from an index array) count as indices too.
        if isinstance(ref, (int, np.integer)):
            return int(ref)
        if device_map is not None and ref in device_map:
            return device_map[ref]
        raise ValueError("Unknown node identifier and no device_map provided")

    # Node features: existing rows followed by the new one
    x_new = torch.cat(
        [data.x.cpu(), torch.tensor(new_node_features, dtype=torch.float).unsqueeze(0)],
        dim=0,
    )

    added_pairs = []
    added_attrs = []
    for src, dst, edge_attr in new_edges:
        added_pairs.append((resolve(src), resolve(dst)))
        added_attrs.append(np.asarray(edge_attr, dtype=float).reshape(-1))
    num_added = len(added_pairs)

    # Edge index: concatenate tensors instead of round-tripping through lists
    old_index = data.edge_index.cpu().long()
    if num_added:
        added_index = torch.tensor(added_pairs, dtype=torch.long).t()
        edge_index_new = torch.cat([old_index, added_index], dim=1)
    else:
        edge_index_new = old_index

    # Edge attributes: one row per edge, old edges first
    if num_added:
        if len({attr.shape[0] for attr in added_attrs}) != 1:
            raise ValueError("all new edge attribute arrays must have the same length")
        added_attr = torch.tensor(np.vstack(added_attrs), dtype=torch.float)
    else:
        added_attr = None

    if data.edge_attr is None:
        # No stored attributes: give the existing edges zero rows of matching
        # width so the attribute matrix still has one row per edge.
        width = added_attr.size(1) if added_attr is not None else 0
        old_attr = torch.zeros((old_index.size(1), width), dtype=torch.float)
    else:
        old_attr = data.edge_attr.cpu().float()

    if added_attr is None:
        edge_attr_new = old_attr
    else:
        if added_attr.size(1) != old_attr.size(1):
            raise ValueError(
                f"new edge attributes have {added_attr.size(1)} columns, "
                f"expected {old_attr.size(1)}"
            )
        edge_attr_new = torch.cat([old_attr, added_attr], dim=0)

    model.eval()
    with torch.no_grad():
        node_pred, edge_pred, _ = model(
            x_new.to(device), edge_index_new.to(device), edge_attr_new.to(device)
        )

    new_node_pred = node_pred[new_idx].cpu().item()

    # The new edges were appended last, so their predictions are the tail rows.
    if num_added:
        tail_start = edge_pred.size(0) - num_added
        edge_preds = edge_pred[tail_start:].reshape(-1).cpu().tolist()
    else:
        edge_preds = []

    return new_node_pred, edge_preds

# -----------------------------
# Example usage (put in main cell)
# -----------------------------
# End-to-end example: build the graph, train, predict for an appended node,
# then report the node-level test MSE.
if __name__ == "__main__":
    # node_df and edge_df are assumed to be prepared already
    # node_df.index = node_id
    # edge_df.index = MultiIndex (src_node, dst_node)
    # Expected columns: node_df['text_embedding'], node_df['churn_rate']
    # Expected column: edge_df['transition_count']

    # --------------------------
    # 1) Build data
    # --------------------------
    # Example:
    # node_df = pd.read_parquet("node_features.parquet")
    # edge_df = pd.read_parquet("edge_features.parquet")

    # Uncomment and load your data here:
    # node_df = ...
    # edge_df = ...

    # For safety, here's a quick guard:
    # (referencing the bare names raises NameError if they were never defined)
    try:
        node_df  # noqa
        edge_df  # noqa
    except NameError:
        raise RuntimeError("You must provide node_df and edge_df variables before running this script.")

    data, scaler, numeric_cols = build_pyg_data_from_dfs(node_df, edge_df,
                                                         node_target_col='churn_rate',
                                                         edge_target_col='transition_count')

    # --------------------------
    # 2) Train
    # --------------------------
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model, masks = train_model(data, lr=1e-3, epochs=300, val_ratio=0.1, test_ratio=0.1, hidden=64, device=device)

    # --------------------------
    # 3) Inference example: add a new node and edge(s)
    # --------------------------
    # new_node_features must be same length as data.x.shape[1]
    # Strategy to build new_node_features:
    # - If you have embedding and numeric features prepared, concatenate them
    # - If not, use mean of neighbor nodes features as initialization (quick hack)
    #
    # Quick hack: use mean of existing X rows
    new_node_features = data.x.cpu().mean(dim=0).numpy()  # naive init; replace with real features if available

    # Build edge_attr sample: must match existing edge_attr columns count
    E_attr_dim = data.edge_attr.size(1) if data.edge_attr is not None else 0
    sample_edge_attr = np.zeros(E_attr_dim, dtype=float)
    # Suppose we connect new node from an existing node with index 0
    new_edges = [
        (0, 'NEW', sample_edge_attr),    # edge from node index 0 -> NEW
        ('NEW', 1, sample_edge_attr)     # edge NEW -> node index 1
    ]

    # For device_map usage (if you want to refer to nodes by node_id string),
    # pass data.node_id_map as device_map argument.
    new_node_pred, new_edge_preds = append_node_and_edges_and_predict(model, data,
                                                                      new_node_features=new_node_features,
                                                                      new_edges=new_edges,
                                                                      device_map=None,
                                                                      scaler=scaler)

    print("Predicted churn_rate for new node:", new_node_pred)
    print("Predicted edge flows for new edges:", new_edge_preds)

    # --------------------------
    # 4) Evaluate on test set (optional)
    # --------------------------
    # This is the node-level MSE only. The validation loss printed by
    # train_model additionally includes the edge MSE when edge targets exist,
    # so the two numbers are not directly comparable.
    train_mask, val_mask, test_mask = masks
    device = next(model.parameters()).device
    data = data.to(device)
    model.eval()
    with torch.no_grad():
        node_pred_all, edge_pred_all, _ = model(data.x, data.edge_index, data.edge_attr)
        test_mse = F.mse_loss(node_pred_all[test_mask], data.y[test_mask]).item()
    print("Test MSE (node churn_rate):", test_mse)


Epoch 001 train_loss=324698.500000 val_loss=324241.193476
Epoch 050 train_loss=276987.687500 val_loss=275422.702426
Epoch 100 train_loss=200794.500000 val_loss=199357.622091
Epoch 150 train_loss=145250.796875 val_loss=144521.742381
Epoch 200 train_loss=111651.218750 val_loss=110930.779796
Epoch 250 train_loss=75849.648438 val_loss=75142.252204
Epoch 300 train_loss=46587.550781 val_loss=46190.166986
Training finished. Best val loss: 46190.1669860743
Predicted churn_rate for new node: -0.002679973840713501
Predicted edge flows for new edges: [-11.576274871826172, -3.8096964359283447]
Test MSE (node churn_rate): 0.0785408467054367


## ✅ 7. Обучение GraphSAGE на предсказание churn_rate

Здесь мы обучаем регрессию (предсказание churn_rate), используем:

- train/val/test split
- HuberLoss (более устойчива к выбросам)
- Adam
- эпохи, вывод Huber loss и MAE на валидации

Работает с объектом graph, который был построен на шагах 1 и 2.

In [None]:
# train_gnn_regression.py
import torch
import torch.nn as nn
from torch_geometric.loader import NeighborLoader
from sklearn.model_selection import train_test_split


def train_val_test_split(num_nodes, test_size=0.15, val_size=0.15, seed=42):
    """
    Split the node indices ``0 .. num_nodes-1`` into train/val/test.

    The test part is carved out first; ``val_size`` is then applied to the
    indices that remain (not to the full set). Both splits use ``seed``.

    Returns a tuple ``(train_idx, val_idx, test_idx)`` of ``torch.long`` tensors.
    """
    node_indices = list(range(num_nodes))

    remainder, test_part = train_test_split(
        node_indices, test_size=test_size, random_state=seed
    )
    train_part, val_part = train_test_split(
        remainder, test_size=val_size, random_state=seed
    )

    return tuple(
        torch.tensor(part, dtype=torch.long)
        for part in (train_part, val_part, test_part)
    )


def train_graphsage_regression_fullbatch(
    graph,
    model,
    epochs=50,
    lr=0.001,
    device="cpu"
):
    """
    Train ``model`` full-batch for node regression with Huber loss and Adam.

    Parameters
    ----------
    graph : object providing ``to(device)``, ``num_nodes``, ``x``,
        ``edge_index`` and ``y`` (one target per node, shaped ``[N]`` or ``[N, 1]``).
    model : called as ``model(x, edge_index)``; the first element of its return
        value is taken as the per-node prediction.
    epochs, lr : number of epochs and Adam learning rate.
    device : device the graph, the model and the split indices are moved to.

    Prints train/val Huber loss and validation MAE every epoch, then the test
    Huber loss and MAE. Returns the trained model (last-epoch weights).
    """
    graph = graph.to(device)
    model = model.to(device)

    num_nodes = graph.num_nodes
    # Keep the index tensors on the same device as the tensors they index.
    train_idx, val_idx, test_idx = (
        part.to(device) for part in train_val_test_split(num_nodes)
    )

    loss_fn = nn.HuberLoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr)

    x = graph.x
    edge_index = graph.edge_index
    # Flatten the target so it lines up 1:1 with the flattened predictions.
    # Comparing [n] predictions with an [n, 1] target would broadcast to
    # [n, n] and produce a wrong loss/MAE.
    y = graph.y.reshape(-1)

    def predict():
        # reshape(-1) rather than squeeze(): stays 1-D even for a single node.
        return model(x, edge_index)[0].reshape(-1)

    for epoch in range(1, epochs + 1):
        # ::::::::::::::: TRAIN :::::::::::::::
        model.train()
        preds = predict()

        loss = loss_fn(preds[train_idx], y[train_idx])

        opt.zero_grad()
        loss.backward()
        opt.step()

        # ::::::::::::::: VAL :::::::::::::::
        model.eval()
        with torch.no_grad():
            val_pred = predict()

            val_loss = loss_fn(val_pred[val_idx], y[val_idx]).item()
            val_mae = (val_pred[val_idx] - y[val_idx]).abs().mean().item()

        print(f"Epoch {epoch:03d} | Train={loss.item():.4f} | Val={val_loss:.4f} | MAE={val_mae:.4f}")

    # ::::::::::::::: TEST :::::::::::::::
    model.eval()
    with torch.no_grad():
        test_pred = predict()

        test_loss = loss_fn(test_pred[test_idx], y[test_idx]).item()
        test_mae = (test_pred[test_idx] - y[test_idx]).abs().mean().item()

    print("\n===== FINAL TEST =====")
    print(f"Test Loss = {test_loss:.4f}")
    print(f"Test MAE  = {test_mae:.4f}")

    return model


### 📘 Как запускать обучение

In [None]:
# from gnn_models import GraphSAGENet
# from train_gnn_regression import train_graphsage_regression

# graph = Data(...) - the graph object assembled earlier
# NOTE(review): `graph` is not defined in the visible cells - confirm where it is built.

# Fresh, untrained network sized from the graph's node/edge feature widths.
model = GraphSAGENet(
    in_channels=graph.x.size(1),
    hidden_channels=128,
    # out_channels=1,
    num_layers=2,
    edge_dim=graph.edge_attr.size(1)
)

# trained_model = train_graphsage_regression(
#     graph=graph,
#     model=model,
#     epochs=40,
#     batch_size=64,
#     lr=0.001,
#     device='cpu'  # if there is no GPU -> 'cpu'
# )

# Full-batch training of the node head only: the trainer calls
# model(x, edge_index) without edge attributes.
trained_model = train_graphsage_regression_fullbatch(
    graph=graph,
    model=model,
    epochs=40,
    lr=0.001,
    device="cpu"
)


Epoch 001 | Train=0.0277 | Val=0.0065 | MAE=0.0668
Epoch 002 | Train=0.0160 | Val=0.0095 | MAE=0.1178
Epoch 003 | Train=0.0126 | Val=0.0151 | MAE=0.1576
Epoch 004 | Train=0.0138 | Val=0.0161 | MAE=0.1636
Epoch 005 | Train=0.0136 | Val=0.0135 | MAE=0.1497
Epoch 006 | Train=0.0117 | Val=0.0100 | MAE=0.1277
Epoch 007 | Train=0.0098 | Val=0.0073 | MAE=0.1064
Epoch 008 | Train=0.0086 | Val=0.0057 | MAE=0.0903
Epoch 009 | Train=0.0083 | Val=0.0049 | MAE=0.0793
Epoch 010 | Train=0.0082 | Val=0.0045 | MAE=0.0745
Epoch 011 | Train=0.0079 | Val=0.0044 | MAE=0.0747
Epoch 012 | Train=0.0072 | Val=0.0045 | MAE=0.0790
Epoch 013 | Train=0.0063 | Val=0.0050 | MAE=0.0857
Epoch 014 | Train=0.0055 | Val=0.0058 | MAE=0.0930
Epoch 015 | Train=0.0051 | Val=0.0067 | MAE=0.0980
Epoch 016 | Train=0.0049 | Val=0.0071 | MAE=0.0986
Epoch 017 | Train=0.0048 | Val=0.0068 | MAE=0.0936
Epoch 018 | Train=0.0044 | Val=0.0058 | MAE=0.0843
Epoch 019 | Train=0.0038 | Val=0.0046 | MAE=0.0735
Epoch 020 | Train=0.0033 | Val=