**Loading and Pre-Processing Bulgarian and USA (benchmark) Death and Exposure Rates**

In [1]:
import pandas as pd
import re
from io import StringIO
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

def read_tabular_file(filepath):
    """
    Reads a text file with metadata lines and a whitespace-delimited table.
    Returns a DataFrame with columns: Year, Age, Female, Male, Total.
    """
    with open(filepath, 'r') as f:
        lines = f.readlines()

    header_idx = next(i for i, line in enumerate(lines) if line.strip().startswith('Year'))

    table_str = ''.join(lines[header_idx:]).strip()

    df = pd.read_csv(
        StringIO(table_str),
        sep=r'\s+',
        engine='python'
    )
    return df

bulgaria_exposures_df = read_tabular_file('/content/Bulgaria_Exposures_1x1.txt')
bulgaria_deaths_df = read_tabular_file('/content/Bulgaria_Deaths_1x1.txt')

usa_exposure_df = read_tabular_file('/content/USA_Exposures_1x1.txt')
usa_deaths_df = read_tabular_file('/content/USA_Deaths_1x1.txt')


In [2]:
usa_deaths_df

Unnamed: 0,Year,Age,Female,Male,Total
0,1933,0,52615.77,68438.11,121053.88
1,1933,1,8917.13,10329.16,19246.29
2,1933,2,4336.92,5140.05,9476.97
3,1933,3,3161.59,3759.88,6921.47
4,1933,4,2493.84,2932.59,5426.43
...,...,...,...,...,...
10096,2023,106,569.01,89.00,658.01
10097,2023,107,282.01,39.01,321.02
10098,2023,108,153.00,17.00,170.00
10099,2023,109,90.00,11.00,101.00


In [3]:
bulgaria_deaths_df

Unnamed: 0,Year,Age,Female,Male,Total
0,1947,0,10197.46,12378.73,22576.19
1,1947,1,2152.10,2298.13,4450.23
2,1947,2,871.04,856.05,1727.09
3,1947,3,423.02,386.02,809.04
4,1947,4,306.02,300.02,606.04
...,...,...,...,...,...
8320,2021,106,1.91,0.39,2.30
8321,2021,107,0.68,0.00,0.68
8322,2021,108,0.00,0.00,0.00
8323,2021,109,0.00,0.00,0.00


In [4]:
def preprocess_df(exposures_df, deaths_df):
    for df in (exposures_df, deaths_df):
        df['Age'] = df['Age'].astype(str).replace({'110+': '110'}).astype(int)
    exp = exposures_df.pivot(index='Year', columns='Age', values='Total')
    dth = deaths_df   .pivot(index='Year', columns='Age', values='Total')
    if 110 in exp.columns:
        exp = exp.drop(columns=110)
        dth = dth.drop(columns=110)
    ages = exp.columns.intersection(dth.columns).sort_values()
    exp, dth = exp[ages], dth[ages]
    return dth / exp

def life_table(mx):
    ε = 1e-8
    mx = np.maximum(mx, ε)
    q = mx / (1 + 0.5*mx); q[-1] = 1.0
    p = 1 - q
    n = len(mx)
    l = np.empty(n+1); l[0] = 1.0
    for i in range(n):
        l[i+1] = l[i]*p[i]
    L = np.empty(n)
    for i in range(n-1):
        L[i] = 0.5*(l[i] + l[i+1])
    L[-1] = l[-1]/mx[-1]
    T = np.empty(n)
    T[-1] = L[-1]
    for i in range(n-2, -1, -1):
        T[i] = L[i] + T[i+1]
    e0 = T[0]
    d = l[:-1] - l[1:]
    e_x = T / l[:-1]
    disp = np.sum(d * e_x)
    return e0, disp

def compute_gap_series(m_bench, m_tgt):
    yrs = sorted(m_bench.index.intersection(m_tgt.index))
    gaps = []
    for y in yrs:
        e0_b, _ = life_table(m_bench.loc[y].values)
        e0_t, _ = life_table(m_tgt.loc[y].values)
        gaps.append(e0_b - e0_t)
    return np.array(yrs), np.array(gaps)

def compute_weights(g):
    gT = g[-1]
    ratio = np.clip((gT - g) / gT, 1e-6, 1.0)
    inv   = 1.0 / ratio
    return 0.5 * (1 + np.sin((np.pi/2) * inv))


class GapDataset(Dataset):
    def __init__(self, g, seq_len):
        self.X, self.y = [], []
        g_t = torch.tensor(g, dtype=torch.float32).unsqueeze(-1)
        ω   = torch.tensor(compute_weights(g), dtype=torch.float32)
        for i in range(seq_len, len(g)):
            self.X.append(g_t[i-seq_len:i])
            self.y.append(ω[i])
        self.X = torch.stack(self.X)
        self.y = torch.stack(self.y)

    def __len__(self): return len(self.y)
    def __getitem__(self, i): return self.X[i], self.y[i]


class WeightNet(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc   = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        h_last = out[:, -1, :]
        logit  = self.fc(h_last)
        ω      = torch.sigmoid(logit)
        return ω.squeeze(-1)

In [6]:
m_usa = preprocess_df(usa_exposure_df,     usa_deaths_df)
m_bul = preprocess_df(bulgaria_exposures_df, bulgaria_deaths_df)

years, gap_series = compute_gap_series(m_usa, m_bul)
gap_pd = pd.Series(gap_series, index=years)
gap_pd = gap_pd.interpolate().ffill().bfill()
years      = gap_pd.index.values
gap_series = gap_pd.values

gap_mean, gap_std = gap_series.mean(), gap_series.std()
gaps_norm = (gap_series - gap_mean) / gap_std

seq_len, batch_size = 10, 16
ds = GapDataset(gaps_norm, seq_len)
dl = DataLoader(ds, batch_size=batch_size, shuffle=True)

model   = WeightNet(input_size=1, hidden_size=32, num_layers=2)
opt     = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()


metrics = []
epochs = 50
for epoch in range(1, epochs+1):
    epoch_loss = 0.0
    for X, y in dl:
        opt.zero_grad()
        y_pred = model(X)
        loss   = loss_fn(y_pred, y)
        loss.backward()
        opt.step()
        epoch_loss += loss.item() * X.size(0)
    mse  = epoch_loss / len(ds)
    rmse = np.sqrt(mse)
    metrics.append({'epoch': epoch, 'mse': mse, 'rmse': rmse})
    print(f"Epoch {epoch:2d} — MSE: {mse:.6f}, RMSE: {rmse:.6f}")


metrics_df_lc = pd.DataFrame(metrics)
print("\nTraining metrics:\n", metrics_df_lc)

Epoch  1 — MSE: 0.166508, RMSE: 0.408054
Epoch  2 — MSE: 0.157221, RMSE: 0.396512
Epoch  3 — MSE: 0.148870, RMSE: 0.385837
Epoch  4 — MSE: 0.140388, RMSE: 0.374684
Epoch  5 — MSE: 0.129576, RMSE: 0.359966
Epoch  6 — MSE: 0.115954, RMSE: 0.340520
Epoch  7 — MSE: 0.099012, RMSE: 0.314662
Epoch  8 — MSE: 0.086957, RMSE: 0.294885
Epoch  9 — MSE: 0.082715, RMSE: 0.287602
Epoch 10 — MSE: 0.079952, RMSE: 0.282759
Epoch 11 — MSE: 0.075994, RMSE: 0.275670
Epoch 12 — MSE: 0.072884, RMSE: 0.269970
Epoch 13 — MSE: 0.069578, RMSE: 0.263777
Epoch 14 — MSE: 0.066881, RMSE: 0.258614
Epoch 15 — MSE: 0.063897, RMSE: 0.252778
Epoch 16 — MSE: 0.062608, RMSE: 0.250215
Epoch 17 — MSE: 0.062023, RMSE: 0.249044
Epoch 18 — MSE: 0.060936, RMSE: 0.246851
Epoch 19 — MSE: 0.059323, RMSE: 0.243563
Epoch 20 — MSE: 0.058245, RMSE: 0.241340
Epoch 21 — MSE: 0.057629, RMSE: 0.240059
Epoch 22 — MSE: 0.057154, RMSE: 0.239069
Epoch 23 — MSE: 0.056535, RMSE: 0.237772
Epoch 24 — MSE: 0.055529, RMSE: 0.235646
Epoch 25 — MSE: 