# Aging Aware with Model Variation
Previously, we optimized the following functional:
$$ \min_\theta \, \int_{t=0}^{1} \; L(\theta(t)) \, {\rm d}t. $$
The optimum is for a specific aging model $\omega$, i.e., we minimized the functional
$$ \min_\theta \, \int_{t=0}^{1} \; L(\theta(t,\omega)) \, {\rm d}t. $$
However, we don't know how the resistance will decay, i.e., we should also minimize the loss function w.r.t. aging models with different parameters. That means we need to optimize
$$ \min_\theta \, \int_{\omega}\int_{t=0}^{1} \; L(\theta(t,\omega)) \, {\rm d}t\, p(\omega){\rm d}\omega. $$
The Monte Carlo approximation is then
$$
\min_{\theta_{\rm init}} \frac{1}{\Omega}\frac{1}{K}\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right),
$$
where $\mathfrak{M}$ is a set of $\Omega$ aging models sampled from the distribution $p(\omega)$, and $\mathfrak{K}$ is the set of $K$ sampled time points. $p(\omega)$ is the distribution of the parameters of the aging model. We have already obtained these distributions when we modeled the aging decay.

That means we should optimize this problem by
$$
\begin{align}
\theta_{\rm init}&:=\theta_{\rm init} - \alpha\cdot\nabla_{\theta_{\rm init}}\left(\frac{1}{\Omega}\frac{1}{K}\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right)\right)\\
&=\theta_{\rm init} - \frac{\alpha}{\Omega K}\cdot\nabla_{\theta_{\rm init}}\left(\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right)\right)\\
&=\theta_{\rm init} - \frac{\alpha}{\Omega K}\left(\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} }\nabla_{\theta_{\rm init}} L \left(\theta[k, \omega]\right)\right)
\end{align}
$$

# Get aging model

In [1]:
import importlib
from torch.autograd import Variable
import torch
import pickle
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
# Make the local Aging_Model package importable, then restore the pickled
# aging model from it. The directory must be on sys.path *before*
# unpickling so that pickle can resolve the classes stored in the file.
aging_model_dir = os.path.join(os.getcwd(), 'Aging_Model')
sys.path.append(aging_model_dir)

# NOTE: pickle.load executes code embedded in the file; only load
# model files that come from a trusted source.
aging_model_file = os.path.join(aging_model_dir, 'exp_aging_model.p')
with open(aging_model_file, 'rb') as f:
    age_generator = pickle.load(f)

# Prepare data

## Load data

In [2]:
# Load the pre-processed Pendigits dataset (a pickled dict with the
# feature tensor under 'X' and the label tensor under 'y').
dataset_dir = os.path.join(os.getcwd(), 'Datasets', 'PMLC', 'data_processed')
datapath = os.path.join(dataset_dir, 'Dataset_Pendigits.p')
with open(datapath, 'rb') as f:
    dataset = pickle.load(f)

X = dataset['X'].float()
y = dataset['y']

# M: number of samples, N_features: features per sample.
M = X.size(0)
N_features = X.size(1)
# Class count is taken as (largest label + 1); this assumes labels are
# 0-based class indices -- TODO confirm against the dataset.
N_class = torch.max(y).item() + 1
X.shape, y.shape, M, N_features, N_class

(torch.Size([10992, 16]), torch.Size([10992]), 10992, 16, 10)

## data preprocessing

In [3]:
# Per-feature min-max normalization to [0, 1].
# Step 1 scales every column by its range (max - min); step 2 shifts the
# scaled column so its minimum is 0. Together this equals
# (X - min) / (max - min) column-wise.
# NOTE: a constant feature column has zero range and would divide by zero.
col_max = X.max(dim=0).values
col_min = X.min(dim=0).values
X = X / (col_max - col_min)
X = X - X.min(dim=0).values
torch.min(X), torch.max(X)

(tensor(0.), tensor(1.))

In [4]:
# Preview the first 10 samples and the first 9 feature columns of the
# normalized data as a quick sanity check.
X[:10, :9]

tensor([[0.4700, 1.0000, 0.2700, 0.8100, 0.5700, 0.3700, 0.2600, 0.0000, 0.0000],
        [0.0000, 0.8900, 0.2700, 1.0000, 0.4200, 0.7500, 0.2900, 0.4500, 0.1500],
        [0.0000, 0.5700, 0.3100, 0.6800, 0.7200, 0.9000, 1.0000, 1.0000, 0.7600],
        [0.0000, 1.0000, 0.0700, 0.9200, 0.0500, 0.6800, 0.1900, 0.4500, 0.8600],
        [0.0000, 0.6700, 0.4900, 0.8300, 1.0000, 1.0000, 0.8100, 0.8000, 0.6000],
        [1.0000, 1.0000, 0.8800, 0.9900, 0.4900, 0.7400, 0.1700, 0.4700, 0.0000],
        [0.0000, 1.0000, 0.0300, 0.7200, 0.2600, 0.3500, 0.8500, 0.3500, 1.0000],
        [0.0000, 0.3900, 0.0200, 0.6200, 0.1100, 0.0500, 0.6300, 0.0000, 1.0000],
        [0.1300, 0.8900, 0.1200, 0.5000, 0.7200, 0.3800, 0.5600, 0.0000, 0.0400],
        [0.5700, 1.0000, 0.2200, 0.7200, 0.0000, 0.3100, 0.2500, 0.0000, 0.7500]])

In [5]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Wrap features and labels so they can be split and batched together.
dataset = TensorDataset(X, y)

# 60 % training, 20 % test; the validation set receives whatever is left
# after the two integer-truncated sizes have been taken.
train_rate = 0.6
test_rate = 0.2
M_train = int(M*train_rate)
M_test = int(M*test_rate)
M_rest = M - M_train
M_valid = M_rest - M_test

# Fixed seed so that both splits are reproducible between runs.
split_seed = 19950102
train_data, rest_data = random_split(
    dataset,
    [M_train, M_rest],
    generator=torch.Generator().manual_seed(split_seed),
)
test_data, valid_data = random_split(
    rest_data,
    [M_test, M_valid],
    generator=torch.Generator().manual_seed(split_seed),
)
len(train_data), len(test_data), len(valid_data)

(6595, 2198, 2199)

In [6]:
# Full-batch loaders: each split is served as one single batch, because
# batch_size equals the size of the split.
train_loader, test_loader, valid_loader = (
    DataLoader(split, batch_size=len(split))
    for split in (train_data, test_data, valid_data)
)

# Hyperparameter

In [7]:
# Width of the hidden layer (output size of the first PNNLayer).
N_Hidden = 16
# m and T are passed as the 3rd and 4th arguments of pnnv.LossFunction.
# Presumably a margin and a threshold of the loss -- TODO confirm in pnnv.
m = 0.3
T = 0.1
K = 5  # number of time sampling
# NOTE(review): M previously held the number of dataset samples (data
# loading cell) and M_test the size of the test split (split cell). Both
# names are rebound here, so re-running the split cell after this one
# would use these values instead of the dataset sizes.
M = 10  # number of model sampling
K_test = 5
M_test = 10

# Aging Aware PNN

In [8]:
import pNN_aging_aware_vectorization as pnnv

# Aging-aware printed neural network: two stacked PNN layers
# (N_features -> N_Hidden -> N_class), both sharing the same aging model.
aapnn_layers = [
    pnnv.PNNLayer(N_features, N_Hidden, age_generator),
    pnnv.PNNLayer(N_Hidden, N_class, age_generator),
]
AAPNN = torch.nn.Sequential(*aapnn_layers)

# Adam over all parameters registered on the network at this point.
optimizer_AAPNN = torch.optim.Adam(AAPNN.parameters(), lr=0.01)
AAPNN

Sequential(
  (0): PNNLayer()
  (1): PNNLayer()
)

In [9]:
# Call pnnv.MakeModel(module, M) on every module of the network
# (Module.apply visits all submodules and the container itself).
# M is the number of sampled aging models here (10), not the dataset size.
# NOTE(review): optimizer_AAPNN was created before this call, whereas the
# "Normal PNN" section applies MakeModel before building its optimizer.
# If MakeModel registers or replaces parameters they would not be tracked
# by optimizer_AAPNN -- confirm against pnnv.MakeModel.
AAPNN.apply(lambda z: pnnv.MakeModel(z, M))

Sequential(
  (0): PNNLayer()
  (1): PNNLayer()
)

In [10]:
# Tile the data once per (aging model, time sample) pair: the result has
# shape (M, K, n_samples, n_features), i.e. [10, 5, 10992, 16] below.
# repeat() materializes real copies, so memory grows by a factor of M*K.
Xv = X.repeat(M,K,1,1)
Xv.shape

torch.Size([10, 5, 10992, 16])

In [11]:
# Forward pass over all (model, time) copies at once; the output keeps the
# two leading dimensions and ends in N_class: [10, 5, 10992, 10] below.
prediction = AAPNN(Xv)
prediction.shape

torch.Size([10, 5, 10992, 10])

In [12]:
# Pick up edits made to the pnnv module without restarting the kernel.
# Already-constructed layers keep the class they were created from.
importlib.reload(pnnv)
# Loss over all sampled aging models and time points. The hyperparameters
# m and T are used instead of repeating the literals 0.3 and 0.1, so this
# check stays consistent with the training loop when they are tuned.
l = pnnv.LossFunction(prediction, y, m, T)

In [13]:
# Backpropagate the loss so the gradients can be inspected afterwards.
l.backward()

In [14]:
# Inspect the gradient accumulated on the first layer's theta_ tensor.
AAPNN[0].theta_.grad

tensor([[-6.9246e-05,  2.9282e-04, -1.5269e-05,  2.5732e-04,  8.3477e-05,
          8.9210e-05,  1.1465e-04, -4.2086e-05,  1.0696e-04, -2.6019e-05,
          2.2283e-05,  1.1463e-04,  1.1987e-04,  1.7370e-04,  2.9913e-04,
          1.3855e-04,  3.4895e-04, -9.1313e-05],
        [-1.0326e-05,  9.5576e-06, -2.9984e-06,  1.3275e-05,  1.4765e-05,
          4.0644e-06,  2.8112e-06, -2.3273e-06, -4.6894e-06, -7.3365e-06,
         -1.0289e-05, -5.4724e-06, -2.0454e-06, -3.1609e-06,  1.8205e-05,
         -1.1077e-06,  1.4632e-05, -1.0832e-05],
        [ 7.2025e-04,  2.2064e-03,  6.0857e-04,  2.0832e-03,  7.6821e-04,
          1.4851e-03,  6.6165e-04,  5.3485e-04,  8.8463e-04,  1.4643e-04,
          1.2872e-03,  1.0438e-04,  1.0830e-03,  0.0000e+00,  4.7367e-04,
         -3.9300e-05,  2.3965e-03, -9.5338e-04],
        [-2.8188e-04,  2.8729e-04,  3.9899e-05,  3.8659e-04,  3.7205e-04,
          4.3388e-04,  4.3354e-04,  2.7659e-04,  2.1281e-04,  1.7176e-04,
         -5.2479e-05,  1.0550e-04,  6.6

# Normal PNN

## Training

In [36]:
import pNN_aging_aware_vectorization as pnnv
# Reload pnnv first so the layers below are built from the current code.
importlib.reload(pnnv)

# Baseline (non-aging-aware) network with the same two-layer topology.
pnn_layers = (
    pnnv.PNNLayer(N_features, N_Hidden, age_generator),
    pnnv.PNNLayer(N_Hidden, N_class, age_generator),
)
PNN = torch.nn.Sequential(*pnn_layers)

# Apply the pnnv setup hooks to every module, in this order.
# NOTE(review): MakeModel is given only the module here, while the
# aging-aware section calls pnnv.MakeModel(z, M) -- confirm that the
# second argument has a default suitable for the baseline.
for setup_fn in (pnnv.LockTime, pnnv.MakeModel):
    PNN.apply(setup_fn)

# The optimizer is created after the hooks ran, so it sees the final
# parameter set of the network.
optimizer_PNN = torch.optim.Adam(PNN.parameters(), lr=0.01)
PNN

Sequential(
  (0): PNNLayer()
  (1): PNNLayer()
)

In [37]:
# Train the baseline PNN for 1000 epochs with full-batch Adam steps and
# report the test loss every 10th epoch.
for epoch in range(1000):
    for x_train, y_train in train_loader:
        optimizer_PNN.zero_grad()

        # repeat(1, 1, 1, 1) only prepends two singleton dimensions, giving
        # the 4-D (1, 1, n_samples, n_features) layout fed to the network.
        xv_train = x_train.repeat(1, 1, 1, 1)
        prediction = PNN(xv_train)
        loss = pnnv.LossFunction(prediction, y_train, m, T)

        loss.backward()
        optimizer_PNN.step()

    # Evaluation never calls backward(), so disable gradient tracking to
    # avoid building (and holding) an autograd graph on every epoch.
    with torch.no_grad():
        for x_test, y_test in test_loader:
            xv_test = x_test.repeat(1, 1, 1, 1)
            prediction_test = PNN(xv_test)
            loss_test = pnnv.LossFunction(prediction_test, y_test, m, T)

            if not epoch % 10:
                print('loss: ', loss_test.data)
    


loss:  tensor(1.3956)
loss:  tensor(1.3936)
loss:  tensor(1.3771)
loss:  tensor(1.3001)
loss:  tensor(0.9969)
loss:  tensor(0.7495)
loss:  tensor(0.6943)
loss:  tensor(0.6715)
loss:  tensor(0.6453)
loss:  tensor(0.6098)
loss:  tensor(0.5690)
loss:  tensor(0.5349)
loss:  tensor(0.5056)
loss:  tensor(0.4750)
loss:  tensor(0.4421)
loss:  tensor(0.4081)
loss:  tensor(0.3780)
loss:  tensor(0.3530)
loss:  tensor(0.3330)
loss:  tensor(0.3157)
loss:  tensor(0.3004)
loss:  tensor(0.2868)
loss:  tensor(0.2748)
loss:  tensor(0.2637)
loss:  tensor(0.2531)
loss:  tensor(0.2434)
loss:  tensor(0.2350)
loss:  tensor(0.2277)
loss:  tensor(0.2213)
loss:  tensor(0.2156)
loss:  tensor(0.2105)
loss:  tensor(0.2058)
loss:  tensor(0.2016)
loss:  tensor(0.1979)
loss:  tensor(0.1947)
loss:  tensor(0.1917)
loss:  tensor(0.1889)
loss:  tensor(0.1863)
loss:  tensor(0.1840)
loss:  tensor(0.1816)
loss:  tensor(0.1793)
loss:  tensor(0.1770)
loss:  tensor(0.1747)
loss:  tensor(0.1724)
loss:  tensor(0.1701)
loss:  ten