# Aging Aware with Model Variation
Previously, we optimized the following functional:
$$ \min_\theta \, \int_{t=0}^{1} \; L(\theta(t)) \, {\rm d}t. $$
The resulting optimum holds only for one specific aging model $\omega$, i.e., we minimized
$$ \min_\theta \, \int_{t=0}^{1} \; L(\theta(t,\omega)) \, {\rm d}t. $$
However, we don't know in advance how the resistance will decay, so the loss function should also be minimized with respect to aging models with different parameters. That means we need to optimize
$$ \min_\theta \, \int_{\omega}\int_{t=0}^{1} \; L(\theta(t,\omega)) \, {\rm d}t\, p(\omega){\rm d}\omega. $$
The Monte Carlo approximation is then
$$
\min_{\theta_{\rm init}} \frac{1}{\Omega}\frac{1}{K}\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right),
$$
where $\mathfrak{M}$ is a set of $\Omega$ samples drawn from the distribution $p(\omega)$ and $\mathfrak{K}$ is a set of $K$ sampled time points. Here $p(\omega)$ is the distribution of the parameters of the aging model; we already obtained this distribution when we modeled the aging decay.

That means we should solve this problem with the gradient-descent update
$$
\begin{align}
\theta_{\rm init}&:=\theta_{\rm init} - \alpha\cdot\nabla_{\theta_{\rm init}}\left(\frac{1}{\Omega}\frac{1}{K}\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right)\right)\\
&=\theta_{\rm init} - \frac{\alpha}{\Omega K}\cdot\nabla_{\theta_{\rm init}}\left(\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} } L \left(\theta[k, \omega]\right)\right)\\
&=\theta_{\rm init} - \frac{\alpha}{\Omega K}\left(\sum_{\omega\in\mathfrak{M}}\sum_{k\in \mathfrak{K} }\nabla_{\theta_{\rm init}} L \left(\theta[k, \omega]\right)\right)
\end{align}
$$

# Get aging model

In [1]:
import importlib
from torch.autograd import Variable
import torch
import pickle
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
# Put the local Aging_Model directory on the import path before unpickling
# (presumably so the classes referenced by the pickle can be resolved --
# TODO confirm).
aging_model_dir = os.path.join(os.getcwd(), 'Aging_Model')
sys.path.append(aging_model_dir)

# NOTE: pickle.load executes arbitrary code from the file; only load
# aging-model pickles that come from a trusted source.
with open(os.path.join(aging_model_dir, 'exp_aging_model.p'), 'rb') as model_file:
    age_generator = pickle.load(model_file)

# Device

In [2]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Prepare data

## Load data

In [3]:
# Location of the pre-processed Pendigits pickle, relative to the working
# directory.
datapath = os.path.join(
    os.getcwd(), 'Datasets', 'PMLC', 'data_processed', 'Dataset_Pendigits.p')
with open(datapath, 'rb') as f:
    dataset = pickle.load(f)

# Features are cast to float; labels are kept as stored.
X = dataset['X'].float()
y = dataset['y']

# M: number of samples, N_features: number of input features,
# N_class: largest label value + 1.
M = X.shape[0]
N_features = X.shape[1]
N_class = y.max().item() + 1

# Echo the shapes and the derived sizes.
X.shape, y.shape, M, N_features, N_class

(torch.Size([10992, 16]), torch.Size([10992]), 10992, 16, 10)

## Data preprocessing

In [4]:
# Min-max normalization: rescale every feature column (dim 0 runs over the
# samples) to the interval [0, 1].
feature_min = torch.min(X, dim=0)[0]
feature_range = torch.max(X, dim=0)[0] - feature_min
# A constant column has zero range; divide by 1 instead so that it maps to 0
# rather than producing NaN/inf.
feature_range[feature_range == 0] = 1.0
X = (X - feature_min) / feature_range
# Sanity check: global minimum and maximum after scaling.
torch.min(X), torch.max(X)

(tensor(0.), tensor(1.))

In [5]:
# Preview the first 10 samples and the first 9 feature columns of X.
X[:10, :9]

tensor([[0.4700, 1.0000, 0.2700, 0.8100, 0.5700, 0.3700, 0.2600, 0.0000, 0.0000],
        [0.0000, 0.8900, 0.2700, 1.0000, 0.4200, 0.7500, 0.2900, 0.4500, 0.1500],
        [0.0000, 0.5700, 0.3100, 0.6800, 0.7200, 0.9000, 1.0000, 1.0000, 0.7600],
        [0.0000, 1.0000, 0.0700, 0.9200, 0.0500, 0.6800, 0.1900, 0.4500, 0.8600],
        [0.0000, 0.6700, 0.4900, 0.8300, 1.0000, 1.0000, 0.8100, 0.8000, 0.6000],
        [1.0000, 1.0000, 0.8800, 0.9900, 0.4900, 0.7400, 0.1700, 0.4700, 0.0000],
        [0.0000, 1.0000, 0.0300, 0.7200, 0.2600, 0.3500, 0.8500, 0.3500, 1.0000],
        [0.0000, 0.3900, 0.0200, 0.6200, 0.1100, 0.0500, 0.6300, 0.0000, 1.0000],
        [0.1300, 0.8900, 0.1200, 0.5000, 0.7200, 0.3800, 0.5600, 0.0000, 0.0400],
        [0.5700, 1.0000, 0.2200, 0.7200, 0.0000, 0.3100, 0.2500, 0.0000, 0.7500]])

In [6]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Wrap features and labels in a TensorDataset whose tensors live on `device`.
dataset = TensorDataset(X.to(device), y.to(device))

# Split into 60 % training, 20 % test and the remainder as validation data.
# The sample count is read from the dataset itself instead of the global `M`:
# the hyperparameter cell rebinds `M` to the number of sampled aging models,
# so re-running this cell afterwards would otherwise split on the wrong size.
train_rate = 0.6
test_rate = 0.2
n_samples = len(dataset)
M_train = int(n_samples * train_rate)
M_test = int(n_samples * test_rate)

# Fixed seeds keep both splits reproducible across runs.
train_data, rest_data = random_split(
    dataset,
    [M_train, n_samples - M_train],
    generator=torch.Generator().manual_seed(19950102),
)
test_data, valid_data = random_split(
    rest_data,
    [M_test, n_samples - M_train - M_test],
    generator=torch.Generator().manual_seed(19950102),
)
len(train_data), len(test_data), len(valid_data)

(6595, 2198, 2199)

In [7]:
# Full-batch loaders: each loader yields its whole split as a single batch.
train_loader, test_loader, valid_loader = (
    DataLoader(split, batch_size=len(split))
    for split in (train_data, test_data, valid_data)
)

# Hyperparameter

In [8]:
# Size of the hidden layer.
N_Hidden = 16

# m and T are forwarded to the training routines together with the loss
# function (presumably loss hyperparameters -- TODO confirm).
m, T = 0.3, 0.1

# Monte Carlo sample counts: K = number of time samples, M = number of
# aging-model samples; the *_test variants are passed separately to training.
# NOTE: M and M_test rebind names that earlier cells used for the dataset
# size and the test-split size.
K, M = 5, 10
K_test, M_test = 5, 10

# Normal PNN

## Training

In [11]:
import pNN_aging_aware_vectorization as pnnv
importlib.reload(pnnv)  # pick up edits to the module without restarting

# Two stacked PNN layers: N_features -> N_Hidden -> N_class.
PNN = torch.nn.Sequential(
    pnnv.PNNLayer(N_features, N_Hidden, age_generator),
    pnnv.PNNLayer(N_Hidden, N_class, age_generator),
)

# Baseline configuration: a single model sample (M=1) and the single time
# point t = 0 (presumably the un-aged state -- TODO confirm).
PNN.apply(lambda z: pnnv.MakeModel(z, M=1))
PNN.apply(lambda z: pnnv.SetTime(z, t=[0]))

# Move the network to the target device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is guaranteed to
# hold the parameters that actually live on `device`.
PNN.to(device)
optimizer_PNN = torch.optim.Adam(PNN.parameters(), lr=0.01)

# Echo the module structure as the cell output.
PNN

Sequential(
  (0): PNNLayer()
  (1): PNNLayer()
)

In [12]:
import training_vectorization as tv
importlib.reload(tv)  # pick up edits to the training module

# Train the baseline PNN for up to 5000 epochs; the call returns the training
# loss, the test loss and the parameters.
train_loss_PNN, test_loss_PNN, parameter_PNN = tv.train_normal_pnn(
    PNN,
    train_loader,
    test_loader,
    m,
    T,
    optimizer_PNN,
    pnnv.LossFunction,
    Epoch=5000,
    cache='try',
)

  0%|          | 0/5000 [00:00<?, ?it/s]

| Epoch:     0 | Accuracy: 0.09827 | Loss: 1.395274162 |
| Epoch:   100 | Accuracy: 0.73112 | Loss: 0.564792812 |
| Epoch:   200 | Accuracy: 0.77707 | Loss: 0.374519110 |


KeyboardInterrupt: 

In [13]:
# Monte Carlo sample counts for the aging-aware run: M aging-model samples
# and K time samples (the *_test values are passed separately to training).
M = 40
K = 5
M_test = 10
K_test = 5
importlib.reload(pnnv)  # pick up edits to the module without restarting

# Same two-layer architecture as the baseline: N_features -> N_Hidden -> N_class.
AAPNN = torch.nn.Sequential(
    pnnv.PNNLayer(N_features, N_Hidden, age_generator),
    pnnv.PNNLayer(N_Hidden, N_class, age_generator),
)

# Configure M model samples and K time points drawn uniformly from [0, 1).
# NOTE: no NumPy seed is set in the visible cells, so the time points differ
# from run to run.
AAPNN.apply(lambda z: pnnv.MakeModel(z, M))
AAPNN.apply(lambda z: pnnv.SetTime(z, np.random.rand(K).tolist()))

# Move the network to the target device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is guaranteed to
# hold the parameters that actually live on `device`.
AAPNN.to(device)
optimizer_AAPNN = torch.optim.Adam(AAPNN.parameters(), lr=0.01)

# Echo the module structure as the cell output.
AAPNN

Sequential(
  (0): PNNLayer()
  (1): PNNLayer()
)

In [None]:
importlib.reload(tv)  # pick up edits to the training module

# Train the aging-aware PNN for 100 epochs with the training and test sample
# counts (M, K, M_test, K_test) set above.
loss_AAPNN, parameters_AAPNN = tv.train_aged_pnn(
    AAPNN,
    train_loader,
    test_loader,
    m,
    T,
    M,
    K,
    M_test,
    K_test,
    optimizer_AAPNN,
    pnnv.LossFunction,
    Epoch=100,
    cache='default',
)

  0%|          | 0/100 [00:00<?, ?it/s]

| Epoch:     0 | Accuracy: 0.89222 | Loss: 0.200018466 |
| Epoch:    10 | Accuracy: 0.88635 | Loss: 0.214767292 |
| Epoch:    20 | Accuracy: 0.88449 | Loss: 0.213117555 |
| Epoch:    30 | Accuracy: 0.88761 | Loss: 0.213532507 |
| Epoch:    40 | Accuracy: 0.87904 | Loss: 0.219701231 |
| Epoch:    50 | Accuracy: 0.88646 | Loss: 0.208015084 |
| Epoch:    60 | Accuracy: 0.88707 | Loss: 0.212350190 |


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [129]:
# Echo the currently selected torch device.
device

device(type='cpu')