In [1]:
import importlib
from torch.autograd import Variable
import torch
import pickle
import pNN_aging_aware as pnn
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import PNN_Setting as ps

# Prepare data

## Load data

In [2]:
# Locate the pickled Pendigits dataset relative to the current working directory.
datapath = os.path.join(
    os.getcwd(), 'Datasets', 'PMLC', 'data_processed', 'Dataset_Pendigits.p')

# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(datapath, 'rb') as handle:
    dataset = pickle.load(handle)

# Features are cast to float; labels are used as stored.
X, y = dataset['X'].float(), dataset['y']

# Number of samples / features, and class count taken as (largest label + 1).
M, N_features = X.shape[:2]
N_class = y.max().item() + 1

X.shape, y.shape, M, N_features, N_class

(torch.Size([10992, 16]), torch.Size([10992]), 10992, 16, 10)

## Data preprocessing

In [3]:
# Min-max normalization: rescale every feature (column) of X to [0, 1].
feature_min = torch.min(X, dim=0)[0]
feature_range = torch.max(X, dim=0)[0] - feature_min
# A constant feature has zero range; divide by 1 instead so that it maps to 0
# rather than producing NaN/inf.
feature_range = torch.where(
    feature_range > 0, feature_range, torch.ones_like(feature_range))
X = (X - feature_min) / feature_range
torch.min(X), torch.max(X)

(tensor(0.), tensor(1.))

In [4]:
# Peek at the first 10 rows and first 9 columns of the normalized features.
X[:10, :9]

tensor([[0.4700, 1.0000, 0.2700, 0.8100, 0.5700, 0.3700, 0.2600, 0.0000, 0.0000],
        [0.0000, 0.8900, 0.2700, 1.0000, 0.4200, 0.7500, 0.2900, 0.4500, 0.1500],
        [0.0000, 0.5700, 0.3100, 0.6800, 0.7200, 0.9000, 1.0000, 1.0000, 0.7600],
        [0.0000, 1.0000, 0.0700, 0.9200, 0.0500, 0.6800, 0.1900, 0.4500, 0.8600],
        [0.0000, 0.6700, 0.4900, 0.8300, 1.0000, 1.0000, 0.8100, 0.8000, 0.6000],
        [1.0000, 1.0000, 0.8800, 0.9900, 0.4900, 0.7400, 0.1700, 0.4700, 0.0000],
        [0.0000, 1.0000, 0.0300, 0.7200, 0.2600, 0.3500, 0.8500, 0.3500, 1.0000],
        [0.0000, 0.3900, 0.0200, 0.6200, 0.1100, 0.0500, 0.6300, 0.0000, 1.0000],
        [0.1300, 0.8900, 0.1200, 0.5000, 0.7200, 0.3800, 0.5600, 0.0000, 0.0400],
        [0.5700, 1.0000, 0.2200, 0.7200, 0.0000, 0.3100, 0.2500, 0.0000, 0.7500]])

In [5]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Wrap features and labels so samples can be split and batched together.
dataset = TensorDataset(X, y)

# Split: 60 % train, 20 % test, remainder validation.
train_rate = 0.6
test_rate = 0.2
# Sizes are derived from the dataset itself rather than from the global `M`,
# because `M` is re-bound to the number of parallel models further down the
# notebook; re-running this cell afterwards would otherwise break the split.
n_samples = len(dataset)
M_train = int(n_samples * train_rate)
M_test = int(n_samples * test_rate)

# A fixed seed keeps the split identical across "Restart & Run All".
split_generator = torch.Generator().manual_seed(0)
train_data, rest_data = random_split(
    dataset, [M_train, n_samples - M_train], generator=split_generator)
test_data, valid_data = random_split(
    rest_data, [M_test, n_samples - M_train - M_test], generator=split_generator)
len(train_data), len(test_data), len(valid_data)

(6595, 2198, 2199)

In [6]:
# Full-batch loaders: each split is served as one batch holding all its samples.
train_loader, test_loader, valid_loader = (
    DataLoader(subset, batch_size=len(subset))
    for subset in (train_data, test_data, valid_data)
)

# Hyperparameters

In [7]:
# Width (number of units) used for the hidden Linear layers built below.
N_Hidden = 8

# Functions for generating parallel NN

In [8]:
# ps.MakeParallelPNNs
# ps.MakeParallelModels

Usage:

`Parallel_PNNs = ps.MakeParallelPNNs(pnn, M, K)`, generating different models & different time stamps

`Parallel_PNNs = ps.MakeParallelPNNs(pnn, M)`, generating only different models

# Try parallelization on normal NN

In [9]:
# Fully connected reference network: input -> 3 hidden layers -> output, with
# a Tanh after every Linear layer (including the last one).
# NOTE(review): the final Tanh bounds the values handed to CrossEntropyLoss to
# [-1, 1]; presumably intentional to mirror the pNN activation - confirm.
NN_test = torch.nn.Sequential(*[
    module
    for n_in, n_out in (
        (N_features, N_Hidden),
        (N_Hidden, N_Hidden),
        (N_Hidden, N_Hidden),
        (N_Hidden, N_class),
    )
    for module in (torch.nn.Linear(n_in, n_out), torch.nn.Tanh())
])

def weights_init(m):
    """Initialise a module in place; intended to be passed to ``Module.apply``.

    ``torch.nn.Linear`` layers get Xavier-uniform weights and a constant bias
    of 0.01. Every other module type is left untouched.

    Args:
        m: The module currently visited by ``apply``.
    """
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    # Layers created with ``bias=False`` have ``m.bias is None``; skip them
    # instead of failing on the missing tensor.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0.01)

# Classification loss shared by training and evaluation.
celoss = torch.nn.CrossEntropyLoss()

# Re-initialise every sub-module of the network via `weights_init`.
NN_test.apply(weights_init)

# Adam over all parameters of the reference network.
optimizer_NN_test = torch.optim.Adam(
    params=NN_test.parameters(),
    lr=0.1,
)

# Display the architecture.
NN_test

Sequential(
  (0): Linear(in_features=16, out_features=8, bias=True)
  (1): Tanh()
  (2): Linear(in_features=8, out_features=8, bias=True)
  (3): Tanh()
  (4): Linear(in_features=8, out_features=8, bias=True)
  (5): Tanh()
  (6): Linear(in_features=8, out_features=10, bias=True)
  (7): Tanh()
)

For a normal NN there is neither model variation nor time variation, i.e., the $M \times K$ copies of `NN_test` are exactly identical.

In [10]:
# M: number of parallel model copies; K: number of time stamps sampled per copy.
# NOTE: this re-binds `M`, which held the number of samples when the data was
# loaded at the top of the notebook.
M = 3
K = 3
for epoch in range(1000):
    # clear gradients of NN_test
    optimizer_NN_test.zero_grad()

    # copy NN_test M times for parallel training
    Parallel_Models = ps.MakeParallelModels(NN_test, M)

    # every copy accumulates the gradients of K forward/backward passes
    for model in Parallel_Models:
        for _ in range(K):
            for X_train, y_train in train_loader:
                prediction = model(X_train)
                loss = celoss(prediction, y_train)
                loss.backward()

    # name -> parameter lookup of every copy, built once per epoch instead of
    # once per parameter
    replica_params = [dict(model.named_parameters())
                      for model in Parallel_Models]

    # write the averaged gradient back into NN_test
    for name, param in NN_test.named_parameters():
        # stacked has shape [number of copies, *param.shape]
        stacked = torch.stack([params[name].grad for params in replica_params])
        # mean over the copies, then divide by K to average over the K passes
        # that were accumulated in each copy
        param.grad = stacked.mean(dim=0) / K

    # update parameters
    optimizer_NN_test.step()

    # evaluate on the test split; no autograd graph is needed here
    with torch.no_grad():
        for x_test, y_test in test_loader:
            prediction_test = NN_test(x_test)
            loss_test = celoss(prediction_test, y_test)
            y_hat = torch.argmax(prediction_test, dim=1).numpy()
            acc_test = (y_hat == y_test.numpy()).mean()

    print(
        f'| Epoch: {epoch:-5d} | Accuracy: {acc_test:.5f} | Loss: {loss_test.item():.5f} |')

| Epoch:     0 | Accuracy: 0.10692 | Loss: 2.29221 |
| Epoch:     1 | Accuracy: 0.19654 | Loss: 2.28091 |
| Epoch:     2 | Accuracy: 0.27571 | Loss: 2.08707 |
| Epoch:     3 | Accuracy: 0.39809 | Loss: 1.90075 |
| Epoch:     4 | Accuracy: 0.48544 | Loss: 1.79733 |
| Epoch:     5 | Accuracy: 0.48135 | Loss: 1.72169 |
| Epoch:     6 | Accuracy: 0.44404 | Loss: 1.68533 |
| Epoch:     7 | Accuracy: 0.48635 | Loss: 1.64248 |
| Epoch:     8 | Accuracy: 0.50045 | Loss: 1.61433 |
| Epoch:     9 | Accuracy: 0.52502 | Loss: 1.57897 |
| Epoch:    10 | Accuracy: 0.52821 | Loss: 1.55988 |
| Epoch:    11 | Accuracy: 0.51911 | Loss: 1.53636 |
| Epoch:    12 | Accuracy: 0.52366 | Loss: 1.50423 |
| Epoch:    13 | Accuracy: 0.53731 | Loss: 1.49020 |
| Epoch:    14 | Accuracy: 0.54459 | Loss: 1.46292 |
| Epoch:    15 | Accuracy: 0.55050 | Loss: 1.44967 |
| Epoch:    16 | Accuracy: 0.55641 | Loss: 1.43425 |
| Epoch:    17 | Accuracy: 0.55187 | Loss: 1.41275 |
| Epoch:    18 | Accuracy: 0.55778 | Loss: 1.3

In [11]:
# Accuracy of the trained NN_test on the validation split. Gradients are not
# needed for evaluation, so the forward pass runs under no_grad.
with torch.no_grad():
    for x_valid, y_valid in valid_loader:
        prediction_valid = NN_test(x_valid)
        # predicted class = index of the largest output per sample
        pred_y = torch.argmax(prediction_valid, dim=1).numpy()
        acc_valid = (pred_y == y_valid.numpy()).mean()
print(acc_valid)

0.8767621646202819
