In [78]:
import os
import random
from typing import Tuple

import numpy as np
import torch
from torch import Tensor
from torch import nn
from torch import optim
from torch.backends import cudnn
from torch.nn import functional as F
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import TensorDataset, random_split
from tqdm import tqdm
torch.set_printoptions(precision=4)

In [79]:
def set_seed(seed):
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False

In [80]:
# def prepare_dataloader(
#     batch_size: int = 128,
#     num_samples: int = 100,
#     spread: float = 0.5,
#     split: float = 0.8,
#     seed: int = 1234,
# ):
#     """
#     Creates the dataset
#     :param batch_size: the batch size
#     :param num_samples: number of items per class/label/letter
#     :param spread: the std for normal sampling
#     :param split: train-val split (<1). Number given is allotted as the train %
#     :param seed: seed for the "random" dataset generator
#     :return: the dataloaders
#     """

#     np.random.seed(seed)

#     ideal_sensory_values = np.random.randint(-24, 23, (24, 16))
#     dataset = list()
#     classes = 24
#     for letter in range(classes):
#         for _ in range(num_samples):
#             sensors = []

#             for sensor in ideal_sensory_values[letter]:
#                 sensors.append(np.random.normal(loc=sensor, scale=spread))
            
#             sensors= np.array(sensors)
#             if np.random.choice([True,False],p=[0.7,0.3]):
#                 indices = np.random.choice(np.arange(sensors.size), replace=False, size = int(sensors.size * 0.3))
#                 sensors[indices] = np.random.choice([0,100])               
#             dataset.append([sensors, np.array([letter])])

#     x = list()
#     y = list()

#     for i in range(num_samples * 24): #24 if we train fully instead of classes
#             x.append(dataset[i][0])
#             y.append(dataset[i][1])

#     tensor_x = torch.Tensor(x)
#     tensor_y = torch.Tensor(y)

#     # train_split = int(split * len(x))
#     # val_split = len(x) - train_split
#     train_split = int(split * len(x))
#     val_split = len(x) - train_split

#     tensor_dataset = TensorDataset(tensor_x, tensor_y)
#     train_dataset, val_dataset = random_split(tensor_dataset, [train_split, val_split], generator=torch.Generator().manual_seed(1234))
#     # train_dataset = tensor_dataset[:train_split] 
#     # val_dataset   = tensor_dataset[train_split:] 
#     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

#     return train_loader, val_loader, train_dataset, val_dataset

In [81]:
def prepare_dataloader(
    batch_size: int = 128,
    num_samples: int = 10000,
    spread: float = 0.5,
    split: float = 0.8,
    seed: int = 1234,
):
    """
    Creates the dataset
    :param batch_size: the batch size
    :param num_samples: number of items per class/label/letter
    :param spread: the std for normal sampling
    :param split: train-val split (<1). Number given is allotted as the train %
    :param seed: seed for the "random" dataset generator
    :return: the dataloaders
    """
    np.random.seed(seed)
    ideal_sensory_values = np.random.randint(-24, 23, (24, 16))
    

    classes = 24

    dataset     = list()
    dataset_cor = list()

    for letter in range(classes):
        for _ in range(num_samples):
            sensors = []
            un_cor = []
            for sensor in ideal_sensory_values[letter]:
                r = np.random.normal(loc=sensor, scale=spread)
                sensors.append(r)
                un_cor.append(r)
            
            sensors = np.array(sensors)
            un_cor = np.array(un_cor)
            dataset.append([un_cor, np.array([letter])])
    

            if np.random.choice([True,False],p=[0.7,0.3]):
                indices = np.random.choice(np.arange(sensors.size), replace=False, size=int(sensors.size * 0.3))
                sensors[indices] = np.random.choice([-100,100])
                
            dataset_cor.append([sensors, np.array([letter])])
            
    
    x = list()
    w = list()
    y = list()
    
    for i in range(num_samples * classes): #24 if we train fully instead of classes
        x.append(dataset_cor[i][0])
        w.append(dataset[i][0])
        y.append(dataset_cor[i][1])

    x = np.array(x)
    w = np.array(w)
    y = np.array(y)

    tensor_x = torch.Tensor(x)
    tensor_w = torch.Tensor(w)
    tensor_y = torch.Tensor(y)

    train_split = int(split * len(x))
    val_split = len(x) - train_split

    tensor_dataset = TensorDataset(tensor_x, tensor_w, tensor_y)

    train_dataset, val_dataset = random_split(tensor_dataset, [train_split, val_split])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, val_loader, x, w

In [82]:
class LinearAE(nn.Module):
    def __init__(self, in_size=16, latent_size=64):
        super(LinearAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(in_size, 64),
            nn.ReLU(),
            nn.Linear(64,latent_size),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_size, 64),
            nn.ReLU(),
            nn.Linear(64, in_size),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)

        return x

In [83]:
def get_min_and_max(dataloader, device) -> Tuple[float, float]:
    """
    Finds the min and max of the dataset.
    This is used for Normalization of the dataset.

    :param dataloader: dataloader to calculate it for
    :param device: device to run computations on
    :return: tuple of mean and std
    """
    min_val, max_val = torch.Tensor([999]).to(device), torch.Tensor([-999]).to(device)
    for data, data_uncor, _ in tqdm(dataloader):
        data_uncor = data_uncor.to(device)
        min_val = torch.min(min_val, torch.min(data_uncor))
        max_val = torch.max(max_val, torch.max(data_uncor))

    return min_val.item(), max_val.item()

In [84]:
set_seed(1234)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LinearAE(16, 256).cuda()
train_loader, val_loader, corrupted_dataset, uncor_dataset = prepare_dataloader()

In [85]:
corrupted_dataset[0], uncor_dataset[0]

(array([100.        ,  14.35540801, -12.02123764, 100.        ,
         -9.33854858,  -0.14932535,  17.25901441,   1.70367204,
          6.74142256,  19.11366117,   5.54541005,  20.10880644,
          2.13634056, 100.        , -19.6863874 , 100.        ]),
 array([ -4.96206429,  14.35540801, -12.02123764,  -0.63251244,
         -9.33854858,  -0.14932535,  17.25901441,   1.70367204,
          6.74142256,  19.11366117,   5.54541005,  20.10880644,
          2.13634056,   3.98658565, -19.6863874 ,  -7.44503909]))

In [86]:
min_val, max_val = get_min_and_max(train_loader, device)

optimizer = optim.Adam(model.parameters())
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)

100%|██████████| 1500/1500 [00:03<00:00, 439.20it/s]


In [88]:
for epoch in range(50):
  train_loss = 0
 
  for batch_idx, data in enumerate(train_loader):
      vector, vector_uncor, _ = data

      vector     = vector.to(device)
      vector     = (vector - min_val) / (max_val - min_val )
      vector_uncor = vector_uncor.to(device)
      vector_uncor = (vector_uncor - min_val) / (max_val - min_val )
      

      optimizer.zero_grad()
      y = model(vector)
      # recon_batch, mu, logvar = model(x)
      loss = F.mse_loss(y, vector_uncor)
      # loss = loss_function(recon_batch, x, mu, logvar)
      loss.backward()
      optimizer.step()
      train_loss += loss.item()

  scheduler.step(train_loss/len(train_loader))
  print("Epoch: {:03d} || Train Loss: {:.20f} ".format(epoch, train_loss / len(train_loader)))


Epoch: 000 || Train Loss: 0.00858986299764365017 
Epoch: 001 || Train Loss: 0.00829912059567868704 
Epoch: 002 || Train Loss: 0.00802067039037744275 
Epoch: 003 || Train Loss: 0.00777044168487191211 
Epoch: 004 || Train Loss: 0.00756072256310532514 
Epoch: 005 || Train Loss: 0.00737407410796731694 
Epoch: 006 || Train Loss: 0.00718174670419345282 
Epoch: 007 || Train Loss: 0.00702525631555666556 
Epoch: 008 || Train Loss: 0.00689436893754949171 
Epoch: 009 || Train Loss: 0.00676313549963136491 
Epoch: 010 || Train Loss: 0.00663972909531245631 
Epoch: 011 || Train Loss: 0.00653147411222259212 
Epoch: 012 || Train Loss: 0.00644933758769184341 
Epoch: 013 || Train Loss: 0.00634075252432376134 
Epoch: 014 || Train Loss: 0.00623630272503942259 
Epoch: 015 || Train Loss: 0.00604097409080713976 
Epoch: 016 || Train Loss: 0.00587463004856059959 
Epoch: 017 || Train Loss: 0.00575796936918050034 
Epoch: 018 || Train Loss: 0.00563345705935110620 
Epoch: 019 || Train Loss: 0.00554637003752092485 


In [89]:
model_dir = 'models'
model_filename = 'domainA-encoder-corrupted.pt'
model_filepath = os.path.join(model_dir, model_filename)
torch.save(model.state_dict(), model_filepath)

In [90]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        
        self.fc1 = nn.Linear(16 , 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 24)

    def forward(self, x):
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def DNN():
  net = Net()
  return net

In [91]:
model = DNN().cuda()
model.load_state_dict(torch.load('/home/ubuntu/Latent-Transfer/Validation-Model/models/dnn.pt'))

enc_model = LinearAE(16,256).cuda()
enc_model.load_state_dict(torch.load('/home/ubuntu/Latent-Transfer/DomainA/models/domainA-encoder-corrupted.pt'))

<All keys matched successfully>

In [95]:
test = val_loader.dataset[0][0].cuda()
test_crct = val_loader.dataset[0][1].cuda()
inp = (test - min_val) / (max_val - min_val)
enc = enc_model.encoder(inp)
recon = enc_model.decoder(enc)
reconstructed = (recon  *  (max_val - min_val)) + min_val
print('Input Sensor Data (Unnormalized) -> \n',test)
print('Input Sensor Data (Normalized)   -> \n',inp)
print('-----------------------------------------------------------')
print('Input Sensor Data (Normalized) -> \n',inp)
print('Reconstructed Sensor Data (Normalized) -> \n',recon)
print('-----------------------------------------------------------')
print('Input Uncorrupted data (Unnormalized)      -> \n',test_crct )
print('Input Corrupted Sensor Data (Unnormalized) -> \n',test) 
print('Reconstruced Sensor Data (Unnormalized)    -> \n',reconstructed)

Input Sensor Data (Unnormalized) -> 
 tensor([-100.0000, -100.0000,   18.1098,    8.8315,  -18.9407,   -0.4759,
          21.3509,   -7.9034,  -23.7517,  -23.0033, -100.0000,   20.9954,
        -100.0000,    5.8936,   -9.8196,   20.2412], device='cuda:0')
Input Sensor Data (Normalized)   -> 
 tensor([-1.4737, -1.4737,  0.8811,  0.6961,  0.1424,  0.5105,  0.9457,  0.3624,
         0.0465,  0.0614, -1.4737,  0.9386, -1.4737,  0.6375,  0.3242,  0.9235],
       device='cuda:0')
-----------------------------------------------------------
Input Sensor Data (Normalized) -> 
 tensor([-1.4737, -1.4737,  0.8811,  0.6961,  0.1424,  0.5105,  0.9457,  0.3624,
         0.0465,  0.0614, -1.4737,  0.9386, -1.4737,  0.6375,  0.3242,  0.9235],
       device='cuda:0')
Reconstructed Sensor Data (Normalized) -> 
 tensor([0.7940, 0.2542, 0.9102, 0.7272, 0.0686, 0.4870, 0.9371, 0.2764, 0.0117,
        0.0465, 0.0963, 0.8993, 0.3245, 0.6179, 0.3526, 0.9829],
       device='cuda:0', grad_fn=<SigmoidBackward>)


In [94]:
s = 0
p = 0
r = 0

for i in range(0, len(val_loader)):

    test = val_loader.dataset[i][0].cuda()
    test_label = val_loader.dataset[i][2].cuda()
    inp = (test - min_val) / (max_val - min_val)

# print('-------------------------------------------------------------------')
# print('Input Sensor Data (Unnormalized) -> \n',test)
# print('Input Sensor Data (Normalized)   -> \n',inp)

    
    enc = enc_model.encoder(inp)
    recon = enc_model.decoder(enc)
# print('-------------------------------------------------------------------')
# print('Input Sensor Data (Normalized) -> \n',inp)
# print('Input Sensor Data (Normalized) -> \n',recon)
    reconstructed = (recon  *  (max_val - min_val)) + min_val
# print('-------------------------------------------------------------------')
# print('Input Sensor Data (Unnormalized)        -> \n',test) 
# print('Reconstruced Sensor Data (Unnormalized) -> \n',reconstructed)
# print('-------------------------------------------------------------------')
    x_check = torch.stack([test ,reconstructed])
    out = model(x_check)
    _, preds = torch.max(out, 1)
    # print(preds[0], preds[1], test_label)
    if (preds[0]==preds[1]):
        s = s + 1
    if (preds[0]==test_label):
        p = p + 1
    if (preds[1]==test_label):
        r = r + 1


print('Comparative Accuracy        : {}'.format((s / len(val_loader)) * 100))   
print('True Label Accuracy         : {}'.format((p / len(val_loader)) * 100))
print('Reconstructed Label Accuracy: {}'.format((r / len(val_loader)) * 100))
# print('Prediction of the Input data         : ',preds[0])
# print('Prediction of the Reconstructed data : ',preds[1])

Comparative Accuracy        : 38.13333333333333
True Label Accuracy         : 38.13333333333333
Reconstructed Label Accuracy: 100.0
