In [1]:
import torch
import torch.nn as tnn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F

from collections import deque
import os
import copy
import numpy as np
from sklearn.manifold import Isomap
from sklearn.neighbors import NearestNeighbors
from locally_linear import LocallyLinearBackward

from tqdm import tqdm
from collections import deque
import os

from tensorboardX import SummaryWriter
from model.vgg_tiny import Conv, Fc

In [119]:
os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
memory_gpu=[int(x.split()[2]) for x in open('tmp','r').readlines()]
os.environ['CUDA_VISIBLE_DEVICES']=str(np.argmax(memory_gpu))
os.system('rm tmp')

0

In [120]:
BATCH_SIZE = 50
LEARNING_RATE = 0.01
EPOCH = 1
n_dimentions = 32

In [121]:
transform = transforms.Compose([
    transforms.RandomResizedCrop(28),
    transforms.ToTensor()])

data_train = dsets.MNIST(root = "./data/",
                         transform=transform,
                            train = True,
                            download = True)

data_test = dsets.MNIST(root="./data/",
                        transform=transform,
                           train = False)

trainLoader = torch.utils.data.DataLoader(dataset=data_train, batch_size=BATCH_SIZE, shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset=data_test, batch_size=BATCH_SIZE, shuffle=False)

In [122]:
class VGG_conv(tnn.Module):
    def __init__(self):
        super(VGG_conv, self).__init__()
        self.layer1 = tnn.Sequential(

            # 1-1 conv layer
            tnn.Conv2d(1, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1-2 conv layer
            tnn.Conv2d(64, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = tnn.Sequential(

            # 2-1 conv layer
            tnn.Conv2d(64, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2-2 conv layer
            tnn.Conv2d(128, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2 Pooling lyaer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = tnn.Sequential(

            # 3-1 conv layer
            tnn.Conv2d(128, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3-2 conv layer
            tnn.Conv2d(256, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer4 = tnn.Sequential(

            # 4-1 conv layer
            tnn.Conv2d(256, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4-2 conv layer
            tnn.Conv2d(512, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        # self.layer5 = tnn.Sequential(
        #
        #     # 5-1 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5-2 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5 Pooling layer
        #    tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer6 = tnn.Sequential(

            # 6 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512),
            tnn.BatchNorm1d(512),
            tnn.ReLU())


        self.layer7 = tnn.Sequential(

            # 7 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512,
            tnn.BatchNorm1d(512)),
            tnn.ReLU())
    
    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
   #   out = self.layer5(out)
      vgg16_features = out.view(out.size(0), -1)
      out = self.layer6(vgg16_features)
      out = self.layer7(out)
      return out

In [123]:
class VGG_fc(tnn.Module):
    def __init__(self):
        super(VGG_fc, self).__init__()
        self.layer8 = tnn.Sequential(

        # 8 output layer
        tnn.Linear(32, 10))

    def forward(self, x):
        out = self.layer8(x)
#         out = F.softmax(out, dim=1)  #CrossEntropy 不能用这个
        return out

In [167]:
def isomap(feature_queue, n_components):
    length = len(feature_queue)
    for i in range(length):
        if i == 0:
            feature_tmp = feature_queue.popleft()
            features = feature_tmp
            feature_to_use = feature_tmp
        else:
            feature_tmp = feature_queue.popleft()
            features = np.concatenate((features, feature_tmp), axis=0)
        
    feature_input = features
    embedding = Isomap(n_components=n_components)
    transformed = embedding.fit_transform(feature_input)

    return transformed, feature_to_use
    

In [194]:
def isomap_back(X_Que,Y_use,Y_all,Error):
#Error has dim:p*N, where p is the dims of every object after isomap N is the batchsize
#Y is the feature during the forward process
#X is the feature before Isomap

    k=4
    E=Error.cpu().numpy()
    Y_use = Y_use.detach().cpu().numpy()
    length=len(X_Que)
    for i in range(length):
        if i == 0:
            feature_tmp = X_Que.popleft()
            X_use=feature_tmp
            X = feature_tmp
        else:
            feature_tmp = X_Que.popleft()
            X = np.concatenate((X, feature_tmp), axis=0)

    Yb=Y_use+E
#Calculate all the distances between Yb and all Y
    n=Yb.shape
    total=Y_all.shape
    for i in range(n[0]):
        dis=np.zeros(total[0])
        Yi=Yb[i]
        for j in range(total[0]):
            z=(Yi-Y_all[j]).reshape(-1,1)
            dis[j]=np.matmul(z.transpose(),z)
        idx = np.argpartition(dis, k)[0:k]
        for m in range(k):
            if m==0:
                Y_near=Y_all[idx[m]].reshape([1,-1])
                X_near = X[idx[m]].reshape([1,-1])
                Y_bar=Yi.reshape([1,-1])
            else:
                Y_near=np.concatenate((Y_near,Y_all[idx[m]].reshape([1,-1])),axis=0)
                X_near=np.concatenate((X_near,X[idx[m]].reshape([1,-1])),axis=0)
                Y_bar=np.concatenate((Y_bar,Yi.reshape([1,-1])),axis=0)
        
        tmp=np.mat(Y_bar-Y_near)
        Z=tmp*tmp.transpose()
        One=np.mat(np.ones([k, 1]))
        X_near=np.mat(X_near)
        w=(np.linalg.pinv(Z))*One/(One.transpose()*(np.linalg.pinv(Z))*One)
        if i==0:
            X_back=(X_near.transpose()*w).reshape([1,-1])
        else:
            X_back=np.concatenate((X_back,(X_near*w).reshape([1,-1])),axis=0)

        return X_back

In [195]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [210]:
vgg_conv = VGG_conv().to(device)
# vgg_conv.cuda()
vgg_fc = VGG_fc().to(device)
# vgg_fc.cuda()

isomap_feature = torch.empty(BATCH_SIZE, n_dimentions, requires_grad=True, device=device)
cost1 = tnn.MSELoss()
cost2 = tnn.CrossEntropyLoss()
optimizer1 = torch.optim.SGD(vgg_conv.parameters(), lr=LEARNING_RATE)
optimizer2 = torch.optim.SGD(vgg_fc.parameters(), lr=LEARNING_RATE)

In [None]:
# Train the model
for epoch in range(EPOCH):
#  for i, (images, labels) in enumerate(trainLoader):
  vgg_conv.train()
  vgg_fc.train()
  correct = 0
  total = 0
  train_img_queue = deque(maxlen=1000//BATCH_SIZE)    #构建输入图像的队列
  train_label_queue = deque(maxlen=1000//BATCH_SIZE) #构建label的队列
  train_vec_queue = deque(maxlen=1000//BATCH_SIZE)    #构建卷积网络输出向量的队列
  for batch_idx, (images, labels) in enumerate(trainLoader):
    train_img_queue.append(images)   #入队是append，出队是popleft
    train_label_queue.append(labels)
    
    # Forward + Backward + Optimize
    
#     optimizer1.zero_grad()
#     optimizer2.zero_grad()

    outputs1 = vgg_conv(images.to(device)) #卷积网络的输出，将图片embedding成512维向量，the shape of output is (batch_size, 512)
    #print(images, outputs1)
    train_vec_queue.append(outputs1.detach().cpu().numpy())
#     print(train_vec_queue.qsize())
#     print(train_vec_queue.get_nowait().shape)
    
    if len(train_img_queue) == 1000//BATCH_SIZE:  #等队列满了之后，开始让所有图片进入isomap，然后pop出队首的数据进行反向传播
        isomap_forward, feature_to_use = isomap(copy.deepcopy(train_vec_queue), n_components=32)#将1000张图片通过卷积层得到的embedding向量输入isomap层，获得降维后的结果
        #isomap: numpy.ndarray   feature_to_use: numpy.ndarray
        
        if outputs1.is_cuda:
            batch_feature = torch.from_numpy(isomap_forward[:BATCH_SIZE]).cuda()
        else:
            batch_feature = torch.from_numpy(isomap_forward[:BATCH_SIZE])
        
        batch_feature = batch_feature.float()
        
#         img_tmp = train_img_queue.popleft()
#         label_tmp = train_label_queue.popleft()
#         vec_tmp = train_vec_queue.popleft()
        
        isomap_feature = batch_feature
        isomap_feature.requires_grad = True
        outputs2 = vgg_fc(isomap_feature)
        
        batch_label = train_label_queue.popleft()
        loss2 = cost2(outputs2, batch_label.to(device))
        
        #---------------------------------------
        optimizer2.zero_grad()
        loss2.backward()
        optimizer2.step()
        
        grad = isomap_feature.grad
        
        E = grad*LEARNING_RATE
        
        x_hat = isomap_back(copy.deepcopy(train_vec_queue), isomap_feature, isomap_forward, E)
        #all X:isomap_back(copy.deepcopy(train_vec_queue) Y:isomap_feature    all Y: isomap_forward     y error:E
        
        feature_to_use = torch.Tensor(feature_to_use)
        x_hat = torch.Tensor(x_hat)
        loss1 = cost1(feature_to_use.to(device), x_hat.to(device))
        loss1.requires_grad = True
        optimizer1.zero_grad()
        loss1.backward()
        optimizer1.step()
        
        train_vec_queue.popleft()
        train_img_queue.popleft()
        
        pred = torch.max(outputs2.data, 1)[1]
        train_correct = (pred == batch_label.to(device)).sum()
        
        print('epoch:{}/{}  batch:{}/{}  loss1:{:.6f}  loss2:{:.6f}  acc:{:.4f}'.format(epoch, EPOCH, batch_idx,
                                                                         data_train.__len__() // BATCH_SIZE, loss1,
                                                                         loss2, float(train_correct) / BATCH_SIZE))

#   vgg_conv.train()
#   vgg_fc.train()
#   correct = 0
#   with torch.no_grad():
#     for batch_idx, (val_x, val_y) in enumerate(validation_loader):
#         val_x, val_y = val_x.cuda(), val_y.cuda()
#         output1 = vgg_conv(val_x)
        
#         output2 = vgg_fc()
#         pred = torch.max(output.data, 1)[1]
#         correct += (pred == val_y.squeeze(1)).sum()
#     acc = float(correct) / validation_set.__len__()
#     print('epoch:{}/{} acc:{:.4f} max_val_acc:{}'.format(epoch, EPOCH, acc, max_acc))
  
        
#         Y = isomap_feature.detach().cpu().numpy()
        
        
#         Y_hat = isomap_feature.detach().cpu().numpy()
        
#         if (np.any(np.isnan(Y))):
#             print("Nan element")
#         if(np.all(np.isfinite(Y))):
#             print("")
#         back = LocallyLinearBackward(n_neighbors=10) # n_neighbors is a hyperparameter
#         back.fit(Y, Y_hat)
        
#         X_hat = back.error_backward(feature_to_use.detach().cpu().numpy())
#         print(X_hat)
#         target = torch.from_numpy(X_hat).to(device)
#         loss1 = cost1(feature_to_use, target)
        
#         optimizer1.zero_grad()
#         loss1.backward()
#         optimizer1.step()
#         #----------------------------------------
# #         current_img = train_img_queue.get()
# #         current_label = train_label_queue.get()
# #         current_vec = train_vec_queue.get()
        
    
    
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted.cpu() == labels.cpu()).sum()
#     loss = cost(outputs, labels.cuda())
#     loss.backward()
#     optimizer.step()

    

#   print ('Epoch [%d/%d], Loss. %.4f' %(epoch+1, EPOCH, loss.data[0]))
#   print('Test Accuracy of the model on the training set: %d %%' % (100 * correct / total))

# # Test the model
#   vgg16.eval()
#   correct = 0
#   total = 0

#   for images, labels in testLoader:
#     images = Variable(images).cuda()
#     outputs = vgg16(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()

#   print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# # Save the Trained Model
# torch.save(vgg16.state_dict(),'checkpoint_without_model.pt')


epoch:0/1  batch:19/1200  loss1:0.029237  loss2:2.488489  acc:0.0800
epoch:0/1  batch:20/1200  loss1:0.020116  loss2:2.625279  acc:0.0800
epoch:0/1  batch:21/1200  loss1:0.026848  loss2:2.292303  acc:0.2000
epoch:0/1  batch:22/1200  loss1:0.027397  loss2:2.370904  acc:0.1600
epoch:0/1  batch:23/1200  loss1:0.021815  loss2:2.884284  acc:0.0800
epoch:0/1  batch:24/1200  loss1:0.022291  loss2:2.578059  acc:0.0600
epoch:0/1  batch:25/1200  loss1:0.031076  loss2:2.644250  acc:0.0800
epoch:0/1  batch:26/1200  loss1:0.030357  loss2:2.553852  acc:0.0600
epoch:0/1  batch:27/1200  loss1:0.022617  loss2:2.697981  acc:0.0400
epoch:0/1  batch:28/1200  loss1:0.026064  loss2:2.626992  acc:0.0800
epoch:0/1  batch:29/1200  loss1:0.028195  loss2:2.755454  acc:0.0600
epoch:0/1  batch:30/1200  loss1:0.022766  loss2:2.612507  acc:0.1400
epoch:0/1  batch:31/1200  loss1:0.030420  loss2:2.526416  acc:0.1000
epoch:0/1  batch:32/1200  loss1:0.029058  loss2:2.607552  acc:0.1000
epoch:0/1  batch:33/1200  loss1:0.