In [1]:
import torch
import torch.nn as tnn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F

from collections import deque
import os
import copy
import numpy as np
from sklearn.manifold import Isomap
from sklearn.neighbors import NearestNeighbors
from locally_linear import LocallyLinearBackward

In [2]:
os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
memory_gpu=[int(x.split()[2]) for x in open('tmp','r').readlines()]
os.environ['CUDA_VISIBLE_DEVICES']=str(np.argmax(memory_gpu))
os.system('rm tmp')

0

In [3]:
BATCH_SIZE = 50
LEARNING_RATE = 0.01
EPOCH = 1
n_dimentions = 32

In [4]:
transform = transforms.Compose([
    transforms.RandomResizedCrop(28),
    transforms.ToTensor()])

data_train = dsets.MNIST(root = "./data/",
                         transform=transform,
                            train = True,
                            download = True)

data_test = dsets.MNIST(root="./data/",
                        transform=transform,
                           train = False)

trainLoader = torch.utils.data.DataLoader(dataset=data_train, batch_size=BATCH_SIZE, shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset=data_test, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
class VGG_conv(tnn.Module):
    def __init__(self):
        super(VGG_conv, self).__init__()
        self.layer1 = tnn.Sequential(

            # 1-1 conv layer
            tnn.Conv2d(1, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1-2 conv layer
            tnn.Conv2d(64, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = tnn.Sequential(

            # 2-1 conv layer
            tnn.Conv2d(64, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2-2 conv layer
            tnn.Conv2d(128, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2 Pooling lyaer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = tnn.Sequential(

            # 3-1 conv layer
            tnn.Conv2d(128, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3-2 conv layer
            tnn.Conv2d(256, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer4 = tnn.Sequential(

            # 4-1 conv layer
            tnn.Conv2d(256, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4-2 conv layer
            tnn.Conv2d(512, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        # self.layer5 = tnn.Sequential(
        #
        #     # 5-1 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5-2 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5 Pooling layer
        #    tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer6 = tnn.Sequential(

            # 6 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512),
            tnn.BatchNorm1d(512),
            tnn.ReLU())


        self.layer7 = tnn.Sequential(

            # 7 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512,
            tnn.BatchNorm1d(512)),
            tnn.ReLU())
    
    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
   #   out = self.layer5(out)
      vgg16_features = out.view(out.size(0), -1)
      out = self.layer6(vgg16_features)
      out = self.layer7(out)
      return out

In [6]:
class VGG_fc(tnn.Module):
    def __init__(self):
        super(VGG_fc, self).__init__()
        self.layer8 = tnn.Sequential(

        # 8 output layer
        tnn.Linear(32, 10))

    def forward(self, x):
        out = self.layer8(x)
#         out = F.softmax(out, dim=1)  #CrossEntropy 不能用这个
        return out

In [26]:
def isomap(feature_queue, n_components):
    length = len(feature_queue)
    for i in range(length):
        if i == 0:
            feature_tmp = feature_queue.popleft()
            features = feature_tmp
            feature_to_use = feature_tmp
        else:
            feature_tmp = feature_queue.popleft()
            features = np.concatenate((features, feature_tmp), axis=0)
        
    feature_input = features
    embedding = Isomap(n_components=n_components)
    transformed = embedding.fit_transform(feature_input)

    return transformed, feature_to_use
    

In [31]:
def isomap_back(feature_queue, isomap_forward, E):

SyntaxError: unexpected EOF while parsing (<ipython-input-31-1481f5cd4b7a>, line 1)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
vgg_conv = VGG_conv().to(device)
# vgg_conv.cuda()
vgg_fc = VGG_fc().to(device)
# vgg_fc.cuda()

isomap_feature = torch.empty(BATCH_SIZE, n_dimentions, requires_grad=True, device=device)
cost1 = tnn.MSELoss()
cost2 = tnn.CrossEntropyLoss()
optimizer1 = torch.optim.SGD(vgg_conv.parameters(), lr=LEARNING_RATE)
optimizer2 = torch.optim.SGD([{'params':vgg_fc.parameters()}
                               ,{'params':isomap_feature}
                              ], lr=LEARNING_RATE)

In [30]:
# Train the model
for epoch in range(EPOCH):
#  for i, (images, labels) in enumerate(trainLoader):
  vgg_conv.train()
  correct = 0
  total = 0
  train_img_queue = deque(maxlen=1000//BATCH_SIZE)    #构建输入图像的队列
  train_label_queue = deque(maxlen=1000//BATCH_SIZE) #构建label的队列
  train_vec_queue = deque(maxlen=1000//BATCH_SIZE)    #构建卷积网络输出向量的队列
  for images, labels in trainLoader:
    train_img_queue.append(images)   #入队是append，出队是popleft
    train_label_queue.append(labels)
    
    # Forward + Backward + Optimize
    
#     optimizer1.zero_grad()
#     optimizer2.zero_grad()

    outputs1 = vgg_conv(images.to(device)) #卷积网络的输出，将图片embedding成512维向量，the shape of output is (batch_size, 512)
    
    train_vec_queue.append(outputs1.detach().cpu().numpy())
    
#     print(train_vec_queue.qsize())
#     print(train_vec_queue.get_nowait().shape)
    
    if len(train_img_queue) == 1000//BATCH_SIZE:  #等队列满了之后，开始让所有图片进入isomap，然后pop出队首的数据进行反向传播
        isomap_forward, feature_to_use = isomap(copy.deepcopy(train_vec_queue), n_components=32)#将1000张图片通过卷积层得到的embedding向量输入isomap层，获得降维后的结果
        #isomap: numpy.ndarray   feature_to_use: numpy.ndarray
        
        if outputs1.is_cuda:
            batch_feature = torch.from_numpy(isomap_forward[:BATCH_SIZE]).cuda()
        else:
            batch_feature = torch.from_numpy(isomap_forward[:BATCH_SIZE])
        
        batch_feature = batch_feature.float()
        
#         img_tmp = train_img_queue.popleft()
#         label_tmp = train_label_queue.popleft()
#         vec_tmp = train_vec_queue.popleft()
        
        isomap_feature = batch_feature
        isomap_feature.requires_grad = True
        outputs = vgg_fc(isomap_feature)
        
        
        loss2 = cost2(outputs, labels.to(device))
        
        #---------------------------------------
        optimizer2.zero_grad()
        loss2.backward()
        optimizer2.step()
        
        grad = isomap_feature.grad
        
        E = grad*LEARNING_RATE
        
        x_hat = isomap_back(copy.deepcopy(train_vec_queue), isomap_feature, E)
        
        Y = isomap_feature.detach().cpu().numpy()
        
   
        
        Y_hat = isomap_feature.detach().cpu().numpy()
        
        if (np.any(np.isnan(Y))):
            print("Nan element")
        if(np.all(np.isfinite(Y))):
            print("")
        back = LocallyLinearBackward(n_neighbors=10) # n_neighbors is a hyperparameter
        back.fit(Y, Y_hat)
        
        X_hat = back.error_backward(feature_to_use.detach().cpu().numpy())
        target = torch.from_numpy(X_hat).to(device)
        loss1 = cost1(feature_to_use, target)
        
        optimizer1.zero_grad()
        loss1.backward()
        optimizer1.step()
        #----------------------------------------
#         current_img = train_img_queue.get()
#         current_label = train_label_queue.get()
#         current_vec = train_vec_queue.get()
        
    
    
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels.cpu()).sum()
#     loss = cost(outputs, labels.cuda())
#     loss.backward()
#     optimizer.step()

  print ('Epoch [%d/%d], Loss. %.4f' %
             (epoch+1, EPOCH, loss.data[0]))
  print('Test Accuracy of the model on the training set: %d %%' % (100 * correct / total))

# # Test the model
#   vgg16.eval()
#   correct = 0
#   total = 0

#   for images, labels in testLoader:
#     images = Variable(images).cuda()
#     outputs = vgg16(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()

#   print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# # Save the Trained Model
# torch.save(vgg16.state_dict(),'checkpoint_without_model.pt')


<class 'torch.Tensor'>
tensor([[-0.0018, -0.0002, -0.0038,  ...,  0.0025,  0.0015,  0.0038],
        [-0.0021, -0.0002, -0.0039,  ...,  0.0025,  0.0022,  0.0034],
        [-0.0024,  0.0001, -0.0030,  ...,  0.0019,  0.0012,  0.0020],
        ...,
        [ 0.0017,  0.0017,  0.0005,  ...,  0.0029, -0.0024, -0.0041],
        [ 0.0016,  0.0021,  0.0011,  ...,  0.0039, -0.0009, -0.0040],
        [ 0.0003,  0.0007,  0.0011,  ..., -0.0017,  0.0031,  0.0009]],
       device='cuda:0')



UnboundLocalError: local variable 'W' referenced before assignment

In [7]:
testModel = VGG_fc()
# input dimention = 32
Input = torch.ones(50,32)
Input.requires_grad = True
optimizer = torch.optim.Adam([{'params':testModel.parameters()},
                           {'params':Input}])

In [9]:
output = testModel(Input)

cost = tnn.CrossEntropyLoss()
loss = cost(output, torch.ones(50).long())

loss.backward()

In [10]:
Input.grad

tensor([[-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        ...,
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004]])

In [68]:
optimizer.step()

In [12]:
Input = Input *0 + torch.ones((50,32))
Input.requires_grad

True

In [14]:
Input = torch.from_numpy(np.ones((50,32)))

In [16]:
Input.requires_grad

False

In [33]:
import numpy as np

Y = np.random.randint(0,100,(20,5))
idx = np.random.randint(0,20,(20,3))
weight = np.random.rand(20,3)

In [34]:
weight = np.reshape(weight, weight.shape+(1,))

In [36]:
weight.shape
tmp1 = Y[idx] * weight
tmp = np.empty(Y.shape)
for i,w in enumerate(weight):
    tmp[i] = np.dot(w.T, Y[idx][i])

In [37]:
np.sum(tmp1, axis=1).shape

(20, 5)

In [38]:
tmp.shape

(20, 5)

In [44]:
(np.sum(tmp1, axis=1)-tmp)**2

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 8.07793567e-28,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 2.01948392e-28, 0.00000000e+00, 5.04870979e-29,
        0.00000000e+00],
       [2.01948392e-28, 8.07793567e-28, 0.00000000e+00, 8.07793567e-28,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 5.04870979e-29,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 2.01948392e-28, 5.04870979e-29, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 2.01948392e-28, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.0000