In [66]:
import torch
import torch.nn as tnn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F

import queue
import os
import numpy as np
from sklearn.manifold import Isomap
from sklearn.neighbors import NearestNeighbors
from locally_linear import LocallyLinearBackward
import time

In [2]:
os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
memory_gpu=[int(x.split()[2]) for x in open('tmp','r').readlines()]
os.environ['CUDA_VISIBLE_DEVICES']=str(np.argmax(memory_gpu))
os.system('rm tmp')

0

In [3]:
BATCH_SIZE = 50
LEARNING_RATE = 0.01
EPOCH = 1
n_dimentions = 32
n_neighbors = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
transform = transforms.Compose([
    transforms.RandomResizedCrop(28),
    transforms.ToTensor()])

data_train = dsets.MNIST(root = "./data/",
                         transform=transform,
                            train = True,
                            download = True)

data_test = dsets.MNIST(root="./data/",
                        transform=transform,
                           train = False)

trainLoader = torch.utils.data.DataLoader(dataset=data_train, batch_size=BATCH_SIZE, shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset=data_test, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
class VGG_conv(tnn.Module):
    def __init__(self):
        super(VGG_conv, self).__init__()
        self.layer1 = tnn.Sequential(

            # 1-1 conv layer
            tnn.Conv2d(1, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1-2 conv layer
            tnn.Conv2d(64, 64, kernel_size=3, padding=1),
            tnn.BatchNorm2d(64),
            tnn.ReLU(),

            # 1 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer2 = tnn.Sequential(

            # 2-1 conv layer
            tnn.Conv2d(64, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2-2 conv layer
            tnn.Conv2d(128, 128, kernel_size=3, padding=1),
            tnn.BatchNorm2d(128),
            tnn.ReLU(),

            # 2 Pooling lyaer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = tnn.Sequential(

            # 3-1 conv layer
            tnn.Conv2d(128, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3-2 conv layer
            tnn.Conv2d(256, 256, kernel_size=3, padding=1),
            tnn.BatchNorm2d(256),
            tnn.ReLU(),

            # 3 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer4 = tnn.Sequential(

            # 4-1 conv layer
            tnn.Conv2d(256, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4-2 conv layer
            tnn.Conv2d(512, 512, kernel_size=3, padding=1),
            tnn.BatchNorm2d(512),
            tnn.ReLU(),

            # 4 Pooling layer
            tnn.MaxPool2d(kernel_size=2, stride=2))

        # self.layer5 = tnn.Sequential(
        #
        #     # 5-1 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5-2 conv layer
        #     tnn.Conv2d(512, 512, kernel_size=3, padding=1),
        #     tnn.BatchNorm2d(512),
        #     tnn.ReLU(),
        #
        #     # 5 Pooling layer
        #    tnn.MaxPool2d(kernel_size=2, stride=2))

        self.layer6 = tnn.Sequential(

            # 6 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512),
            tnn.BatchNorm1d(512),
            tnn.ReLU())


        self.layer7 = tnn.Sequential(

            # 7 Fully connected layer
            # Dropout layer omitted since batch normalization is used.
            tnn.Linear(512, 512,
            tnn.BatchNorm1d(512)),
            tnn.ReLU())
    
    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
   #   out = self.layer5(out)
      vgg16_features = out.view(out.size(0), -1)
      out = self.layer6(vgg16_features)
      out = self.layer7(out)
      return out

In [6]:
class VGG_fc(tnn.Module):
    def __init__(self):
        super(VGG_fc, self).__init__()
        self.layer8 = tnn.Sequential(

        # 8 output layer
        tnn.Linear(32, 10))

    def forward(self, x):
        out = self.layer8(x)
#         out = F.softmax(out, dim=1)  #CrossEntropy 不能用这个
        return out

In [16]:
def isomap(features, n_components):
#     length = feature_queue.qsize()
#     for i in range(length):
#         if i == 0:
#             feature_tmp = feature_queue.get()
#             feature_queue.put(feature_tmp)
#             features = feature_tmp
#         else:
#             feature_tmp = feature_queue.get()
#             feature_queue.put(feature_tmp)
#             features = torch.cat((features, feature_tmp), dim=0)
        
    feature_input = features
    embedding = Isomap(n_components=n_components)
    transformed = embedding.fit_transform(feature_input)
#     if features.is_cuda:
#         output = torch.from_numpy(transformed).cuda()
#     else:
#         output = torch.from_numpy(transformed)
    output = transformed
    return output
    

In [11]:
def get_queue_elements(feature_queue):
    length = feature_queue.qsize()
    for i in range(length):
        if i == 0:
            feature_tmp = feature_queue.get()
            feature_queue.put(feature_tmp)
            features = feature_tmp
            feature_to_use = feature_tmp
        else:
            feature_tmp = feature_queue.get()
            feature_queue.put(feature_tmp)
            features = np.concatenate((features, feature_tmp), axis=0)
    return features

In [50]:
vgg_conv = VGG_conv().to(device)
# vgg_conv.cuda()
vgg_fc = VGG_fc().to(device)
# vgg_fc.cuda()

isomap_feature = torch.empty((BATCH_SIZE, n_dimentions), requires_grad=True, device=device)
cost1 = tnn.MSELoss()
cost2 = tnn.CrossEntropyLoss()
optimizer1 = torch.optim.Adam(vgg_conv.parameters(), lr=LEARNING_RATE)
optimizer2 = torch.optim.Adam([{'params':vgg_fc.parameters()}
                               ,{'params':isomap_feature}
                              ], lr=LEARNING_RATE)
back_tool = LocallyLinearBackward(n_neighbors=n_neighbors)

In [56]:
isomap_feature.is_cuda
isomap_feature.type()

'torch.cuda.FloatTensor'

In [13]:
# Train the model
for epoch in range(EPOCH):
#  for i, (images, labels) in enumerate(trainLoader):
  vgg_conv.train()
  correct = 0
  total = 0
#   train_img_queue = queue.Queue(maxsize=1000/BATCH_SIZE)    #构建输入图像的队列
#   train_label_queue = queue.Queue(maxsize=1000/BATCH_SIZE) #构建label的队列
  train_vec_queue = queue.Queue(maxsize=1000/BATCH_SIZE)    #构建卷积网络输出向量的队列
  for images, labels in trainLoader:
#     train_img_queue.put(images)
#     train_label_queue.put(labels)
    
    # Forward + Backward + Optimize
    
#     optimizer1.zero_grad()
#     optimizer2.zero_grad()

    outputs1 = vgg_conv(images.to(device)) #卷积网络的输出，将图片embedding成512维向量，the shape of output is (batch_size, 512)
    
    train_vec_queue.put(outputs1)
    
#     print(train_vec_queue.qsize())
#     print(train_vec_queue.get_nowait().shape)
    
    if train_img_queue.full():  #等队列满了之后，开始让所有图片进入isomap，然后pop出队首的数据进行反向传播
        train_features = get_queue_elements(train_vec_queue) # type torch.tensor
        
#         feature_to_use = train_features[:BATCH_SIZE]
        isomap_forward = isomap(train_features, n_components=32)#将1000张图片通过卷积层得到的embedding向量输入isomap层，获得降维后的结果
                                                                    # type torch.tensor
        batch_feature = isomap_forward[:BATCH_SIZE].float()
        
        img_tmp = train_img_queue.get()
        label_tmp = train_label_queue.get()
        vec_tmp = train_vec_queue.get()
        
        isomap_feature = batch_feature
        outputs = vgg_fc(isomap_feature)
        
        
        loss2 = cost2(outputs, label_tmp.to(device))
        
        #---------------------------------------
#         Y = isomap_feature.detach().cpu().numpy()
    
        optimizer2.zero_grad()
        loss2.backward()
        optimizer2.step()
        
        Y = isomap_forward.detach().cpu().numpy()
        Y_hat = isomap_feature.detach().cpu().numpy()
        
        if (np.any(np.isnan(Y))):
            print("Nan element in Y")
            break
        if( not np.all(np.isfinite(Y))):
            print("Infinit element in Y")
            break
            
        back = LocallyLinearBackward(n_neighbors=10) # n_neighbors is a hyperparameter
        back.fit(Y, Y_hat)
        
        X_hat = back.error_backward(vec_tmp.detach().cpu().numpy())
        target = torch.from_numpy(X_hat).to(device)
        loss1 = cost1(vec_tmp, target)
        
        optimizer1.zero_grad()
        loss1.backward()
        optimizer1.step()
        #----------------------------------------
#         current_img = train_img_queue.get()
#         current_label = train_label_queue.get()
#         current_vec = train_vec_queue.get()
        
    
    
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels.cpu()).sum()
#     loss = cost(outputs, labels.cuda())
#     loss.backward()
#     optimizer.step()

  print ('Epoch [%d/%d], Loss. %.4f' %
             (epoch+1, EPOCH, loss.data[0]))
  print('Test Accuracy of the model on the training set: %d %%' % (100 * correct / total))

# # Test the model
#   vgg16.eval()
#   correct = 0
#   total = 0

#   for images, labels in testLoader:
#     images = Variable(images).cuda()
#     outputs = vgg16(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()

#   print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

# # Save the Trained Model
# torch.save(vgg16.state_dict(),'checkpoint_without_model.pt')


RuntimeError: CUDA out of memory. Tried to allocate 9.62 MiB (GPU 0; 11.93 GiB total capacity; 4.10 GiB already allocated; 3.06 MiB free; 11.47 MiB cached)

In [69]:
for epoch in range(EPOCH):
    vgg_conv.train()
    vgg_fc.train()
    
    correct = 0
    total = 0

    train_vec_queue = queue.Queue(maxsize=1000/BATCH_SIZE)
    
    for images, labels in trainLoader:
        vec_output = vgg_conv(images.to(device)) # torch.tensor
        
        train_vec_queue.put(vec_output.detach().cpu().numpy()) # queue 中保存 numpy 变量
        
        if train_vec_queue.full():
            train_vec_queue.get()
            train_features = get_queue_elements(train_vec_queue) # np.float
            
            isomap_forward = isomap(train_features, n_components=n_dimentions) # np.float
            
            batch_feature = isomap_forward[-BATCH_SIZE:] # 取最后的BATCH_SIZE个用于训练
            
            isomap_feature.data -= isomap_feature.data + torch.from_numpy(batch_feature).float().to(device) # 保持tensor对象不变，只改变值
            
            outputs = vgg_fc(isomap_feature)
            
            #---------
            loss2 = cost2(outputs, labels.to(device))
            
            optimizer2.zero_grad()
            loss2.backward()
            optimizer2.step()
            #---------
            # 误差前向传播
            Y = isomap_forward
            Y_hat = isomap_feature.data.cpu().numpy()
            X = train_features
            
            start = time.time()
            back_tool.fit(Y, Y_hat)
            
            X_hat = back_tool.error_backward(X)
            end = time.time()
            print("backward use time: %f" %(end-start))
            target = torch.from_numpy(X_hat).to(device)
            
            #----------
            loss1 = cost1(vec_output, target)
            optimizer1.zero_grad()
            loss1.backward()
            optimizer1.step()
            #----------
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted.cpu() == labels.cpu()).sum()
        
    print ('Epoch [%d/%d], Loss. %.4f' %
             (epoch+1, EPOCH, loss.data[0]))
    print('Test Accuracy of the model on the training set: %d %%' % (100 * correct / total))
            

backward use time: 0.042298
backward use time: 0.043172
backward use time: 0.074420
backward use time: 0.061149
backward use time: 0.053147
backward use time: 0.060358
backward use time: 0.047945
backward use time: 0.038147
backward use time: 0.052690
backward use time: 0.038536
backward use time: 0.061477
backward use time: 0.079262
backward use time: 0.043253
backward use time: 0.035550
backward use time: 0.061698
backward use time: 0.051023
backward use time: 0.076821
backward use time: 0.045864


  X_transformed = self.alphas_ * np.sqrt(self.lambdas_)


ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [None]:
testModel = VGG_fc()
# input dimention = 32
Input = torch.ones(50,32)
Input.requires_grad = True
optimizer = torch.optim.Adam([{'params':testModel.parameters()},
                           {'params':Input}])

In [9]:
output = testModel(Input)

cost = tnn.CrossEntropyLoss()
loss = cost(output, torch.ones(50).long())

loss.backward()

In [10]:
Input.grad

tensor([[-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        ...,
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004],
        [-0.0011, -0.0017,  0.0018,  ...,  0.0016,  0.0015,  0.0004]])

In [68]:
optimizer.step()

In [12]:
Input = Input *0 + torch.ones((50,32))
Input.requires_grad

True

In [14]:
Input = torch.from_numpy(np.ones((50,32)))

In [16]:
Input.requires_grad

False

In [45]:
import torch

x = torch.empty((5,10),requires_grad=True)
I = torch.ones((10,1))

In [46]:
t = np.ones((5,10), dtype=np.float32)

In [35]:
tmp = x.mm(I)
loss = torch.sum(tmp)
loss.backward()

In [41]:
x.grad

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [42]:
x = torch.from_numpy(t)
x.grad

tensor([[-1.3630e-36,  4.5775e-41, -3.5093e-34,  4.5775e-41,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00, -3.8761e-34,  4.5775e-41],
        [-3.8761e-34,  4.5775e-41, -3.8761e-34,  4.5775e-41,  0.0000e+00,
          0.0000e+00,  4.2964e+24,  1.0765e+21,  7.9050e+31,  5.5515e-08],
        [ 1.3882e+31,  1.3043e-11,  2.7489e+26,  1.8175e+31, -3.8761e-34,
          4.5775e-41, -3.8761e-34,  4.5775e-41, -3.8761e-34,  4.5775e-41],
        [ 1.4013e-45,  0.0000e+00, -3.5094e-34,  4.5775e-41, -6.0348e+22,
          4.5775e-41, -3.1804e-34,  4.5775e-41, -5.8122e+22,  4.5775e-41],
        [ 2.1019e-44,  0.0000e+00,  1.4013e-45,  0.0000e+00,  3.6869e+24,
          4.5775e-41,  3.6154e-43,  4.5559e-41,  2.9147e-43,  0.0000e+00]])

In [47]:
print(id(x.data))
x.data = torch.from_numpy(t)
print(id(x.data))
x = torch.from_numpy(t)
print(id(x))
# x = x -1
# print(id(x))

140105861432592
140105861432592
140105861432592


In [48]:
x.requires_grad

False

In [24]:
t[-2:]

array([], shape=(0, 10), dtype=float32)