In [1]:
import torch
import torch.nn as nn
from torchvision import models
import numpy as np
from PIL import Image
from numpy import asarray
import matplotlib.pyplot as plt

In [2]:
# Define encoder
# Backbone: ResNet-50 loaded with pretrained weights, with every existing parameter frozen.
model_conv = models.resnet50(pretrained=True)
model_conv.requires_grad_(False)    # same effect as setting requires_grad = False on each parameter

# Replace the classification head with a new fully connected layer that outputs a
# 256-feature vector. It is created after the freeze, so it keeps the default
# requires_grad=True.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(in_features=num_ftrs, out_features=256)
#model_conv.cuda()                          #Allow the model to run on CUDA
#torch.cuda.set_device(device)

# The encoder chains a 2D convolution, which maps the 6-channel input down to the
# 3 channels the ResNet stem accepts, with the modified ResNet-50.
encoder = nn.Sequential(
    nn.Conv2d(
        in_channels=6,
        out_channels=3,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
    ),
    model_conv,
)

print("default resnet")
#print(model_conv)



default resnet


In [3]:
# Label for the wrapped encoder (Conv2d followed by the modified ResNet-50).
# The full structure dump below is left disabled.
print("our resnet")
#print(encoder)

our resnet


In [4]:
# NOTE(review): `encoder.parameters` is referenced without being called, so this prints the
# bound method's repr (which happens to embed the module structure) rather than the parameters.
# `print(encoder)` would show the same structure directly.
print(encoder.parameters)

<bound method Module.parameters of Sequential(
  (0): Conv2d(6, 3, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, aff

In [5]:
# Define decoder
class FFNN(nn.Module):
    """Feed-forward decoder: a stack of Linear + ReLU blocks, then a Linear output layer.

    The first block maps ``input_size`` to ``hidden_size``. It is followed by
    ``num_layers - 1`` blocks mapping ``hidden_size`` to ``hidden_size`` (none when
    ``num_layers`` is 1 or less), then a final Linear layer mapping ``hidden_size``
    to ``output_size``. ``forward`` applies ``tanh`` to the output of that stack.

    Args:
        input_size: number of features of each input row.
        hidden_size: width of every hidden Linear layer.
        output_size: number of values produced for each input row.
        num_layers: number of Linear + ReLU blocks before the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # Input width of each Linear + ReLU block, in order: the first block reads the
        # raw input, every following block reads the previous hidden activation.
        block_inputs = [self.input_size]
        block_inputs.extend(self.hidden_size for _ in range(self.num_layers - 1))

        stack = []
        for in_width in block_inputs:
            stack.extend((nn.Linear(in_width, self.hidden_size), nn.ReLU()))

        # Output layer: hidden_size -> output_size, with no ReLU after it
        stack.append(nn.Linear(self.hidden_size, self.output_size))

        # Register everything as a single sequential sub-module
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and squash the result with tanh."""
        raw_output = self.layers(x)
        return torch.tanh(raw_output)

In [6]:
# Decoder input is 259 wide: the 256 image features plus the 3 point coordinates.
# It produces a single value per point (the predicted sdf) through 8 hidden blocks of width 512.
decoder = FFNN(
    input_size=259,
    hidden_size=512,
    output_size=1,
    num_layers=8,
)

In [7]:
# Display the decoder's layer stack
print(decoder)

FFNN(
  (layers): Sequential(
    (0): Linear(in_features=259, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=512, bias=True)
    (11): ReLU()
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): ReLU()
    (14): Linear(in_features=512, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=1, bias=True)
  )
)


In [8]:
#Load data for training
import os

# Image folders, one per rendered object:
#   dataset/<render folder>/img/03001627/<obj_id>
# EX : all_datapath_img = ["dataset/render0/img/03001627/ffd258571807e6425b1205fcf56bb774", ...]
dataset_folder = "dataset"
file_list = sorted(os.listdir(dataset_folder))

all_datapath_img = [
    os.path.join(dataset_folder, render_dir, "img/03001627", object_dir)
    for render_dir in file_list
    for object_dir in sorted(os.listdir(os.path.join(dataset_folder, render_dir, "img/03001627")))
]

print(all_datapath_img)

# Entries of the sdf folder, one per object. The training loop reads 'pos.npy'
# (point coordinates) and 'sdf.npy' (their sdf values) from inside each entry.
dataset_folder = "sdf"
file_list = sorted(os.listdir(dataset_folder))

all_sdfpath_obj = [os.path.join(dataset_folder, entry) for entry in file_list]

print(all_sdfpath_obj)

# Dictionary that maps each integer label to its image folder path
label_dict = dict(enumerate(all_datapath_img))

# Integer labels 0 .. len(all_datapath_img) - 1, in order
int_labels = list(label_dict)

#print(label_dict)

['dataset/render0/img/03001627/ffd258571807e6425b1205fcf56bb774', 'dataset/render0/img/03001627/ffd3064cff5757695ecd29875b6f0d44', 'dataset/render0/img/03001627/ffd616229a97642c7ea8c9f2db0a45da', 'dataset/render0/img/03001627/ffd9387a533fe59e251990397636975f', 'dataset/render0/img/03001627/ffdc46ab1cfe759ce6fe3612af521500', 'dataset/render0/img/03001627/ffed7e95160f8edcdea0b1aceafe4876', 'dataset/render0/img/03001627/fff29a99be0df71455a52e01ade8eb6a', 'dataset/render0/img/03001627/fffda9f09223a21118ff2740a556cc3', 'dataset/render0/img/03001627/u1e22cc04-7c4d-4ed5-bda3-8ff8067f22ee', 'dataset/render0/img/03001627/u45c7b89f-d996-4c29-aecf-4b760d1fb2b6', 'dataset/render0/img/03001627/u481ebf18-4bbb-4b49-90c9-7a1e9348b647', 'dataset/render0/img/03001627/u6028f63e-4111-4412-9098-fe5f4f0c7c83', 'dataset/render0/img/03001627/ub5d972a1-de16-4d0a-aa40-85cd3a69aa8a', 'dataset/render0/img/03001627/uca24feec-f0c0-454c-baaf-561530686f40', 'dataset/render0/img/03001627/udf068a6b-e65b-430b-bc17-611b0

In [9]:
from torch.utils.data import Dataset, TensorDataset, DataLoader

# Wrap the integer labels in a dataset; each item is a 1-tuple holding one label tensor
label_tensor = torch.tensor(int_labels)
train_dataset = TensorDataset(label_tensor)

# A single shuffled batch containing every label
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=len(train_dataset),
    shuffle=True,
)

In [10]:
# List every label of the dataset next to the image folder it stands for
for (label,) in train_dataloader.dataset:
    folder = label_dict[int(label)]
    print(label, " : ", folder)

tensor(0)  :  dataset/render0/img/03001627/ffd258571807e6425b1205fcf56bb774
tensor(1)  :  dataset/render0/img/03001627/ffd3064cff5757695ecd29875b6f0d44
tensor(2)  :  dataset/render0/img/03001627/ffd616229a97642c7ea8c9f2db0a45da
tensor(3)  :  dataset/render0/img/03001627/ffd9387a533fe59e251990397636975f
tensor(4)  :  dataset/render0/img/03001627/ffdc46ab1cfe759ce6fe3612af521500
tensor(5)  :  dataset/render0/img/03001627/ffed7e95160f8edcdea0b1aceafe4876
tensor(6)  :  dataset/render0/img/03001627/fff29a99be0df71455a52e01ade8eb6a
tensor(7)  :  dataset/render0/img/03001627/fffda9f09223a21118ff2740a556cc3
tensor(8)  :  dataset/render0/img/03001627/u1e22cc04-7c4d-4ed5-bda3-8ff8067f22ee
tensor(9)  :  dataset/render0/img/03001627/u45c7b89f-d996-4c29-aecf-4b760d1fb2b6
tensor(10)  :  dataset/render0/img/03001627/u481ebf18-4bbb-4b49-90c9-7a1e9348b647
tensor(11)  :  dataset/render0/img/03001627/u6028f63e-4111-4412-9098-fe5f4f0c7c83
tensor(12)  :  dataset/render0/img/03001627/ub5d972a1-de16-4d0a-aa4

In [11]:
def my_plot(epochs, loss):
    """Draw `loss` against `epochs` as a line on the current matplotlib axes."""
    current_axes = plt.gca()
    current_axes.plot(epochs, loss)
    


In [12]:
# Define optimizer
# Only parameters handed to the optimizer are ever updated. The encoder contains two
# freshly initialised, trainable layers (the 6->3 channel Conv2d and the replaced
# 256-feature `fc` head); optimising the decoder alone would leave them at their random
# initial values, so the image latent vector would never be learned. The frozen ResNet
# weights (requires_grad=False) are filtered out and stay untouched.
optimizer_all = torch.optim.Adam(
        [
            {
                "params": decoder.parameters(),
                "lr": 0.0001,
            },
            {
                "params": [p for p in encoder.parameters() if p.requires_grad],
                "lr": 0.0001,
            },
        ]
    )

In [13]:
epochs = 5
from torch.autograd import grad

# Number of images stacked as input channels; the encoder's first Conv2d takes 6 channels.
NUM_VIEWS = 6
# Number of query points sent through the decoder at once. Chunking bounds peak memory
# while still processing points in batches instead of one Python iteration per point.
POINT_CHUNK_SIZE = 8192

# Mean per-object loss of each epoch (e.g. to plot with my_plot afterwards)
epoch_losses = []

# Training: for every object, encode its images into a latent vector, predict the sdf
# of all its sample points, and take one optimizer step on the mean absolute error.
for epoch in range(epochs):
    decoder.train()
    total_loss = 0.0
    num_steps = 0
    count = 0
    print("Entrée dans l'epoch ", epoch)

    for subdirect in train_dataloader.dataset:
        # Image folder for this label
        path = label_dict[int(subdirect[0])]
        #obj_id = path.split("\\")[3]

        print("Accès au dossier ", path)

        # Load the first channel of every .png in the folder, closing each file after reading
        list_image = []
        for file in sorted(os.listdir(path)):
            if file.endswith('.png'):
                with Image.open(os.path.join(path, file)) as image:
                    data = asarray(image)
                list_image.append(data[:, :, 0])

        if len(list_image) < NUM_VIEWS:
            raise ValueError(
                f"{path} contains {len(list_image)} .png image(s); {NUM_VIEWS} are required"
            )

        # Stack the images along a new channel axis -> (NUM_VIEWS, im_height, im_width),
        # then add a batch axis -> (1, NUM_VIEWS, im_height, im_width)
        input_image = torch.stack(
            [torch.tensor(view) for view in list_image[:NUM_VIEWS]], 0
        ).float()
        input_image = input_image.unsqueeze(0)

        # Get pos and sdf path for this obj
        # todo: a dictionary (obj_id => sdf_path) would be safer than cycling through the list
        sdf_obj_path = all_sdfpath_obj[count % len(all_sdfpath_obj)]
        count += 1

        # pos.npy holds one [x, y, z] row per point and sdf.npy the matching sdf values.
        # Both are cast to float32 so they agree with the decoder's weights whatever dtype
        # was saved; sdf is reshaped to a column to line up with the decoder's (N, 1) output.
        points = torch.as_tensor(
            np.load(os.path.join(sdf_obj_path, 'pos.npy')), dtype=torch.float32
        )
        sdf = torch.as_tensor(
            np.load(os.path.join(sdf_obj_path, 'sdf.npy')), dtype=torch.float32
        ).reshape(-1, 1)

        num_points = sdf.shape[0]
        if num_points == 0:
            # Nothing to fit for this object
            continue

        # Gradients accumulate in .grad, so clear what the previous step left behind
        optimizer_all.zero_grad()

        # Latent vector prediction for these images (len 256)
        vect_image = encoder(input_image)

        object_loss = 0.0
        for start in range(0, num_points, POINT_CHUNK_SIZE):
            stop = min(start + POINT_CHUNK_SIZE, num_points)

            # Global latent vectors: the image latent repeated on every row, followed by
            # the point coordinates -> (stop - start, 259)
            vect_latent = torch.cat(
                (vect_image.expand(stop - start, -1), points[start:stop]), 1
            )
            predicted_sdf = decoder(vect_latent)

            # Absolute error, normalised by the total point count so that the chunk
            # gradients add up to the gradient of the mean L1 error over all points.
            # (A signed difference is not a loss: minimising it only pushes every
            # prediction towards -1, whatever the target is.)
            loss = (predicted_sdf - sdf[start:stop]).abs().sum() / num_points

            # The encoder graph is shared by all chunks; keep it until the last one
            loss.backward(retain_graph=stop < num_points)
            object_loss += loss.item()

        optimizer_all.step()

        total_loss += object_loss
        num_steps += 1

    mean_loss = total_loss / max(num_steps, 1)
    epoch_losses.append(mean_loss)
    print("Epoch ", epoch, " mean L1 loss : ", mean_loss)
                

Entrée dans l'epoch  0
Accès au dossier  dataset/render0/img/03001627/ffd258571807e6425b1205fcf56bb774


KeyboardInterrupt: 