In [1]:
import os
import time
import torch
import torch.nn as nn
from torchvision import models
from torch.utils.data import Dataset, TensorDataset, DataLoader, random_split
from torchvision import transforms
import numpy as np
from PIL import Image
from numpy import asarray
import matplotlib.pyplot as plt

In [2]:
# Define encoder

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device : ", device)

# First load resnet
def loadResNet50():
    
    model_conv = models.resnet50(pretrained=True)
    model_conv.to(device)

    for param in model_conv.parameters():
        #Requires_grad = True if is needed to be computed for this Tensor. All are True by default
        param.requires_grad = False
        param.to(device)

    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 256)    #Add a new last fully connected layer that will output a 256-features vector
    model_conv.to(device)

    #Encoder is the sequential model of a 2D convolutional model + the modified resnet50 model
    encoder = nn.Sequential(
        nn.Conv2d(6,3,7,2,3,bias=False),   #(in_channels=6,out_channels=3,kernel_size=7,stride=2,padding=3,bias=False)
        model_conv)

    return encoder

encoder = loadResNet50()

print("our resnet")
encoder.to(device)
print(encoder)

Device :  cuda
our resnet
Sequential(
  (0): Conv2d(6, 3, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True,

In [3]:
# Define decoder
class FFNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.layers = [
          nn.Linear(self.input_size, self.hidden_size),
          nn.ReLU(),
        ]
        for i in range(self.num_layers - 1):
            self.layers.append(nn.Linear(self.hidden_size, self.hidden_size))
            self.layers.append(nn.ReLU())
        
        #Add the last layer that will output from 512 to 1 feature
        self.layers.append(nn.Linear(self.hidden_size, self.output_size))

        #Compile all the layers into a sequential model
        self.layers = nn.Sequential(*self.layers)

    #Computation performed at every call
    def forward(self, x):
        sdf_xyz = torch.tanh(self.layers(x))
        return sdf_xyz

In [4]:
#Input_size=259 (256 features + 3 positions), hidden_size=512?, output_size=1 (sdf?), num_layers=8
decoder = FFNN(259, 512, 1, 8)
decoder.to(device)

print(decoder)

FFNN(
  (layers): Sequential(
    (0): Linear(in_features=259, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=512, bias=True)
    (11): ReLU()
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): ReLU()
    (14): Linear(in_features=512, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=1, bias=True)
  )
)


In [5]:
#Load data for training

# Dictionary that maps integer value to its path value (string)
int_labels = []
label_dict = {}
count = 0

#Load data for images
dataset_folder = "dataset"

file_list = sorted(os.listdir(dataset_folder))
for i, file in enumerate(file_list):
    file_img = sorted(os.listdir(os.path.join(dataset_folder, file, "img/03001627"))) 
    
    for idx, imgFile in enumerate(file_img):
        int_labels.append(count)
        datapath_img = os.path.join(dataset_folder, file, "img/03001627", imgFile)
        label_dict[count] = datapath_img
        count+=1

#Load data for points array and their sdf value
dataset_folder = "sdf"
sdf_dict = {}

file_list = sorted(os.listdir(dataset_folder))
for idx, file in enumerate(file_list):
    sdfpath_obj = os.path.join(dataset_folder, file)
    sdf_dict[file] = sdfpath_obj

In [6]:
#Datasets preparation - 70/30
train_dataset, val_dataset = random_split(int_labels, [int(len(int_labels)*0.7), int(len(int_labels)*0.3)])

train_dataset = TensorDataset(torch.tensor(train_dataset))
val_dataset = TensorDataset(torch.tensor(val_dataset))

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=len(train_dataset))
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=len(val_dataset))

In [7]:
def my_plot(epochs, loss):
    plt.plot(epochs, loss)

In [8]:
# Define optimizer
optimizer_all = torch.optim.Adam(
        [
            {
                "params": decoder.parameters(),
                "lr": 0.0001,
            },
        ]
    )

In [10]:
num_epochs = 5
from torch.autograd import grad
# Define optimization function

loss_vals=  []
for epoch in range(1, num_epochs+1):
    start = time.time()
    decoder.train()
    epoch_loss= []
    
    print("Epoch", epoch, "/ 5")
    
    for subdirect in train_dataloader.dataset:
        # Get obj id from this path
        path = label_dict[int(subdirect[0])]
        obj_id = path.split("\\")[3]
        
        #print("Accès au dossier ", path)
        
        # Load each image for this subdirectory
        list_image = []
        for idx, file in enumerate(os.listdir(path)):
            if (file.endswith('.png')):
                image = Image.open(os.path.join(path, file))
                data = asarray(image)
                data = data[:, :, 3]
                list_image.append(data)
        
        # Create a numpy tensor for each image
        image_1 = torch.tensor(list_image[0], device='cuda')
        image_2 = torch.tensor(list_image[1], device='cuda')
        image_3 = torch.tensor(list_image[2], device='cuda')
        image_4 = torch.tensor(list_image[3], device='cuda')
        image_5 = torch.tensor(list_image[4], device='cuda')
        image_6 = torch.tensor(list_image[5], device='cuda')
        
        # Then concatenate these tensor in one torch.tensor
        input_image = torch.stack((image_1, image_2, image_3, image_4, image_5, image_6), 0).float() # concatenate these 6 image so we will have a tensor with shape (1,im_height,im_width,6)
        input_image = input_image.unsqueeze(0)
        input_image.to(device)
        input_image.cuda()
        
        # Latent vector prediction for these 6 images (len 256)
        vect_image = encoder(input_image)

        # Get pos and sdf path for this obj
        sdf_obj_path = sdf_dict[obj_id] # todo , maybe we can create a dictionnary (obj_id => sdf_path) insted of list in "all_sdfpath_obj"...
        
        # Get the dict or correspondance list for each point and his sdf value
        points = np.load(os.path.join(sdf_obj_path, 'pos.npy')) #each point is a array [x,y,z] which represent coordonnées
        sdf = np.load(os.path.join(sdf_obj_path, 'sdf.npy'))
        
        # Predict the sdf of each point
        for id in range(0, len(sdf)):
            p = points[id]
            p = torch.tensor(p, device='cuda')
            p = p.unsqueeze(0)

            #print("Lecture du point ", sdf_obj_path)
            
            real_sdf = sdf[id]
            
            # Construct the global latent vector
            vect_latent = torch.cat((vect_image,p), 1) #concatenate -> len 259
            vect_latent.to(device)
            vect_latent.cuda()
            
            predicted_sdf = decoder(vect_latent)
            
            # Calcul loss
            loss = abs(predicted_sdf - real_sdf) # pred_sdf - real_sdf for example
            
            # Calcul gradiant
            loss.backward(retain_graph=True)
            epoch_loss.append(loss.item())
        
        optimizer_all.step()
        optimizer_all.zero_grad()
        
    loss_vals.append(sum(epoch_loss)/len(epoch_loss))
    end = time.time()
    print("Temps d'entraînement écoulé pour l'epoch n°", epoch, " : ", (end - start), "s")
    
    start = time.time()
    decoder.eval()
    val_epoch_loss= []
        
    for subdirect in val_dataloader.dataset:
        # Get obj id from this path
        path = label_dict[int(subdirect[0])]
        obj_id = path.split("\\")[3]
        
        #print("Accès au dossier ", path)
        
        # Load each image for this subdirectory
        list_image = []
        for idx, file in enumerate(os.listdir(path)):
            if (file.endswith('.png')):
                image = Image.open(os.path.join(path, file))
                data = asarray(image)
                data = data[:, :, 3]
                list_image.append(data)
        
        # Create a numpy tensor for each image
        image_1 = torch.tensor(list_image[0], device='cuda')
        image_2 = torch.tensor(list_image[1], device='cuda')
        image_3 = torch.tensor(list_image[2], device='cuda')
        image_4 = torch.tensor(list_image[3], device='cuda')
        image_5 = torch.tensor(list_image[4], device='cuda')
        image_6 = torch.tensor(list_image[5], device='cuda')
        
        # Then concatenate these tensor in one torch.tensor
        input_image = torch.stack((image_1, image_2, image_3, image_4, image_5, image_6), 0).float() # concatenate these 6 image so we will have a tensor with shape (1,im_height,im_width,6)
        input_image = input_image.unsqueeze(0)
        input_image.to(device)
        input_image.cuda()
        
        # Latent vector prediction for these 6 images (len 256)
        vect_image = encoder(input_image)

        # Get pos and sdf path for this obj
        sdf_obj_path = sdf_dict[obj_id] # todo , maybe we can create a dictionnary (obj_id => sdf_path) insted of list in "all_sdfpath_obj"...
        
        # Get the dict or correspondance list for each point and his sdf value
        points = np.load(os.path.join(sdf_obj_path, 'pos.npy')) #each point is a array [x,y,z] which represent coordonnées
        sdf = np.load(os.path.join(sdf_obj_path, 'sdf.npy'))
        
        # Predict the sdf of each point
        for id in range(0, len(sdf)):
            p = points[id]
            p = torch.tensor(p, device='cuda')
            p = p.unsqueeze(0)

            #print("Lecture du point ", sdf_obj_path)
            
            real_sdf = sdf[id]
            
            # Construct the global latent vector
            vect_latent = torch.cat((vect_image,p), 1) #concatenate -> len 259
            vect_latent.to(device)
            vect_latent.cuda()
            
            predicted_sdf = decoder(vect_latent)
            
            # Calcul loss
            loss = abs(predicted_sdf - real_sdf) # pred_sdf - real_sdf for example
            
            # Calcul gradiant
            loss.backward(retain_graph=True)
            val_epoch_loss.append(loss.item())
        
    print(sum(val_epoch_loss)/len(val_epoch_loss))
    end = time.time()
    print("Temps de validation écoulé pour l'epoch n°", epoch, " : ", (end - start), "s")
    
my_plot(np.linspace(1, num_epochs, num_epochs).astype(int), loss_vals)

Epoch 1 / 5
Temps d'entraînement écoulé pour l'epoch n° 1  :  209.94323897361755 s
0.01753319460825474
Temps de validation écoulé pour l'epoch n° 1  :  106.58384537696838 s
Epoch 2 / 5


KeyboardInterrupt: 

In [14]:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle

# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(decoder, open(filename, 'wb'))

In [11]:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle

# load the model from disk
filename = 'finalized_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test, Y_test)
print(result)

AttributeError: 'FFNN' object has no attribute 'score'