## Imports

In [1]:
import numpy as np
import sys
import os
from sklearn.model_selection import train_test_split

# Torch library
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Matplotlib to visualize the result
import matplotlib as mpl
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as plt3d
import matplotlib.cm as cm

## Load the data

In [2]:
# Arguments
ml_args_raster_low_res = './raster/low_res'
ml_args_raster_high_res = './raster/high_res'
ml_args_model = './model/'
ml_args_count = 13597

def ml_raster_import( ml_path ):
    ''' Import a raster found at given the path '''
    """
        Args:
            ml_path(string): the full path of the raster .ras to import
    """

    # check consistency #
    if ( not os.path.exists( ml_path ) ):

        # send message #
        sys.exit( 'turing : error : unable to access raster' )

    # retrieve raster size #
    ml_size = os.path.getsize( ml_path )

    # compute raster width #
    ml_width = int( round( ml_size ** ( 1.0 / 3.0 ) ) )

    # import raster data #
    with open( ml_path, 'rb' ) as ml_file:

        # read raster bytes #
        ml_byte = ml_file.read( ml_size )

    # convert to numpy array #
    ml_data = np.frombuffer( ml_byte, dtype=np.uint8 )

    # return raster array #
    return ml_data.reshape( ml_width, ml_width, ml_width )

In [3]:
def get_ml_data(ml_args_raster):
    ''' Calls ml_raster_import to import all rasters '''
    """
        Args:
            ml_args_raster(string): the rasters folder path
    """

    ml_data = []
    
    for raster_id in range(ml_args_count):
        
        raster_path = ml_args_raster + '/raster-{:06d}.ras'.format(raster_id)
    
        # import raster array #
        data = ml_raster_import(raster_path)
    
        ml_data.append(data)
        
    return ml_data

# Import xs (low resolution rasters) and ys (high resolution rasters)
ml_data_low_res = get_ml_data(ml_args_raster_low_res)
ml_data_high_res = get_ml_data(ml_args_raster_high_res)

In [None]:
def ml_raster_convert( ml_raster ):
    ''' Convert rasters in form of a serie of 0 and 1 (1 = point) into an array containing the
        coordinates of the points '''
    """
        Args:
            ml_raster(numpy 3 dim array): the array containing a serie of 0 and 1
    """

    # check consistency #
    if ( ( ml_raster.shape[0] != ml_raster.shape[1] ) or ( ml_raster.shape[0] != ml_raster.shape[2] ) ):

        # send message #
        sys.exit( 'turing : error : raster not consistent' )

    # retrieve raster width #
    ml_width = ml_raster.shape[0]

    # initialise array #
    ml_data = np.empty( ( 0, 3 ), dtype=np.uint8 )

    # parsing raster array #
    for ml_x in range( ml_width ):

        # parsing raster array #
        for ml_y in range( ml_width ):

            # parsing raster array #
            for ml_z in range( ml_width ):

                # check raster element #
                if ( ml_raster[ml_x, ml_y, ml_z] != 0 ):

                    # compute element coordinates #
                    ml_r = ml_x / ml_width
                    ml_s = ml_y / ml_width
                    ml_t = ml_z / ml_width

                    # append element #
                    ml_data = np.append( ml_data, np.array( [[ ml_r, ml_s, ml_t ]] ), axis=0 )

    # return converted array #
    return( ml_data )

## Build Torch Tensors from the data

In [4]:
class CloudPointDataset(Dataset):
    ''' Class of cloudpoint data that will be usable by Pytorch '''

    def __init__(self, thin_dataset, thick_dataset, train, transform=None):
        """
        Args:
            mat_file (string): Path to the mat file with the data
            train (boolean): Is it the training data or the test data
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.X = thin_dataset
        self.Y = thick_dataset
        self.X = np.asarray(self.X).astype(float)
        self.Y = np.asarray(self.Y).astype(float)
        Xtrain, Xtest, Ytrain, Ytest = train_test_split(self.X, self.Y, test_size=0.3, random_state=1)
        self.Xtrain, self.Xtest = Xtrain, Xtest
        self.Ytrain, self.Ytest = Ytrain, Ytest
        self.train = train
        self.transform = transform

    def __len__(self):
        if self.train:
            return (self.Xtrain.shape[0])
        else:
            return (self.Xtest.shape[0])

    def __getitem__(self, idx):
        
        if self.train:
            sample = {'image': self.Xtrain[idx,:], 'label': self.Ytrain[idx,:]}
        else:
            sample = {'image': self.Xtest[idx,:], 'label': self.Ytest[idx,:]}
            
        if self.transform:
            sample = self.transform(sample)

        return sample

In [5]:
class ToTensor(object):
    ''' Convert ndarrays in sample to Tensors. '''

    def __call__(self, sample):
        raster, label = sample['image'], sample['label']
        raster = torch.from_numpy(raster).float()
        raster = raster.expand(1, 16, 16, 16)
        return {'image': raster, 'label': label}

In [6]:
# Define the trainset and testset and put them in the form of a torch tensor
trainset = CloudPointDataset(ml_data_low_res, ml_data_high_res, train=True, transform = ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 1, shuffle=False, num_workers=0)

testset = CloudPointDataset(ml_data_low_res, ml_data_high_res, train=False, transform=ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size = 1, shuffle=False, num_workers=0)

## Define the model & CNN architecture

In [7]:
class Net(nn.Module):
    def __init__(self):
        ''' Define the functions that are to be used in the CNN '''
        
        super(Net, self).__init__()
        self.conv1 = nn.Conv3d(1, 32, 3, padding = 1, stride = 1)
        self.conv2 = nn.Conv3d(32, 32, 3, padding = 1, stride = 1)
        self.conv3 = nn.Conv3d(32, 64, 3, padding = 1, stride = 1)
        self.conv4 = nn.Conv3d(64, 64, 3, padding = 1, stride = 1)
        self.conv5 = nn.Conv3d(64, 1, 3, padding = 1, stride = 1)
        self.BN = nn.BatchNorm3d(num_features = 32)
        
    def forward(self, x):
        ''' Define the architecture of the CNN '''
        
        # Empty storage vector
        x_ = []
        
        # Augment the number of layers from 1 to 32
        x = self.conv1(x)
        x = self.BN(x)
        x = F.relu(x)
        x_.append(x)
        
        # 6 residual blocks (core CNN)
        for i in range(6):
            x = self.conv2(x)
            x = self.BN(x)
            x = F.relu(x)
            x = self.conv2(x)
            x = self.BN(x)
            x = x + x_[-1]
            x_.append(x)
        
        x = self.conv2(x)
        x = self.BN(x)
        x = x + x_[0]
        
        # Augment the number of layers from 32 to 64
        x = self.conv3(x)
        x = F.relu(x)
        
        # Two-step interpolation to the higher resolution
        x = nn.functional.interpolate(x, size = (24, 24, 24), mode='trilinear')
        x = self.conv4(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x) 
        x = nn.functional.interpolate(x, size = (32, 32, 32), mode='trilinear')
        x = self.conv4(x)
        x = F.relu(x)
        
        # Reduce the number of layers again from 64 to 1
        x = self.conv5(x)        
        
        return x

model = Net()

In [8]:
# Define the criterion (loss function) and the optimizer (here : stochastic gradient descent)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.8)

# Train the model

In [None]:
# Empty storage variables
y_hat, y, x = [], [], []

for epoch in range(5):  # loop over the dataset multiple times
    
    # Initialize loss
    running_loss = 0.0
    
    for i, data in enumerate(trainloader, 0):
        
        # Get the inputs
        images, labels = data['image'], data['label'].float()
        
        # Wrap them in Variable
        images, labels = Variable(images), Variable(labels)

        # Clear the gradients of the variables
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(images)
        outputs = np.squeeze(outputs, axis=0)
        labels = np.squeeze(labels, axis=0)
        
        # Store the prediction, as well as the corresponding x and y rasters
        y_hat.append(outputs)
        x.append(images)
        y.append(labels)
        
        # Minimize loss
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update statistics
        running_loss += loss.data[0]

        # Print the loss of the last mini-batch of each epoch
        if i+1 == 7:
            print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss))
        
        # Save the model iteration
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss
            }, './model/model_epoch_' + str(epoch))

print('Finished Training')

  "See the documentation of nn.Upsample for details.".format(mode))


[1,     7] loss: 0.421
[2,     7] loss: 0.417


# Print the output

In [None]:
# Check output dimension
print(len(y_hat), len(y_hat[0]), len(y_hat[0][0]), len(y_hat[0][0][0]))

In [None]:
def print_3D(y, n, mode):
    ''' Print 3D visualization of the raster n '''
    """
        Args:
            y(3 dims numpy array): pointcloud as serie of 1 and 0
            n(int): indice of the raster you want to visualize
            mode(string):  'x' if you want to visualize the inputs
                           'y' if you want to visualize the labels
    """
    
    if mode == 'x':
        Y = y[n][0][0].detach().numpy()
    else:
        Y = y[n].detach().numpy()
        
    Y = ml_raster_convert(Y)
    
    # create figure #
    ml_figure = plt.figure()

    # create figure sub-plot #
    ml_plot = ml_figure.add_subplot(111, projection='3d' )

    ml_plot.scatter(Y[:,2], Y[:,1], Y[:,0], s=8, marker='.')
    # setting axis aspect ratio #
    ml_plot.set_aspect( 'equal' )

    # setting axis limits #
    ml_plot.set_xlim( 0.0, 1.0 )
    ml_plot.set_ylim( 0.0, 1.0 )
    ml_plot.set_zlim( 0.0, 1.0 )

    # setting plot initial camera view #
    ml_plot.view_init( elev=66, azim=225 )
        
    return True

In [None]:
def print_3D_pred(y, n = 0):
    ''' Print 3D visualization of the prediction raster n '''
    """
        Args:
            y(3 dims numpy array): pointcloud as serie of 1 and 0
            n(int): indice of the raster you want to visualize

    """
    
    Y = y[n].detach().numpy()
        
    Y[Y < 0.5] = 0.
    Y[Y >= 0.5] = 1.
    
    raster = ml_raster_convert(Y)
        
    # create figure #
    ml_figure = plt.figure()

    # create figure sub-plot #
    ml_plot = ml_figure.add_subplot(111, projection='3d' )

    ml_plot.scatter(raster[:,2], raster[:,1], raster[:,0], s=8, marker='.')
    # setting axis aspect ratio #
    ml_plot.set_aspect( 'equal' )

    # setting axis limits #
    ml_plot.set_xlim( 0.0, 1.0 )
    ml_plot.set_ylim( 0.0, 1.0 )
    ml_plot.set_zlim( 0.0, 1.0 )

    # setting plot initial camera view #
    ml_plot.view_init( elev=66, azim=225 )
        
    return True

In [None]:
%matplotlib
#%matplotlib qt # comment if you want to plot inline
%matplotlib inline # comment if you want to plot in interactive 3D

In [None]:
# Pick a raster you want to see
# (NB : learning is random => ordering will change each time you train the model)
n = 2007

In [None]:
print_3D(x, n, mode = 'x')

In [None]:
print_3D(y, n, mode = 'y')

In [None]:
print_3D_pred(y_hat, n)