In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [3]:
from CloudDetect.util import read_off, visualise
from CloudDetect.transform import PointCloudSample, Normalise, Tensor
from CloudDetect.models import PointNet

In [8]:
import glob
metadata = pd.read_csv('data/metadata_modelnet40.csv')
all_data_files = glob.glob('**/*.off',recursive=True)
all_data_files = set(['/'.join(x.split('/')[2:]) for x in all_data_files])
metadata = metadata[metadata['object_path'].map(lambda x: x in all_data_files)]

In [9]:
#file = ROOT + metadata['object_path'].iloc[-4465]
#verts, faces = read_off(open(file))
#fig = visualise(verts)

#new_verts = Normalise('max')(PointCloudSample(2000)(verts))
#fig = visualise(new_verts)

In [10]:
batch_size = 32
n_points = 2000
input = torch.tensor(np.random.uniform(0,1,(batch_size, 3, n_points)))

In [11]:
pnet = PointNet(n_points, classes = 10, segment = False)
classification_output = pnet(input.float())

In [12]:
# data loader

In [14]:
def preprocessing(n_sample, norm_how):
    composition = [
            PointCloudSample(n_sample),
            Normalise(norm_how),
            Tensor()
    ]
    return transforms.Compose(composition)

def generate_class_mapper(metadata):
    class_mapper = {x:i for i,x in enumerate(metadata['class'].unique())}
    return class_mapper

In [163]:
import os
import pandas as pd
from torchvision.io import read_image

class CloudDataset(Dataset):
    def __init__(self, metadata, preprocessor, root, class_mapper, one_hot = False):
        self.metadata = metadata
        self.preprocessor = preprocessor
        self.root = root
        self.class_mapper = class_mapper
        self.one_hot = one_hot
        
    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        data_instance = metadata.iloc[idx]
        class_item = self.class_mapper[data_instance['class']]
        if self.one_hot:
            class_item = torch.nn.functional.one_hot(torch.tensor(class_item), num_classes=len(self.class_mapper))
        file = self.root + data_instance['object_path']
        verts, faces = read_off(open(file))
        return {'data':self.preprocessor(verts),'category':class_item}

from sklearn.model_selection import StratifiedKFold

def assign_val_indices(df, target_name, n_splits):
    skf = StratifiedKFold(n_splits=n_splits)
    df['kfold'] = None
    for fold, (train_idx, val_idx) in enumerate(skf.split(X=df, y=df[target_name].values)):
        df.loc[val_idx, 'kfold'] = fold

In [50]:
model = PointNet(n_points, classes = len(class_mapper), segment = False)

In [193]:
metadata_train = metadata[metadata['split'] == 'train']
metadata_train = metadata_train.sample(200)# for development
metadata_test = metadata[metadata['split'] == 'test']
metadata_train = metadata_train.reset_index(drop = True)
metadata_test = metadata_test.reset_index(drop = True)
assign_val_indices(metadata_train, 'class', 3)



In [194]:
metadata_train

Unnamed: 0,object_id,class,split,object_path,kfold
0,chair_0379,chair,train,chair/train/chair_0379.off,0
1,chair_0687,chair,train,chair/train/chair_0687.off,0
2,door_0021,door,train,door/train/door_0021.off,0
3,airplane_0116,airplane,train,airplane/train/airplane_0116.off,0
4,vase_0343,vase,train,vase/train/vase_0343.off,0
...,...,...,...,...,...
195,cup_0067,cup,train,cup/train/cup_0067.off,1
196,toilet_0022,toilet,train,toilet/train/toilet_0022.off,2
197,night_stand_0144,night,train,night/train/night_stand_0144.off,2
198,bookshelf_0378,bookshelf,train,bookshelf/train/bookshelf_0378.off,2


In [195]:
metadata_train_val = metadata_train[metadata_train['kfold'] == 0]
metadata_train_train = metadata_train[metadata_train['kfold'] != 0]

In [196]:
ROOT = 'data/ModelNet40/'
preprocessor = preprocessing(1024, 'max')
class_mapper = generate_class_mapper(metadata)
cloud_train_dataset = CloudDataset(metadata_train_train, preprocessor, ROOT, class_mapper)
cloud_val_dataset = CloudDataset(metadata_train_val, preprocessor, ROOT, class_mapper)

In [197]:
CloudDataTrainLoader = DataLoader(cloud_train_dataset, batch_size=32, shuffle=True)
CloudDataValLoader = DataLoader(cloud_val_dataset, batch_size=32, shuffle=False)

In [198]:
loss_fn = torch.nn.NLLLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.0001)

In [232]:
from sklearn.metrics import f1_score, precision_score, recall_score, balanced_accuracy_score
def get_metrics(y_true, y_pred):
    return {
        'f1_score':f1_score(y_true, y_pred, average='macro',zero_division = 0),
        'precision':precision_score(y_true, y_pred, average='macro',zero_division = 0),
        'recall':recall_score(y_true, y_pred, average='macro',zero_division = 0),
        'balanced_acc':balanced_accuracy_score(y_true, y_pred),
    }

def load_data(data):
    X,y = data['data'], data['category']
    X = torch.transpose(X.float(),1,2)
    return X, y

def perform_optimisation(optimiser, model, loss_fn, X, y):
    optimiser.zero_grad()
    model_output = model(X)
    loss = loss_fn(model_output, y)
    loss.backward()
    optimiser.step() 
    return loss

def reporting(batch_print, running_loss):
    last_loss = running_loss / batch_print # average loss per batch
    return last_loss

In [200]:
from tqdm import notebook

In [238]:
running_loss = 0
last_loss = 0
epoch_index = 0
tracker = []
batch_print = 1
for i,x in notebook.tqdm(enumerate(CloudDataTrainLoader), total=len(CloudDataTrainLoader)):
    # load data
    X,y = load_data(x)
    loss = perform_optimisation(optimiser, model, loss_fn, X, y)
    running_loss += loss.item()
    if i % batch_print == 0 and i!=0:
        last_loss = reporting(batch_print, running_loss)
        print(last_loss)
        running_loss = 0

        #validate
        all_true_output = []
        all_model_output = []
        for i,x in notebook.tqdm(enumerate(CloudDataValLoader), total=len(CloudDataValLoader)):
            # load data
            X,y = load_data(x)
            model_output = model(X)
            all_true_output.append(y)
            all_model_output.append(model_output)

        all_true = torch.concat(all_true_output)
        all_model = torch.concat(all_model_output)
        loss = loss_fn(all_model, all_true)
        print(loss.item())


#validate
all_true_output = []
all_model_output = []
for i,x in notebook.tqdm(enumerate(CloudDataValLoader), total=len(CloudDataValLoader)):
    # load data
    X,y = load_data(x)
    model_output = model(X)
    all_true_output.append(y)
    all_model_output.append(model_output)

all_true = torch.concat(all_true_output)
all_model = torch.concat(all_model_output)
loss = loss_fn(all_model, all_true)
print(loss)
classification_output = torch.argmax(torch.exp(all_model),axis = 1)
results = get_metrics(all_true, classification_output)
print(results)

  0%|          | 0/5 [00:00<?, ?it/s]

5.77838659286499


  0%|          | 0/3 [00:00<?, ?it/s]

2.893237829208374
2.8395321369171143


  0%|          | 0/3 [00:00<?, ?it/s]

2.866215944290161
2.830862522125244


  0%|          | 0/3 [00:00<?, ?it/s]

2.7969491481781006
3.1443512439727783


  0%|          | 0/3 [00:00<?, ?it/s]

2.7825820446014404


  0%|          | 0/3 [00:00<?, ?it/s]

tensor(2.7734, grad_fn=<NllLossBackward0>)
{'f1_score': 0.055677655677655674, 'precision': 0.07692307692307693, 'recall': 0.04362801377726751, 'balanced_acc': 0.5671641791044776}


