In [6]:
%%capture
!yes | pip install trimesh==3.21.6
!yes | pip install open3d==0.17.0
!yes | pip install natsort==8.3.1

In [7]:
import sys 
# Print the interpreter version/build string so the run environment is recorded in the cell output.
print(sys.version)

3.10.10 | packaged by conda-forge | (main, Mar 24 2023, 20:08:06) [GCC 11.3.0]


In [8]:
from typing import Tuple
from natsort import natsorted
from tqdm import tqdm

import os
import json
import errno
import random

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as IPd

import plotly.express as px
import plotly.graph_objs as go

import torch
from torch.utils.data import Dataset, DataLoader

import trimesh
import open3d as o3d

from sklearn.decomposition import PCA

from sklearn.svm import SVC

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [9]:
# Single seed value shared by every random number generator used in this notebook.
seed = 42

# NOTE(review): this is exported for any child processes; the hash seed of the
# already-running interpreter is presumably fixed at start-up — confirm.
os.environ["PYTHONHASHSEED"] = str(seed)

# The Python stdlib, NumPy and Open3D generators are each seeded with the same value.
for _seed_rng in (random.seed, np.random.seed, o3d.utility.random.seed):
    _seed_rng(seed)

# Kept as the final expression so the returned torch Generator is still displayed.
torch.manual_seed(seed)

<torch._C.Generator at 0x78cdca57f170>

In [10]:
# Root of the competition data as mounted in the Kaggle kernel.
BASE_PATH = '/kaggle/input/2023-ml-project3/modelnet10'

# Dataset directory, class-name -> label JSON and sample submission CSV,
# all located directly under BASE_PATH.
DATA_PATH, LABEL_PATH, SUBMIT_PATH = (
    os.path.join(BASE_PATH, leaf)
    for leaf in ('dataset', 'class2Label.json', 'sample_submit.csv')
)


In [11]:
# Build one preview scene per class from a randomly chosen training mesh.
class_name_list = sorted(os.listdir(os.path.join(DATA_PATH, 'train')))

scene_list = list()

for class_name in class_name_list:
    class_dir = os.path.join(DATA_PATH, 'train', class_name)

    # os.listdir returns entries in arbitrary order, so the listing is sorted
    # before sampling; otherwise the seeded random.choice could still pick a
    # different file from one machine/run to the next.
    off = random.choice(sorted(os.listdir(class_dir)))

    mesh = trimesh.load(os.path.join(class_dir, off))

    # Wrap the mesh in its own Scene so each class can be displayed separately.
    scene = trimesh.Scene()
    scene.add_geometry(mesh)

    scene_list.append(scene)

In [None]:
# Render each sampled scene underneath a centred heading with its class name.
for class_name, scene in zip(class_name_list, scene_list):
    heading = IPd.HTML(f"<h4 style='text-align:center;'>{class_name}</h4>")
    IPd.display(heading)

    viewer = scene.show()
    IPd.display(viewer)

In [12]:
class ModelNet10(Dataset):
    """Path-level dataset over a ``<root>/<split>/...`` ModelNet10 directory tree.

    The dataset does not load any geometry; it only indexes file paths.

    * ``train`` split: ``<root>/train/<class_name>/<file>.off``. Each item is a
      ``(path, label)`` tuple where ``label`` is ``class2label[class_name]``.
    * ``test`` split: ``<root>/test/<file>``. Each item is a bare path, listed
      in natural sort order.

    Args:
        root: Directory that contains the ``train`` and ``test`` sub-directories.
        split: ``'train'`` or ``'test'`` (case-insensitive).
        class2label: Mapping from class directory name to label. Required for
            the train split; ignored (may be ``None``) for the test split.

    Raises:
        AssertionError: If ``split`` is neither ``'train'`` nor ``'test'``.
        ValueError: If the train split is requested without ``class2label``.
        FileNotFoundError: If a train class directory contains an entry whose
            name does not end with ``off``.
    """

    def __init__(self, root: str, split: str, class2label: dict = None):
        self.root = root
        self.split = split.lower()
        # Validate the normalised value: checking the raw argument would reject
        # e.g. 'Train' even though it has just been lower-cased.
        assert self.split in ['train', 'test'], f"split must be 'train' or 'test', got {split!r}"

        # Always defined so the attribute can be inspected on either split.
        self.class2label = class2label

        self.path, self.label = list(), list()

        if self.split == 'train':
            if self.class2label is None:
                raise ValueError("class2label is required when split == 'train'")

            self.classes = sorted(os.listdir(os.path.join(self.root, self.split)))
            # Label lookup restricted to the classes actually present on disk.
            self.le = {name: self.class2label[name] for name in self.classes}

            for class_name in self.classes:
                class_path = os.path.join(self.root, self.split, class_name)

                # Sorted so that item order does not depend on the arbitrary
                # ordering returned by os.listdir.
                for off in sorted(os.listdir(class_path)):
                    if off.endswith('off'):
                        self.path.append(os.path.join(class_path, off))
                        self.label.append(self.le[class_name])
                    else:
                        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), off)

        elif self.split == 'test':
            # Natural sort keeps numerically named files in numeric order.
            fname_list = natsorted(os.listdir(os.path.join(self.root, self.split)))
            for fname in fname_list:
                self.path.append(os.path.join(self.root, self.split, fname))

    def __getitem__(self, index):
        """Return ``(path, label)`` for the train split, or ``path`` for the test split."""
        if self.split == 'train':
            return self.path[index], self.label[index]
        elif self.split == 'test':
            return self.path[index]

    def __len__(self):
        """Number of indexed files."""
        return len(self.path)

In [13]:
# Read the class-name -> label mapping from the JSON file at LABEL_PATH.
with open(LABEL_PATH, 'r') as label_file:
    class2label = json.load(label_file)

# The train split needs the label mapping; the test split is built without one.
train_dataset = ModelNet10(split='train', root=DATA_PATH, class2label=class2label)
test_dataset = ModelNet10(split='test', root=DATA_PATH, class2label=None)

# One file per batch. Training order is shuffled, test order is preserved.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=1)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=1)

In [14]:
# Take the first batch from the training dataloader: a file path (off) and an integer label (label).
_first_train_batch = next(iter(train_dataloader), None)
if _first_train_batch is not None:
    off, label = _first_train_batch
    # Raw batch as collated by the dataloader, then the unwrapped single sample.
    print(f'TRAIN DATALOADER\npath: {off}\nlabel: {label}\n')
    print(f'TRAIN DATALOADER\npath: {off[0]}\nlabel: {label.item()}')

# Take the first batch from the test dataloader: only a file path (off).
_first_test_batch = next(iter(test_dataloader), None)
if _first_test_batch is not None:
    off = _first_test_batch
    print(f'\nTEST  DATALODER\npath: {off[0]}')

TRAIN DATALOADER
path: ('/kaggle/input/2023-ml-project3/modelnet10/dataset/train/bathtub/bathtub_0098.off',)
label: tensor([0])

TRAIN DATALOADER
path: /kaggle/input/2023-ml-project3/modelnet10/dataset/train/bathtub/bathtub_0098.off
label: 0

TEST  DATALODER
path: /kaggle/input/2023-ml-project3/modelnet10/dataset/test/0.off


In [15]:
def read_off(off: str) -> Tuple[np.ndarray, np.ndarray]:
    """Parse an ASCII OFF mesh file into vertex and face arrays.

    Accepted layout::

        OFF
        n_vertex n_face n_edge
        x y z            (n_vertex lines)
        k i0 i1 ... ik-1 (n_face lines; anything after the k indices is ignored)

    The element counts may also sit on the header line itself
    (``OFF8 6 0``). Blank lines and ``#`` comments are skipped.

    Args:
        off: Path to the OFF file.

    Returns:
        ``(vertices, faces)`` built with ``np.array`` from the parsed rows:
        one row of floats per vertex and one row of vertex indices per face.

    Raises:
        ValueError: If the header does not start with ``OFF``, the counts or
            numeric fields cannot be parsed, or the file ends early.
    """

    def _payload_lines(handle):
        # Yield stripped lines with '#' comments removed, skipping empty ones.
        for raw in handle:
            text = raw.split('#', 1)[0].strip()
            if text:
                yield text

    def _take(lines, what):
        # Fetch the next payload line, turning a truncated file into a clear error
        # instead of silently producing empty rows.
        try:
            return next(lines)
        except StopIteration:
            raise ValueError(f'Unexpected end of OFF file while reading {what}') from None

    with open(off, 'r') as f:
        lines = _payload_lines(f)

        header = _take(lines, 'header')
        if not header.startswith('OFF'):
            raise ValueError('Not a valid OFF header')

        # Counts either follow 'OFF' on the same line or sit on the next line.
        count_tokens = header[3:].split() or _take(lines, 'element counts').split()
        n_vertex, n_face, _n_edge = map(int, count_tokens)

        vertices = [
            list(map(float, _take(lines, 'vertices').split()))
            for _ in range(n_vertex)
        ]

        faces = list()
        for _ in range(n_face):
            tokens = _take(lines, 'faces').split()
            # The first token is the number of corners; only that many indices
            # belong to the face (trailing values are an optional colour spec).
            n_corner = int(tokens[0])
            faces.append(list(map(int, tokens[1:1 + n_corner])))

        # Convert the vertex list and the face list to numpy arrays and return them as a tuple.
        return np.array(vertices), np.array(faces)

In [16]:
# Choose the 3D data representation: one of 'point_cloud', 'voxel', 'mesh'
method = 'point_cloud'

In [17]:
def estimate_normal_vector_from_point_cloud(point_cloud, radius: float = None, max_nn: int = None) -> np.ndarray:
    """Estimate a normal vector for every point of a point cloud with Open3D.

    The neighbourhood search used for the estimation depends on which
    arguments are supplied:

    * ``radius`` and ``max_nn``: hybrid search (both limits apply).
    * ``radius`` only: radius search.
    * ``max_nn`` only: k-nearest-neighbour search with ``knn=max_nn``.
    * neither: Open3D's default k-nearest-neighbour parameters.

    Args:
        point_cloud: Array-like of 3D points, one ``(x, y, z)`` row per point.
        radius: Search radius, in the same units as the points, or ``None``.
        max_nn: Maximum number of neighbours to use, or ``None``.

    Returns:
        A new array holding the estimated normals, one row per input point.
    """
    pcd = o3d.geometry.PointCloud()

    # Vector3dVector is built from a float64 array; convert explicitly so lists
    # and integer arrays are accepted as well.
    pcd.points = o3d.utility.Vector3dVector(np.asarray(point_cloud, dtype=np.float64))

    # Passing None straight into KDTreeSearchParamHybrid is not valid, so pick
    # the search strategy that matches the arguments actually provided.
    if radius is not None and max_nn is not None:
        search_param = o3d.geometry.KDTreeSearchParamHybrid(radius=radius, max_nn=max_nn)
    elif radius is not None:
        search_param = o3d.geometry.KDTreeSearchParamRadius(radius=radius)
    elif max_nn is not None:
        search_param = o3d.geometry.KDTreeSearchParamKNN(knn=max_nn)
    else:
        search_param = o3d.geometry.KDTreeSearchParamKNN()
    pcd.estimate_normals(search_param=search_param)

    # Copy the data out so the result does not alias the buffer owned by the
    # temporary point cloud created above.
    normal = np.array(pcd.normals)
    return normal

In [18]:
def compute_histogram_of_normal_vector(normal_vector, n_bins: int = 10, n_range: tuple = (-1,1)):
    """Build a normalised histogram feature from the x, y and z normal components.

    A separate histogram with ``n_bins`` bins over ``n_range`` is computed for
    each of the first three columns of ``normal_vector``; the three histograms
    are concatenated (x, then y, then z) and divided by their total count.

    Args:
        normal_vector: Array-like with one normal per row; columns 0, 1 and 2
            are used.
        n_bins: ``bins`` argument forwarded to ``np.histogram``.
        n_range: ``range`` argument forwarded to ``np.histogram``.

    Returns:
        1-D float array containing the concatenated, normalised histograms.
        If no value falls inside ``n_range`` (including empty input) the
        result is all zeros instead of NaN.
    """
    normals = np.asarray(normal_vector, dtype=float)
    if normals.size == 0:
        # Give empty input a column axis so the per-axis slicing below works.
        normals = normals.reshape(0, 3)

    per_axis_counts = [
        np.histogram(normals[:, axis], bins=n_bins, range=n_range)[0]
        for axis in range(3)
    ]
    feature = np.concatenate(per_axis_counts)

    total = feature.sum()
    if total == 0:
        # Nothing was counted: dividing would yield NaN and poison downstream models.
        return np.zeros(feature.shape, dtype=float)

    feature_normalized = feature / total

    return feature_normalized

In [19]:
if method == 'point_cloud':

    x_train_point_cloud, y_train_point_cloud = list(), list()

    # enumerate() hides the loader's length from tqdm, so pass it explicitly
    # to get a real progress bar and ETA instead of a bare iteration counter.
    pbar = tqdm(enumerate(train_dataloader, start=1), total=len(train_dataloader))
    for i, (off_, label_) in pbar:
        # batch_size is 1, so unwrap the single path and the single label.
        off, label = off_[0], label_.item()

        # Only the vertices are used as the point cloud; the faces are discarded.
        point_cloud, _faces = read_off(off)

        normal_vector = estimate_normal_vector_from_point_cloud(point_cloud, radius=5, max_nn=30)

        training_feature = compute_histogram_of_normal_vector(normal_vector, n_bins=32, n_range=(-1, 1))

        x_train_point_cloud.append(training_feature)
        y_train_point_cloud.append(label)

        pbar.set_description(f'Processing: {os.path.basename(off)}\tPercentage: {i / len(train_dataset) * 100:.1f}%')

    # Convert both the training features and the training labels to numpy arrays.
    x_train_point_cloud = np.asarray(x_train_point_cloud)
    y_train_point_cloud = np.asarray(y_train_point_cloud)

Processing: desk_0057.off	Percentage: 100.0%: : 1000it [01:51,  8.96it/s]     


In [None]:
if method == 'point_cloud':
    x_test_point_cloud = list()

    # enumerate() hides the loader's length from tqdm, so pass it explicitly
    # to get a real progress bar and ETA instead of a bare iteration counter.
    pbar = tqdm(enumerate(test_dataloader, start=1), total=len(test_dataloader))
    for i, (off_) in pbar:
        # batch_size is 1, so unwrap the single path.
        off = off_[0]

        # Only the vertices are used as the point cloud; the faces are discarded.
        point_cloud, _faces = read_off(off)

        # Same parameters as the training features so both live in the same feature space.
        normal_vector = estimate_normal_vector_from_point_cloud(point_cloud, radius=5, max_nn=30)

        feature = compute_histogram_of_normal_vector(normal_vector, n_bins=32, n_range=(-1, 1))

        x_test_point_cloud.append(feature)

        pbar.set_description(f'Processing: {os.path.basename(off)}\tPercentage: {i / len(test_dataset) * 100:.1f}%')

    # Convert the test features to a numpy array.
    x_test_point_cloud = np.asarray(x_test_point_cloud)

Processing: 203.off	Percentage: 68.0%: : 204it [00:18, 13.91it/s]

- SVM classification

In [None]:
print(f'Select Method is {method}')

# Pick the feature matrices that belong to the selected representation.
if method == 'point_cloud':
    x_train, y_train = x_train_point_cloud, y_train_point_cloud
    x_test = x_test_point_cloud

elif method == 'voxel':
    x_train, y_train = x_train_voxel, y_train_voxel
    x_test = x_test_voxel

elif method == 'mesh':
    x_train, y_train = x_train_mesh, y_train_mesh
    x_test = x_test_mesh

else:
    # Fail here with a clear message rather than with a NameError on x_train below.
    raise ValueError(f"Unknown method {method!r}; expected 'point_cloud', 'voxel' or 'mesh'")

# SVC comes from the notebook's import cell; no second import is needed here.
svm = SVC(C=10, random_state=seed)
svm.fit(x_train, y_train)
# Accuracy on the training set itself: a sanity check, not a generalisation estimate.
print(svm.score(x_train, y_train))

pred = svm.predict(x_test)

submit = pd.read_csv(SUBMIT_PATH, index_col=0)
# A partially processed test set would otherwise surface as an opaque pandas length error.
if len(pred) != len(submit):
    raise ValueError(
        f'Got {len(pred)} predictions for {len(submit)} submission rows; '
        'the test feature extraction may not have completed.'
    )
submit['Label'] = pred

submit.to_csv(f"{method}_baseline.csv")