<a href="https://colab.research.google.com/github/Akhilesh-K-Yadav/Meta-Learning_Projects/blob/main/Unsupervised/Contrastive_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Contrastive Learning in PyTorch with Point Clouds***

In [22]:
import os
import glob
import torch
#!pip install plotly --quiet
import plotly.express as px
from torch.utils.data import Dataset, DataLoader
import pdb

In [2]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json



In [3]:
# Fetch the `mitkir/shapenet` dataset archive with the Kaggle CLI; the recorded
# output shows it being saved as shapenet.zip under /content.
! kaggle datasets download -d mitkir/shapenet

Downloading shapenet.zip to /content
100% 1.36G/1.36G [00:46<00:00, 37.6MB/s]
100% 1.36G/1.36G [00:46<00:00, 31.4MB/s]


In [4]:
! unzip /content/shapenet.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86dec892a246e0c02cbf13da1d4abcc5.txt  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86dec892a246e0c02cbf13da1d4abcc5_8x8.npz  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86e6caacc1e4ddb5e762cf5917cef4ef.txt  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86e6caacc1e4ddb5e762cf5917cef4ef_8x8.npz  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86e6ef5ae3420e95963080fd7249126d.txt  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86e6ef5ae3420e95963080fd7249126d_8x8.npz  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86eea3415d5a61df490ad276cd2af3a4.txt  
  inflating: shapenetcore_partanno_segmentation_benchmark_v0_normal/04379243/86eea3415d5a61df490ad276cd2af3a4_8

In [5]:
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [6]:
# Category name -> 8-digit id string. The id is used as the name of the
# per-category directory under `ds_path` when file lists are built.
category_ids = {
    "Airplane": "02691156",
    "Bag": "02773838",
    "Cap": "02954340",
    "Car": "02958343",
    "Chair": "03001627",
    "Earphone": "03261776",
    "Guitar": "03467517",
    "Knife": "03624134",
    "Lamp": "03636649",
    "Laptop": "03642806",
    "Motorbike": "03790512",
    "Mug": "03797390",
    "Pistol": "03948459",
    "Rocket": "04099429",
    "Skateboard": "04225987",
    "Table": "04379243",
}

# Category name -> list of integer ids; taken together the lists cover 0..49
# without gaps or overlap.
# NOTE(review): presumably the segmentation part labels that belong to each
# category; nothing in the visible cells reads this mapping -- confirm.
seg_classes = {
    "Airplane": [0, 1, 2, 3],
    "Bag": [4, 5],
    "Cap": [6, 7],
    "Car": [8, 9, 10, 11],
    "Chair": [12, 13, 14, 15],
    "Earphone": [16, 17, 18],
    "Guitar": [19, 20, 21],
    "Knife": [22, 23],
    "Lamp": [24, 25, 26, 27],
    "Laptop": [28, 29],
    "Motorbike": [30, 31, 32, 33, 34, 35],
    "Mug": [36, 37],
    "Pistol": [38, 39, 40],
    "Rocket": [41, 42, 43],
    "Skateboard": [44, 45, 46],
    "Table": [47, 48, 49],
}

# Root directory of the extracted dataset. The trailing slash matters: paths
# are built from it by plain string concatenation.
ds_path = "/content/shapenetcore_partanno_segmentation_benchmark_v0_normal/"

In [7]:
def read_txt_array(path, sep=None, start=0, end=None, dtype=None, device=None):
    """Read a delimited text file of numbers into a tensor.

    Each non-blank line becomes one row: the line is split on ``sep``, the
    fields ``[start:end]`` are kept and each one is parsed with ``float``.

    Args:
        path: Path of the text file to read.
        sep: Field separator handed to ``str.split``; ``None`` splits on any
            run of whitespace.
        start: Index of the first field to keep on every line.
        end: Index one past the last field to keep (``None`` keeps the rest).
        dtype: Optional ``torch.dtype`` of the result.
        device: Optional device on which to create the result.

    Returns:
        A tensor built from the parsed rows with size-1 dimensions squeezed
        away, so a file with a single row or a single kept column yields a
        1-D tensor.

    Raises:
        ValueError: If a kept field cannot be parsed as a float, or if rows
            have different lengths.
    """
    with open(path, 'r') as f:
        # Iterate line by line instead of `read().split('\n')[:-1]`: the slice
        # threw away the last data row whenever the file did not end with a
        # newline. Blank lines are skipped so they cannot produce empty rows.
        lines = [line.rstrip('\n') for line in f if line.strip()]
    rows = [[float(x) for x in line.split(sep)[start:end]] for line in lines]
    return torch.tensor(rows, dtype=dtype, device=device).squeeze()

In [8]:
# Load one sample from category directory 03624134 (the id listed for 'Knife').
file = ds_path + "03624134/102982a2159226c2cc34b900bb2492e.txt"
data = read_txt_array(file)

# Split the columns: the first three, the next three, and the last one cast to
# integers.
# NOTE(review): presumably xyz coordinates / normals / part label -- confirm
# against the dataset description.
pos, x = data[:, :3], data[:, 3:6]
y = data[:, -1].long()
pos[:, 0]

tensor([ 0.0085,  0.0267, -0.0092,  ...,  0.0087,  0.0170, -0.0097])

In [9]:
def plot_3d_shape(shape):
    """Print the number of rows in `shape` and show them as a 3D scatter plot.

    The first three columns of `shape` are used as the x, y and z coordinates;
    any further columns are ignored.
    """
    print("Number of data points: ", shape.shape[0])
    xs, ys, zs = shape[:, 0], shape[:, 1], shape[:, 2]
    # Low opacity keeps overlapping points distinguishable.
    figure = px.scatter_3d(x=xs, y=ys, z=zs, opacity=0.3)
    figure.show()


plot_3d_shape(pos)

Number of data points:  2209


In [29]:
class Shapenet(Dataset):
  """Dataset over the per-category ``.txt`` files found under a root directory.

  Every item is the full numeric content of one ``.txt`` file, returned as a
  tensor with one row per non-blank line of the file.

  Args:
    ds_path: Root directory that contains one sub-directory per category id.
    categories: Iterable of category names; each must be a key of the
      module-level ``category_ids`` mapping.

  Raises:
    ValueError: If a requested category is not a key of ``category_ids``.
  """

  def __init__(self, ds_path, categories):
    super().__init__()
    self.ds_path = ds_path
    self.categories = categories
    self.all_filenames = self._get_all_filenames()

  def _get_all_filenames(self):
    """Collect the ``.txt`` paths of every requested category.

    Files are sorted within each category because ``glob`` returns them in
    arbitrary order; sorting keeps the index -> file mapping stable across
    runs.
    """
    unknown = [cat for cat in self.categories if cat not in category_ids]
    if unknown:
      # Fail with a readable message instead of a TypeError from `str + None`.
      raise ValueError(f"Unknown categories: {unknown}")

    all_filenames = []
    for cat in self.categories:
      # Build the path from the root given to the constructor (not the
      # notebook-level global); os.path.join works with or without a trailing
      # slash on the root.
      cat_dir = os.path.join(self.ds_path, category_ids[cat])
      all_filenames.extend(sorted(glob.glob(os.path.join(cat_dir, "*.txt"))))
    return all_filenames

  def _read_txt_array(self, path, sep=None, start=0, end=None, dtype=None, device=None):
    """Parse a delimited text file of numbers into a tensor.

    Each non-blank line is split on ``sep`` (``None`` means any whitespace),
    fields ``[start:end]`` are parsed with ``float``, and the stacked rows are
    returned with size-1 dimensions squeezed away. ``dtype`` and ``device`` are
    forwarded to ``torch.tensor``.
    """
    with open(path, 'r') as f:
      # Line iteration keeps the final row even when the file has no trailing
      # newline (`read().split('\n')[:-1]` dropped it) and blank lines are
      # skipped so they cannot create empty rows.
      lines = [line.rstrip('\n') for line in f if line.strip()]
    rows = [[float(x) for x in line.split(sep)[start:end]] for line in lines]
    return torch.tensor(rows, dtype=dtype, device=device).squeeze()

  def __len__(self):
    """Number of files collected for the requested categories."""
    return len(self.all_filenames)

  def __getitem__(self, index):
    """Return the parsed contents of the file at position ``index``.

    NOTE(review): the row count presumably differs from file to file, so
    samples will not have a common shape -- confirm before batching.
    """
    data = self._read_txt_array(self.all_filenames[index])
    return data

In [30]:
BATCH_SIZE = 32
WORKERS = 0
SEED = 0  # fixes which files end up in the training split across re-runs

shapenet_ds = Shapenet(ds_path, categories=["Table", "Lamp", "Guitar", "Motorbike"])

# Keep at most 5000 samples. The cap is clamped to the dataset size because
# random_split raises when the requested lengths do not add up to len(dataset).
train_size = min(5000, len(shapenet_ds))
discarded = len(shapenet_ds) - train_size
trainset, _ = torch.utils.data.random_split(
    shapenet_ds,
    [train_size, discarded],
    generator=torch.Generator().manual_seed(SEED),
)
print(len(trainset))

# NOTE(review): Shapenet.__getitem__ returns the whole parsed file, so samples
# presumably differ in row count; if so, the default collate function cannot
# stack a batch of them -- confirm, then add a collate_fn or sample a fixed
# number of points per item.
loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS)

5000


In [None]:
# Pull the first batch from a fresh iterator over the training loader and show
# it as the cell output.
sample = next(iter(loader))
sample