In [1]:
import uproot
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from collections import namedtuple, defaultdict
import open3d as o3d
import random
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

## Opening files containing features in pairs of cells from same cluster (+ve e.g.) and different cluster (-ve e.g)

In [2]:
hf_pair = h5py.File("./pair_set.hdf5", 'r')
hf_nopair = h5py.File("./nopair_set.hdf5", 'r')

In [3]:
pair = hf_pair.get('pair')[:]
nopair = hf_nopair.get('nopair')[:]

In [4]:
pair.shape

(500000, 2, 10)

In [5]:
hf_pair.close()
hf_nopair.close()

In [6]:
pair.shape

(500000, 2, 10)

## Concatenating +ve and -ve examples

In [7]:
total_dataset = np.concatenate((pair,nopair),axis=0)

In [8]:
total_dataset.shape

(1000000, 2, 10)

## Making labels 1 for +ve e.g and -1 for -ve e.g

In [9]:
total_label = np.concatenate(([1]*500000,[-1]*500000),axis=0)

In [10]:
total_label.shape

(1000000,)

In [11]:
total_label

array([ 1,  1,  1, ..., -1, -1, -1])

In [12]:
total_dataset[0]

array([[0.56252339, 0.39402719, 0.16334734, 0.26086957, 0.        ,
        1.        , 0.89405496, 1.        , 0.0011919 , 0.77126392],
       [0.55990222, 0.39252812, 0.16334734, 0.26086957, 0.        ,
        0.97567125, 0.87601607, 1.        , 0.0011919 , 0.77126392]])

## Randomizing data and dividing into train, test parts

In [13]:
arr = np.arange(1000000)
np.random.shuffle(arr)

In [14]:
total_dataset_rand = total_dataset[arr]
total_label_rand =  total_label[arr]

In [15]:
features_train, features_test, truth_label_train, truth_label_test = train_test_split(
    total_dataset_rand, total_label_rand, train_size=0.70)

In [16]:
features_train.shape

(700000, 2, 10)

In [17]:
truth_label_train.shape

(700000,)

### Making Pytorch specific dataset and Network and Loss

In [27]:
# Custom dataset for pairwise training
class SiameseDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        pair = (self.data[index][0], self.data[index][1])
        label = torch.tensor(self.labels[index], dtype=torch.float32)

        if self.transform:
            pair = (self.transform(pair[0]), self.transform(pair[1]))

        return pair, label


In [19]:
# Convert data to PyTorch tensors
pairs = [(torch.Tensor(pair[0]), torch.Tensor(pair[1])) for pair in features_train]
labels = torch.tensor(truth_label_train, dtype=torch.float32)

In [1]:
#pairs

In [21]:
class SiameseNetwork(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(SiameseNetwork, self).__init__()

        # Define the architecture for one branch of the Siamese network
        self.branch = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, hidden_size)
        )

    def forward_one(self, x):
        # Forward pass for one branch of the Siamese network
        return self.branch(x)

    def forward(self, input1, input2):
        # Forward pass for both branches of the Siamese network
        output1 = self.forward_one(input1)
        output2 = self.forward_one(input2)
        return output1, output2

In [22]:
# Pairwise hinge loss function
class PairwiseHingeLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(PairwiseHingeLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, target):
        # Compute the pairwise hinge loss
        distance = nn.functional.pairwise_distance(output1, output2)
        loss = torch.mean(torch.clamp(self.margin + distance - target * self.margin, min=0))
        return loss


In [23]:
input_size = 10  # Adjust this based on your input data size
hidden_size = 64  # You can customize this based on your task

# Create a Siamese network
siamese_net = SiameseNetwork(input_size, hidden_size)

# Create a pairwise hinge loss criterion
criterion = PairwiseHingeLoss()

# Set up data loaders
#transform = transforms.Compose([transforms.ToTensor()])
#dataset = SiameseDataset(data=pairs, labels=labels, transform=transform)
dataset = SiameseDataset(data=pairs, labels=labels)
dataloader = DataLoader(dataset, batch_size=1024, shuffle=True)


In [24]:
dataset

<__main__.SiameseDataset at 0x1869af430>

In [25]:
# Set up optimizer
optimizer = optim.Adam(siamese_net.parameters(), lr=0.001)

In [26]:
# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, target = batch
        input1, input2 = inputs

        # Forward pass
        output1, output2 = siamese_net(input1, input2)
        loss = criterion(output1, output2, target)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  label = torch.tensor(self.labels[index], dtype=torch.float32)


Epoch [1/20], Loss: 1.0230
Epoch [2/20], Loss: 1.0526
Epoch [3/20], Loss: 0.9375
Epoch [4/20], Loss: 1.0033
Epoch [5/20], Loss: 1.0263
Epoch [6/20], Loss: 0.9967
Epoch [7/20], Loss: 1.0592
Epoch [8/20], Loss: 0.9770
Epoch [9/20], Loss: 0.9901
Epoch [10/20], Loss: 1.0493
Epoch [11/20], Loss: 1.0658
Epoch [12/20], Loss: 0.9934
Epoch [13/20], Loss: 0.9836
Epoch [14/20], Loss: 1.0822
Epoch [15/20], Loss: 1.0099
Epoch [16/20], Loss: 0.9770
Epoch [17/20], Loss: 0.9671
Epoch [18/20], Loss: 0.9803
Epoch [19/20], Loss: 0.9836
Epoch [20/20], Loss: 1.0526
