In [1]:
# IMPORT REQUIRED LIBRARIES

%matplotlib inline
import os
import random
import wandb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.utils

from torch import optim
from torch.utils.data import DataLoader, Dataset
from torch.autograd import Variable

from sklearn.decomposition import PCA

from tqdm.notebook import tqdm

In [2]:
# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Directory holding the NIRS CSV files.
# The default is the original author's local folder; set the NIRS_DATA_DIR
# environment variable to run the notebook on another machine without
# editing this cell.
DATA_DIR = os.environ.get(
    "NIRS_DATA_DIR",
    "C:\\Users\\argan\\OneDrive\\Desktop\\DDP\\Data",
)

# CSV files read by the cells below (training set and validation set).
train_path = os.path.join(DATA_DIR, "AugmentedNIRS.csv")
val_path = os.path.join(DATA_DIR, "nirs_val.csv")

In [4]:
# Load the training table; the first CSV column becomes the row index.
train_df = pd.read_csv(train_path, header=0, index_col=0)

n_samples, n_features = train_df.shape

# Collapse the original labels into two classes: 'pure' stays 'pure' and
# every other value becomes 'adult'.
# The comparison is vectorised over the whole column, so it does not depend
# on the row index being 0..n-1 (a per-row `train_df["labels"][i]` lookup is
# label-based and fails for any other index).
labels = np.where(train_df["labels"] == "pure", "pure", "adult").tolist()

# Replace the original label column with the binary one.
train_df['labels_2'] = labels
train_df = train_df.drop(columns="labels")

### Siamese Dataset

In [5]:
class SiameseNetworkDataset(Dataset):
    """Randomly sampled pairs of spectra for training a Siamese network.

    The CSV at ``path`` is read with its first column as the row index. It
    must contain a ``labels`` column; every column except the last one is
    treated as a feature when a row is fetched (so ``labels`` is expected to
    be the last column).

    Two pools of row pairs are enumerated up front:

    * positive pairs - every unordered pair of distinct rows labelled 'pure';
    * negative pairs - every (pure row, non-pure row) combination.

    Pairs are stored as *positional* row numbers because rows are fetched
    with ``DataFrame.iloc``; storing index labels would only work when the
    CSV index happens to be 0..n-1.

    Args:
        path: Path of the CSV file to load.
        transform: Optional callable applied to each ``(1, n_features)``
            row array before the two rows are stacked.
    """

    # Upper bound on the reported dataset length (items per epoch).
    MAX_LENGTH = 10000

    def __init__(self, path, transform=None):
        data = pd.read_csv(path, header=0, index_col=0)

        # Positional row numbers of the pure / non-pure samples.
        is_pure = (data['labels'] == 'pure').to_numpy()
        pure_rows = np.flatnonzero(is_pure).tolist()
        adult_rows = np.flatnonzero(~is_pure).tolist()

        # Pairs of pure samples (positive pairs).
        positive_pairs = [
            [pure_rows[i], pure_rows[j]]
            for i in range(len(pure_rows) - 1)
            for j in range(i + 1, len(pure_rows))
        ]

        # Pairs of one pure and one adulterated sample (negative pairs).
        negative_pairs = [
            [pure_row, adult_row]
            for pure_row in pure_rows
            for adult_row in adult_rows
        ]

        self.data = data

        self.positive_pairs = positive_pairs
        self.num_positive = len(positive_pairs)

        self.negative_pairs = negative_pairs
        self.num_negative = len(negative_pairs)

        self.transform = transform

    def __getitem__(self, index):
        """Return a randomly drawn ``(pair, label)`` sample.

        ``index`` is ignored: the label is drawn uniformly from {0, 1} and a
        pair is then drawn uniformly from the matching pool, both using
        NumPy's global random state.

        Returns:
            tuple: ``(pair_nirs, label)`` where ``pair_nirs`` is a float
            array with the two rows stacked along axis 0 and ``label`` is
            0 for a positive (pure, pure) pair or 1 for a negative pair.
        """
        label = np.random.randint(2)

        if label == 0:
            idxA, idxB = self.positive_pairs[np.random.choice(self.num_positive)]
        else:
            idxA, idxB = self.negative_pairs[np.random.choice(self.num_negative)]

        # Positional lookup; the last column is dropped from the features.
        nirsA = self.data.iloc[idxA, :-1].values.reshape(1, -1)
        nirsB = self.data.iloc[idxB, :-1].values.reshape(1, -1)

        if self.transform is not None:
            nirsA = self.transform(nirsA)
            nirsB = self.transform(nirsB)

        pair_nirs = np.concatenate([nirsA, nirsB], axis=0)

        return (pair_nirs).astype(float), label

    def __len__(self):
        """Return the number of enumerated pairs, capped at ``MAX_LENGTH``."""
        return min(self.num_negative + self.num_positive, self.MAX_LENGTH)

### Siamese Neural Network

In [12]:
class SiameseNetwork(nn.Module):
    """Siamese 1-D CNN that scores a pair of spectra with a sigmoid output.

    Both members of a pair go through the same convolutional backbone and
    the same fully connected head. The element-wise squared difference of
    the two embeddings is then mapped to a single value in [0, 1].

    Args:
        num_ft_maps_1: Output channels of the first convolution.
        num_ft_maps_2: Accepted for backward compatibility; not used by any
            layer.
        num_ft_maps_3: Output channels of the second convolution.
        num_ft_maps_4: Output channels of the third convolution.
        fc_hd_1: Width of the hidden fully connected layer.
        out_features: Size of the embedding produced for each input.
        pooled_length: Sequence length left after the backbone's three
            pooling stages. The default of 24 is the value the first fully
            connected layer was originally sized for (an input of length
            700 pools down to 24, for example).
    """

    def __init__(self,
                 num_ft_maps_1=8,
                 num_ft_maps_2=16,
                 num_ft_maps_3=16,
                 num_ft_maps_4=32,
                 fc_hd_1=32,
                 out_features=16,
                 pooled_length=24,
                ):

        super().__init__()

        # Three conv/ReLU/pool stages; 'same' padding keeps the length
        # unchanged inside each convolution, so only the pools shorten it.
        self.backbone = nn.Sequential(
            nn.Conv1d(in_channels=1,
                      out_channels=num_ft_maps_1,
                      kernel_size=3,
                      stride=1,
                      padding='same',
                      bias=True),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=2),

            nn.Conv1d(in_channels=num_ft_maps_1,
                      out_channels=num_ft_maps_3,
                      kernel_size=3,
                      stride=1,
                      padding='same',
                      bias=True),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=2),

            nn.Conv1d(in_channels=num_ft_maps_3,
                      out_channels=num_ft_maps_4,
                      kernel_size=3,
                      stride=1,
                      padding='same',
                      bias=True),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=7, stride=7),
        )

        self.flatten = nn.Flatten()

        # The flattened backbone output has pooled_length * num_ft_maps_4
        # values. Deriving the size from the constructor arguments keeps the
        # head consistent when num_ft_maps_4 is changed from its default.
        self.fcc = nn.Sequential(
            nn.Linear(pooled_length * num_ft_maps_4, fc_hd_1, bias=True),
            nn.ReLU(),
            nn.Linear(fc_hd_1, out_features, bias=True),
        )

        self.clf = nn.Sequential(
            nn.Linear(out_features, 1, bias=True),
            nn.Sigmoid()
        )

    def _embed(self, spectrum):
        """Map a ``[BATCH_SIZE, N]`` batch to ``[BATCH_SIZE, out_features]``."""
        # Conv1d needs a channel axis: [BATCH_SIZE, N] -> [BATCH_SIZE, 1, N].
        features = self.backbone(torch.unsqueeze(spectrum.float(), 1))
        return self.fcc(self.flatten(features))

    def forward(self, x):
        """Score a batch of pairs.

        Args:
            x: Tensor of shape ``[BATCH_SIZE, 2, N]``; ``x[:, 0]`` and
                ``x[:, 1]`` are the two members of each pair.

        Returns:
            Tensor of shape ``[BATCH_SIZE, 1]`` with sigmoid outputs.
        """
        output1 = self._embed(x[:, 0, :])
        output2 = self._embed(x[:, 1, :])

        # Element-wise squared difference of the two embeddings.
        output = torch.pow(output1 - output2, exponent=2)
        output = self.clf(output)

        return output

### Train-Test

In [7]:
def train_test(model, num_epochs, train_loader, val_loader, optimizer, criterion, logger):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Every batch from either loader is read as ``batch[0]`` (model input) and
    ``batch[1]`` (labels, cast to float). Predictions are obtained by
    rounding the model output, so the model is expected to emit values in
    [0, 1].

    Args:
        model: Module called as ``model(x=inputs)``.
        num_epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(output, labels)`` with both
            arguments reshaped to ``(batch_size, -1)``.
        logger: Object with a ``log(dict)`` method; receives per-batch
            training metrics and per-epoch validation metrics.

    Returns:
        tuple: ``(batch_loss, val_loss)`` - the training loss of every batch
        and the mean validation loss of every epoch. If ``val_loader``
        yields no batches, that epoch's validation loss and accuracy are
        NaN.
    """
    batch_loss = []
    val_loss = []

    for epoch in range(num_epochs):
        # ===================train===========================

        # Evaluation below switches to eval mode, so re-enable training
        # mode at the start of each epoch.
        model.train()

        for ex in tqdm(train_loader, desc="Training"):

            nirs = ex[0].to(DEVICE)
            labels = ex[1].to(torch.float).to(DEVICE)

            batch_size = labels.size(0)

            # Clear gradients before the backward pass so that nothing
            # accumulated earlier (including before this call) leaks into
            # this update.
            optimizer.zero_grad()

            # ===================forward=====================

            output = model(x=nirs)

            loss = criterion(output.view(batch_size, -1),
                             labels.view(batch_size, -1)
                            )

            # Metrics only: no gradient tracking needed.
            with torch.no_grad():
                output_labels = torch.round(output)
                accuracy = (output_labels.squeeze() == labels.squeeze()).sum().item()/batch_size

            logger.log({"batch_loss": loss.item(), "batch_accuracy": accuracy})

            batch_loss.append(loss.item())

            # ===================backward====================

            loss.backward()
            optimizer.step()

        # ===================evaluate========================

        model.eval()

        # Evaluating model on validation dataset
        correct = 0
        total = 0
        val_batch_loss = []
        with torch.no_grad():
            for val_ex in val_loader:
                nirs_val = val_ex[0].to(DEVICE)
                labels_val = val_ex[1].to(torch.float).to(DEVICE)

                output_val = model(x=nirs_val)

                batch_size = labels_val.size(0)

                val_batch_loss_ = criterion(output_val.view(batch_size, -1),
                                            labels_val.view(batch_size, -1)
                                           )
                val_batch_loss.append(val_batch_loss_.item())

                output_val_labels = torch.round(output_val)
                correct += (output_val_labels.squeeze() == labels_val.squeeze()).sum().item()
                total += batch_size

        # An empty validation loader yields NaN metrics instead of a
        # ZeroDivisionError / empty-mean warning.
        val_accuracy_ = correct/total if total else float("nan")
        val_loss_ = np.mean(val_batch_loss) if val_batch_loss else np.float64(np.nan)
        val_loss.append(val_loss_)
        logger.log({"val_loss": float(val_loss_), "val_accuracy": val_accuracy_})

        # ===================log========================

        print("*"*50)
        print(f"End of Epoch {epoch+1}")
        print(f'Validation Loss:{round(val_loss_, 4)}')
        print(f'Validation Accuracy:{round(val_accuracy_, 4)}')

    return batch_loss, val_loss

In [8]:
# Seed every random number generator used in this notebook (Python's
# `random`, NumPy's global state, and PyTorch on CPU and on all CUDA
# devices) with the same value.
SEED = 0

for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(SEED)

In [13]:
# Number of pairs per mini-batch, shared by both loaders.
BATCH_SIZE = 64

# Pair datasets built from the training and validation CSV files
# (no transform applied to the rows).
train_dataset, val_dataset = (
    SiameseNetworkDataset(csv_path, transform=None)
    for csv_path in (train_path, val_path)
)

# Batch both datasets identically; every other DataLoader option keeps
# its default.
train_loader, val_loader = (
    DataLoader(pair_dataset, batch_size=BATCH_SIZE)
    for pair_dataset in (train_dataset, val_dataset)
)

# Network with its default architecture, moved to the selected device
# (Module.to returns the module itself).
siamese_net = SiameseNetwork().to(DEVICE)

# Adam over all network parameters, paired with binary cross-entropy on
# the network's output.
optimizer = optim.Adam(siamese_net.parameters(), lr=1e-3)
criterion = nn.BCELoss()

In [14]:
# Start a Weights & Biases run; the returned run object is what the
# training loop calls `.log(...)` on.
WANDB_PROJECT = "cs6910-assignment4"
logger = wandb.init(project=WANDB_PROJECT)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
batch_accuracy,▂▇▅▂▄▁▅▃▄▅▃▃▃▂▅▂▃▄▆▆▆▆▃▄▃▄▅▂▂█▃▁▆▂▄▃▄▆▂▇
batch_loss,█▁▄▇▅█▄▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▅▆▅▅▅▅
val_accuracy,▇▇▅▂█▆▁
val_loss,▂▁▆█▂▂▆

0,1
batch_accuracy,0.46875
batch_loss,0.66653
val_accuracy,0.48428
val_loss,0.69316


In [15]:
# Number of passes over the training loader.
NUM_EPOCHS = 30

# Run training with per-epoch validation; keeps the per-batch training
# losses and the per-epoch mean validation losses.
batch_loss, val_loss = train_test(
    model=siamese_net,
    num_epochs=NUM_EPOCHS,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    logger=logger,
)

Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 1
Validation Loss:0.6933
Validation Accuracy:0.4988


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 2
Validation Loss:0.6931
Validation Accuracy:0.513


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 3
Validation Loss:0.6591
Validation Accuracy:0.66


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 4
Validation Loss:0.6753
Validation Accuracy:0.5539


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 5
Validation Loss:0.7137
Validation Accuracy:0.5134


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 6
Validation Loss:0.7722
Validation Accuracy:0.522


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 7
Validation Loss:0.9253
Validation Accuracy:0.5212


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 8
Validation Loss:1.019
Validation Accuracy:0.4921


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 9
Validation Loss:1.1207
Validation Accuracy:0.4902


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 10
Validation Loss:1.0748
Validation Accuracy:0.4953


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 11
Validation Loss:1.0082
Validation Accuracy:0.4929


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 12
Validation Loss:1.2199
Validation Accuracy:0.4882


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 13
Validation Loss:1.121
Validation Accuracy:0.4796


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 14
Validation Loss:1.5873
Validation Accuracy:0.4898


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 15
Validation Loss:2.9343
Validation Accuracy:0.498


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 16
Validation Loss:1.7311
Validation Accuracy:0.4839


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 17
Validation Loss:2.1324
Validation Accuracy:0.478


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 18
Validation Loss:2.6364
Validation Accuracy:0.4642


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 19
Validation Loss:2.7287
Validation Accuracy:0.487


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 20
Validation Loss:2.2616
Validation Accuracy:0.4752


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 21
Validation Loss:3.6809
Validation Accuracy:0.4882


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 22
Validation Loss:3.2465
Validation Accuracy:0.4866


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 23
Validation Loss:2.5821
Validation Accuracy:0.5051


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 24
Validation Loss:2.5857
Validation Accuracy:0.5098


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 25
Validation Loss:4.5531
Validation Accuracy:0.5145


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 26
Validation Loss:4.2326
Validation Accuracy:0.5075


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 27
Validation Loss:4.2264
Validation Accuracy:0.5212


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 28
Validation Loss:5.6755
Validation Accuracy:0.5421


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 29
Validation Loss:6.1988
Validation Accuracy:0.5098


Training:   0%|          | 0/157 [00:00<?, ?it/s]

**************************************************
End of Epoch 30
Validation Loss:7.3955
Validation Accuracy:0.5153


In [12]:
# Persist the trained network. The module object itself is handed to
# torch.save (not just its state_dict), so loading the file later needs
# the SiameseNetwork class definition to be importable.
# NOTE(review): the file name mentions "WD_3e-6", but the optimizer created
# in this notebook sets no weight decay - confirm the name matches the run.
MODEL_PATH = "C:\\Users\\argan\\OneDrive\\Desktop\\DDP\\Models\\24_64_32_16_WD_3e-6.pkl"
torch.save(siamese_net, MODEL_PATH)