In [23]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [24]:
!pip install -q torchmetrics

In [25]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torchmetrics.classification import BinaryAccuracy, F1Score, Precision, Recall
from torchmetrics import MeanMetric

In [26]:
import os
print("Current working directory:", os.getcwd())

Current working directory: /content


In [27]:
data = pd.read_csv('/content/drive/MyDrive/embeddings_final.csv')

In [28]:
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,file_name,embedding,category,gender,language
0,V51X29_cut_chunk_0.wav,"-20.50690269470215,-1.6952142715454102,12.3294...",healthy,female,Dutch
1,V51X29_cut_chunk_1.wav,"-23.553922653198242,-5.940636157989502,11.0589...",healthy,female,Dutch
2,V51X29_cut_chunk_2.wav,"-26.983060836791992,-3.390190601348877,22.1760...",healthy,female,Dutch
3,V51X29_cut_chunk_3.wav,"-27.18779945373535,-9.506233215332031,14.84153...",healthy,female,Dutch
4,V51X29_cut_chunk_4.wav,"-27.49050521850586,-10.340707778930664,11.8239...",healthy,female,Dutch
...,...,...,...,...,...
10701,MMX011_chunk_14.wav,"-18.254676818847656,5.112982749938965,10.31656...",patient,male,Portuguese
10702,MMX011_chunk_15.wav,"-17.22808265686035,2.429584503173828,9.4952640...",patient,male,Portuguese
10703,MMX011_chunk_16.wav,"-18.98033905029297,8.473431587219238,18.456935...",patient,male,Portuguese
10704,MMX011_chunk_17.wav,"-17.828121185302734,5.798443794250488,9.921663...",patient,male,Portuguese


In [29]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [30]:
# Defining a custom dataset for Siamese network pairs
class DysphoniaPairsDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.embeddings = data['embedding'].apply(lambda x: torch.tensor(eval(x), dtype=torch.float32))  # Assuming embeddings are stored as strings
        self.labels = data['category'].map({'healthy': 0, 'patient': 1})
        self.languages = data['language']
        self.gender = data['gender']

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Get two random samples, ensure one is similar or dissimilar based on idx parity
        row1 = self.data.iloc[idx]
        label1 = self.labels.iloc[idx]
        language1 = self.languages.iloc[idx]
        gender1 = self.gender.iloc[idx]

        # Create positive or negative pairs
        if idx % 2 == 0:  # Positive pair
            row2 = self.data[(self.labels == label1) & (self.gender == gender1) & (self.languages == language1)].sample(1).iloc[0]
            label = 1
        else:  # Negative pair
            row2 = self.data[self.labels != label1].sample(1).iloc[0]
            label = 0

        # Convert to tensors
        embedding1 = torch.tensor(eval(row1['embedding']), dtype=torch.float32)
        embedding2 = torch.tensor(eval(row2['embedding']), dtype=torch.float32)

        return embedding1, embedding2, torch.tensor(label, dtype=torch.float32)



In [31]:
# Create DataLoaders for training and testing
train_dataset = DysphoniaPairsDataset(train_data)
test_dataset = DysphoniaPairsDataset(test_data)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

SIAMESE NETWORK

In [32]:
# Define the sub-network for embedding generation
class EmbeddingNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(EmbeddingNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

class SiameseNetwork(nn.Module):
    def __init__(self, input_dim, embedding_dim):
        super(SiameseNetwork, self).__init__()
        self.embedding_net = EmbeddingNet(input_dim, embedding_dim)

    def forward(self, input1, input2):
        # Get embeddings for both inputs
        output1 = self.embedding_net(input1)
        output2 = self.embedding_net(input2)
        # Calculate cosine similarity
        similarity = F.cosine_similarity(output1, output2, dim=1)
        return similarity

Updating the loss function

In [33]:
class CosineContrastiveLoss(nn.Module):
    def __init__(self, margin=0.5):
        super(CosineContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, similarity, label):
        # For similar pairs (label == 1), maximize similarity
        # For dissimilar pairs (label == 0), ensure similarity is below margin
        loss = label * (1 - similarity) + (1 - label) * F.relu(similarity - self.margin)
        return loss.mean()

In [34]:
# Set parameters
input_dim = len(eval(data['embedding'].iloc[0]))  # Dimension of embeddings in the CSV
embedding_dim = 32  # Dimension of output embedding
margin = 0.5
epochs = 20
learning_rate = 0.001

# Initialize model, loss, and optimizer
model = SiameseNetwork(input_dim, embedding_dim)
criterion = CosineContrastiveLoss(margin=margin)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [35]:
# Initialize metrics
train_loss_metric = MeanMetric()  # For averaging training loss
test_loss_metric = MeanMetric()   # For averaging test loss
accuracy_metric = BinaryAccuracy()  # For calculating accuracy (binary task)
f1_score_metric = F1Score(task='binary', num_classes=2, average='macro')  # F1 score metric
precision_metric = Precision(task='binary', num_classes=2, average='macro')  # Precision metric
recall_metric = Recall(task='binary', num_classes=2, average='macro')

for epoch in range(epochs):
  #training loop
    model.train()
    train_loss_metric.reset()

    for embedding1, embedding2, label in train_loader:
        optimizer.zero_grad()
        # Forward pass
        similarity = model(embedding1, embedding2)
        loss = criterion(similarity, label)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        train_loss_metric.update(loss.item())

    # testing loop
    model.eval()
    test_loss_metric.reset()
    accuracy_metric.reset()
    f1_score_metric.reset()
    precision_metric.reset()
    recall_metric.reset()

    with torch.no_grad():
        for embedding1, embedding2, label in test_loader:
            similarity = model(embedding1, embedding2)
            loss = criterion(similarity, label)
            test_loss_metric.update(loss.item())

            predictions = (similarity > 0.5).float()
            accuracy_metric.update(predictions, label)
            f1_score_metric.update(predictions, label)
            precision_metric.update(predictions, label)
            recall_metric.update(predictions, label)

    # Print metrics
    train_loss = train_loss_metric.compute()
    test_loss = test_loss_metric.compute()
    accuracy = accuracy_metric.compute()
    f1_score = f1_score_metric.compute()
    precision = precision_metric.compute()
    recall = recall_metric.compute()

    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {train_loss/len(train_loader):.4f}, Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {accuracy:.4f}")

final_accuracy = accuracy_metric.compute()
final_f1_score = f1_score_metric.compute()
final_precision = precision_metric.compute()
final_recall = recall_metric.compute()
print("Training complete.")
# print(f"Accuracy: {final_accuracy:.4f}, Precision: {final_precision:.4f}, Recall: {final_recall:.4f}, F1 Score: {final_f1_score:.4f}")


Epoch [1/20], Training Loss: 0.0007, Test Loss: 0.0025, Accuracy: 0.6307
Epoch [2/20], Training Loss: 0.0006, Test Loss: 0.0024, Accuracy: 0.6419
Epoch [3/20], Training Loss: 0.0006, Test Loss: 0.0022, Accuracy: 0.6895
Epoch [4/20], Training Loss: 0.0006, Test Loss: 0.0022, Accuracy: 0.7222
Epoch [5/20], Training Loss: 0.0005, Test Loss: 0.0021, Accuracy: 0.7110
Epoch [6/20], Training Loss: 0.0005, Test Loss: 0.0020, Accuracy: 0.7353
Epoch [7/20], Training Loss: 0.0005, Test Loss: 0.0020, Accuracy: 0.7381
Epoch [8/20], Training Loss: 0.0005, Test Loss: 0.0019, Accuracy: 0.7390
Epoch [9/20], Training Loss: 0.0005, Test Loss: 0.0021, Accuracy: 0.7404
Epoch [10/20], Training Loss: 0.0005, Test Loss: 0.0018, Accuracy: 0.7376
Epoch [11/20], Training Loss: 0.0005, Test Loss: 0.0018, Accuracy: 0.7246
Epoch [12/20], Training Loss: 0.0004, Test Loss: 0.0018, Accuracy: 0.7530
Epoch [13/20], Training Loss: 0.0004, Test Loss: 0.0019, Accuracy: 0.6998
Epoch [14/20], Training Loss: 0.0004, Test Loss