In [1]:
## One shot learning for audio with siamese network. 
# author: Saltanat Khalyk
# modified code from @ttchengab ~~~ github.com

In [2]:
from os import walk
import torch

import matplotlib.pyplot as plt
import os

import torchaudio
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset,Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import random
#from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.utils.data as data_utils
import os

In [3]:
path_train = 'D:/COVID_COUGH_SOUNDS/1second_chunks_458/balanced_1s_not_aug/one_shot1sec/train/'
path_test = 'D:/COVID_COUGH_SOUNDS/1second_chunks_458/balanced_1s_not_aug/one_shot1sec/test/'
root_dir = path_train
categories = [[folder, os.listdir(root_dir +'/'+ folder)] for folder in os.listdir(root_dir)  if not folder.startswith('.') ]

In [4]:
# creating the pairs of images for inputs, same character label = 1, vice versa
class sound_Dataset(Dataset):
    def __init__(self, categories, root_dir, transform=None):
        self.categories = categories
        self.root_dir = root_dir
        self.transform = transform
        
        
        audio_names =[]
        for subdir, dirs, files in os.walk(self.root_dir):
            for file in files:
                filepath =subdir+os.sep+file
                audio_names.append(filepath)
        self.audio_names = audio_names
        
    def __len__(self):
        return len(self.audio_names)
    def __getitem__(self, idx):
        audio1 = None
        audio2 = None
        label = None
        
        audio_names =[]
        for subdir, dirs, files in os.walk(self.root_dir):
            for file in files:
                filepath =subdir+os.sep+file
                audio_names.append(filepath)
        self.audio_names = audio_names
        
        class_name = self.audio_names[idx].split('\\')[1]
        list_categories = ['asthma', 'allergies', 'pneumonia', 'covid','other']
        channel_dict = {'asthma': 0, 'allergies': 1, 'pneumonia': 2, 'covid':3,'other':4}
        label = channel_dict.get(class_name)
        
        
        if idx % 2 == 0: # select the same character for both images
            category = random.choice(categories)
            character = random.choice(category[1])
            #category = random.choice(list_categories)
            audioDir = str(root_dir) + str(category[0]) 
            audio1Name = random.choice(os.listdir(audioDir))
            audio2Name = random.choice(os.listdir(audioDir))
            audio1, _ = torchaudio.load(str(audioDir) + os.sep + str(audio1Name)) 
            audio2, _ = torchaudio.load(str(audioDir) + os.sep + str(audio2Name)) 
            
            #print(audioDir + os.sep + audio1Name)
            #print(audioDir + os.sep + audio2Name)
            label = 1.0
        else: # select a different character for both images
            category1, category2 = random.choice(categories), random.choice(categories)
            category1, category2 = random.choice(categories), random.choice(categories)
            character1, character2 = random.choice(category1[1]), random.choice(category2[1])
            audioDir1, audioDir2 = str(root_dir) + str(category1[0]), str(root_dir) + str(category2[0])
            audio1Name = random.choice(os.listdir(audioDir1))
            audio2Name = random.choice(os.listdir(audioDir2))
            while audio1Name == audio2Name:
                audio2Name = random.choice(os.listdir(audioDir2))
            label = 0.0
            audio1, _ = torchaudio.load(str(audioDir1) + os.sep + str(audio1Name)) 
            audio2, _ = torchaudio.load(str(audioDir2) + os.sep+ str(audio2Name)) 
#         plt.imshow(img1)
        if self.transform:
            audio1 = self.transform(audio1)
            audio2 = self.transform(audio2)
        return audio1, audio2, torch.from_numpy(np.array([label], dtype=np.float32)) 

In [5]:
train_set = sound_Dataset(categories, root_dir, transform =None)
#train_set, val_set = random_split(omniglotDataset, [train_size, val_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, num_workers=0)

In [6]:
# Check if the dataloader works properly
#for i_batch, sample_batched in enumerate(train_loader):
    #print(i_batch, sample_batched)

In [21]:
# creates n-way one shot learning evaluation
class NWayOneShotEvalSet(Dataset):
    def __init__(self, categories, root_dir,  numWay, transform=None):
        self.categories = categories
        self.root_dir = root_dir
        #self.setSize = setSize
        self.numWay = numWay
        self.transform = transform
        audio_names =[]
        for subdir, dirs, files in os.walk(self.root_dir):
            for file in files:
                filepath =subdir+os.sep+file
                audio_names.append(filepath)
        self.audio_names = audio_names
        
    def __len__(self):
        return len(self.audio_names)
    def __getitem__(self, idx):
        # find one main image
        category = random.choice(categories)
        character = random.choice(category[1])
        audioDir = str(root_dir) + str(category[0])
        audioName = random.choice(os.listdir(audioDir))
        mainAudio, _ = torchaudio.load(str(audioDir) + os.sep + str(audioName))
        # print(imgDir + '/' + imgName)
        if self.transform:
            mainAudio = self.transform(mainAudio)
        
        # find n numbers of distinct images, 1 in the same set as the main
        testSet = []
        label = np.random.randint(self.numWay)
        for i in range(self.numWay):
            testAudioDir = audioDir
            testAudioName = ''
            if i == label:
                testAudioName = random.choice(os.listdir(audioDir))
            else:
                testCategory = random.choice(categories)
                testCharacter = random.choice(testCategory[1])
                testAudioDir = root_dir + testCategory[0]
                while testAudioDir == audioDir:
                    testAudioDir = root_dir + testCategory[0]
                testAudioName = random.choice(os.listdir(testAudioDir))
            testAudio, _ = torchaudio.load(str(testAudioDir) + os.sep + str(testAudioName))
            if self.transform:
                testAudio = self.transform(testAudio)
            testSet.append(testAudio)
        
        return mainAudio, testSet, torch.from_numpy(np.array([label], dtype = int))

In [27]:
numWay = 1


path_train = 'D:/COVID_COUGH_SOUNDS/1second_chunks_458/balanced_1s_not_aug/one_shot1sec/train/'
path_test = 'D:/COVID_COUGH_SOUNDS/1second_chunks_458/balanced_1s_not_aug/one_shot1sec/test/'
root_dir = path_train
categories_test = [[folder, os.listdir(path_test +'/'+ folder)] for folder in os.listdir(path_test)  if not folder.startswith('.') ]

test_set = NWayOneShotEvalSet(categories_test, path_test, numWay, transform=None)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 1, num_workers = 0, shuffle=True)

In [26]:
categories_test = [[folder, os.listdir(path_test +'/'+ folder)] for folder in os.listdir(path_test)  if not folder.startswith('.') ]

testCategory = random.choice(categories_test)
testCharacter = random.choice(testCategory[1])
testAudioDir = root_dir + testCategory[0]
print(testAudioDir)
testAudioName = random.choice(os.listdir(testAudioDir))
print(testAudioName)
testAudio, _ = torchaudio.load(str(testAudioDir) + os.sep + str(testAudioName))

print(testAudio)

D:/COVID_COUGH_SOUNDS/1second_chunks_458/balanced_1s_not_aug/one_shot1sec/train/allergies
1604965104089_29.wav
tensor([[0.0099, 0.0099, 0.0093,  ..., 0.0614, 0.0634, 0.0114]])


In [28]:
import torch.nn as nn
import torch.nn.functional as F

In [29]:
for i_batch, sample_batched in enumerate(test_loader):
    print(i_batch, sample_batched)

0 [tensor([[[ 0.0083, -0.0072, -0.0115,  ..., -0.0402, -0.0378, -0.0391]]]), [tensor([[[ 0.0133,  0.0143,  0.0161,  ..., -0.0084, -0.0085, -0.0098]]])], tensor([[0]], dtype=torch.int32)]
1 [tensor([[[0.0365, 0.0360, 0.0348,  ..., 0.0067, 0.0084, 0.0200]]]), [tensor([[[-0.0047, -0.0048, -0.0046,  ...,  0.0042,  0.0041,  0.0042]]])], tensor([[0]], dtype=torch.int32)]
2 [tensor([[[-0.0056, -0.0051, -0.0042,  ...,  0.0136,  0.0135,  0.0136]]]), [tensor([[[ 0.0034,  0.0040,  0.0040,  ..., -0.0159, -0.0150, -0.0232]]])], tensor([[0]], dtype=torch.int32)]
3 [tensor([[[-0.0009, -0.0008, -0.0005,  ..., -0.0116, -0.0212, -0.0198]]]), [tensor([[[ 0.0086,  0.0109,  0.0142,  ..., -0.0096, -0.0092, -0.0081]]])], tensor([[0]], dtype=torch.int32)]
4 [tensor([[[-2.1362e-04, -1.2207e-04, -9.1553e-05,  ..., -6.1035e-04,
          -7.0190e-04, -7.3242e-04]]]), [tensor([[[0.5309, 0.4583, 0.2584,  ..., 0.0114, 0.0113, 0.0099]]])], tensor([[0]], dtype=torch.int32)]
5 [tensor([[[0.0055, 0.0029, 0.0070,  ..., 

45 [tensor([[[ 0.0046,  0.0033,  0.0034,  ..., -0.0942, -0.0820, -0.0876]]]), [tensor([[[-0.0054, -0.0055, -0.0086,  ...,  0.0007,  0.0004,  0.0004]]])], tensor([[0]], dtype=torch.int32)]
46 [tensor([[[ 0.0050,  0.0052,  0.0048,  ..., -0.0080,  0.0065,  0.0145]]]), [tensor([[[ 6.7139e-04, -3.6621e-04, -2.1973e-03,  ...,  9.1553e-05,
          -1.8311e-04, -3.3569e-04]]])], tensor([[0]], dtype=torch.int32)]
47 [tensor([[[ 0.0057,  0.0061, -0.0022,  ...,  0.0045,  0.0057,  0.0073]]]), [tensor([[[-0.0472, -0.0467, -0.0510,  ..., -0.0076, -0.0023, -0.0024]]])], tensor([[0]], dtype=torch.int32)]
48 [tensor([[[ 0.0035,  0.0035,  0.0034,  ..., -0.0019, -0.0020, -0.0019]]]), [tensor([[[0.0055, 0.0029, 0.0070,  ..., 0.0844, 0.0838, 0.0716]]])], tensor([[0]], dtype=torch.int32)]
49 [tensor([[[-0.1383, -0.0414, -0.2775,  ...,  0.0013,  0.0012,  0.0012]]]), [tensor([[[-0.0006, -0.0016, -0.0022,  ...,  0.0003, -0.0002, -0.0009]]])], tensor([[0]], dtype=torch.int32)]
50 [tensor([[[-0.0009, -0.0010, 

118 [tensor([[[-0.0348, -0.0436, -0.0408,  ..., -0.0094, -0.0085, -0.0067]]]), [tensor([[[ 0.0000,  0.0000,  0.0000,  ..., -0.2091, -0.2416, -0.1652]]])], tensor([[0]], dtype=torch.int32)]
119 [tensor([[[-0.0656, -0.0637, -0.0634,  ...,  0.0655,  0.0675,  0.0665]]]), [tensor([[[-0.0211,  0.0076, -0.0216,  ...,  0.0048,  0.0031, -0.0144]]])], tensor([[0]], dtype=torch.int32)]
120 [tensor([[[6.1035e-05, 4.2114e-03, 1.5564e-03,  ..., 1.4343e-03,
          2.5635e-03, 3.4790e-03]]]), [tensor([[[ 4.2725e-04,  2.4414e-04,  3.0518e-05,  ..., -3.0518e-05,
           1.5259e-04,  2.7466e-04]]])], tensor([[0]], dtype=torch.int32)]
121 [tensor([[[3.2654e-03, 3.8757e-03, 4.1199e-03,  ..., 9.1553e-05,
          3.0518e-05, 0.0000e+00]]]), [tensor([[[ 6.1035e-05,  6.1035e-05,  6.1035e-05,  ..., -5.1880e-04,
          -3.3569e-04, -6.4087e-04]]])], tensor([[0]], dtype=torch.int32)]
122 [tensor([[[-0.0014, -0.0010, -0.0019,  ..., -0.0314, -0.0321, -0.0283]]]), [tensor([[[ 3.4363e-02,  4.1504e-02,  4.4

179 [tensor([[[ 0.0044,  0.0042,  0.0044,  ..., -0.0085, -0.0087, -0.0081]]]), [tensor([[[ 0.1637,  0.1182,  0.0871,  ..., -0.0492, -0.0430, -0.0367]]])], tensor([[0]], dtype=torch.int32)]
180 [tensor([[[-0.0163, -0.0201, -0.0172,  ...,  0.0148,  0.0198,  0.0276]]]), [tensor([[[-0.1461, -0.1378, -0.1528,  ...,  0.1430,  0.1206,  0.0751]]])], tensor([[0]], dtype=torch.int32)]
181 [tensor([[[0.0036, 0.0042, 0.0059,  ..., 0.0039, 0.0045, 0.0032]]]), [tensor([[[ 0.0015,  0.0015,  0.0016,  ..., -0.0002, -0.0005, -0.0004]]])], tensor([[0]], dtype=torch.int32)]
182 [tensor([[[0.0777, 0.0750, 0.0738,  ..., 0.0039, 0.0053, 0.0057]]]), [tensor([[[ 0.1566,  0.1624,  0.1682,  ..., -0.0131, -0.0138, -0.0144]]])], tensor([[0]], dtype=torch.int32)]
183 [tensor([[[-0.0618, -0.0438, -0.0161,  ...,  0.2477,  0.2443,  0.1992]]]), [tensor([[[0.0328, 0.0345, 0.0310,  ..., 0.0001, 0.0030, 0.0047]]])], tensor([[0]], dtype=torch.int32)]
184 [tensor([[[ 0.0020,  0.0031,  0.0032,  ..., -0.0021, -0.0016, -0.0015

255 [tensor([[[-0.2626, -0.2303, -0.2342,  ...,  0.0153,  0.0150,  0.0146]]]), [tensor([[[-0.0349, -0.0346, -0.0325,  ..., -0.0078, -0.0067, -0.0249]]])], tensor([[0]], dtype=torch.int32)]
256 [tensor([[[0.0000e+00, 3.0518e-05, 3.0518e-05,  ..., 2.6550e-03,
          2.6550e-03, 3.9062e-03]]]), [tensor([[[-0.0011, -0.0009, -0.0001,  ...,  0.0087,  0.0089,  0.0131]]])], tensor([[0]], dtype=torch.int32)]
257 [tensor([[[0.0437, 0.0412, 0.0395,  ..., 0.0229, 0.0280, 0.0346]]]), [tensor([[[ 0.0000e+00, -3.0518e-05, -3.0518e-05,  ...,  3.3569e-04,
          -2.1362e-04, -6.4087e-04]]])], tensor([[0]], dtype=torch.int32)]
258 [tensor([[[-0.1119, -0.1220, -0.0134,  ...,  0.0082,  0.0082,  0.0073]]]), [tensor([[[ 0.0637,  0.0678,  0.1159,  ..., -0.0453, -0.0428, -0.0397]]])], tensor([[0]], dtype=torch.int32)]
259 [tensor([[[-0.0056, -0.0045, -0.0032,  ..., -0.0673, -0.0583, -0.0362]]]), [tensor([[[0.0049, 0.0052, 0.0040,  ..., 0.0082, 0.0046, 0.0026]]])], tensor([[0]], dtype=torch.int32)]
260 [

325 [tensor([[[0.0253, 0.0210, 0.0132,  ..., 0.1043, 0.0865, 0.0547]]]), [tensor([[[-0.0035, -0.0032, -0.0066,  ..., -0.0016, -0.0017, -0.0016]]])], tensor([[0]], dtype=torch.int32)]
326 [tensor([[[ 0.0184,  0.0151,  0.0124,  ..., -0.0074, -0.0115, -0.0125]]]), [tensor([[[-1.8311e-04, -3.0518e-04,  6.1035e-05,  ...,  2.1362e-04,
           9.1553e-05,  1.2207e-04]]])], tensor([[0]], dtype=torch.int32)]
327 [tensor([[[-2.4414e-04, -9.1553e-05, -1.2207e-04,  ...,  2.9602e-03,
           6.7139e-04,  3.9673e-04]]]), [tensor([[[ 0.1527,  0.1707,  0.1547,  ...,  0.0220, -0.0030,  0.0043]]])], tensor([[0]], dtype=torch.int32)]
328 [tensor([[[-0.0006, -0.0007, -0.0007,  ...,  0.0013,  0.0012,  0.0013]]]), [tensor([[[-0.0022, -0.0020, -0.0014,  ..., -0.0771, -0.0840, -0.0667]]])], tensor([[0]], dtype=torch.int32)]
329 [tensor([[[ 0.0003,  0.0003,  0.0002,  ..., -0.0032, -0.0032, -0.0033]]]), [tensor([[[-1.1902e-03, -4.8828e-04,  3.0518e-05,  ...,  7.1106e-03,
           8.8501e-03,  1.0162e-02

402 [tensor([[[0.0004, 0.0007, 0.0005,  ..., 0.0050, 0.0055, 0.0056]]]), [tensor([[[-0.0060,  0.2234,  0.1724,  ..., -0.0003, -0.0007, -0.0004]]])], tensor([[0]], dtype=torch.int32)]
403 [tensor([[[-0.0302, -0.0303, -0.0305,  ...,  0.1627,  0.1624,  0.1584]]]), [tensor([[[ 0.0265,  0.0270,  0.0327,  ..., -0.0163, -0.0161, -0.0156]]])], tensor([[0]], dtype=torch.int32)]
404 [tensor([[[-0.0007, -0.0004,  0.0000,  ..., -0.0006, -0.0006, -0.0005]]]), [tensor([[[-0.2908, -0.2752, -0.3100,  ..., -0.2178, -0.2341, -0.1414]]])], tensor([[0]], dtype=torch.int32)]
405 [tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0003,  0.0000, -0.0189]]]), [tensor([[[0.0074, 0.0077, 0.0095,  ..., 0.0064, 0.0065, 0.0083]]])], tensor([[0]], dtype=torch.int32)]
406 [tensor([[[-0.0019, -0.0016,  0.0006,  ..., -0.0062, -0.0061, -0.0079]]]), [tensor([[[ 0.1114,  0.1077,  0.0846,  ..., -0.0969, -0.0961, -0.0626]]])], tensor([[0]], dtype=torch.int32)]
407 [tensor([[[ 0.0157,  0.0158,  0.0159,  ..., -0.3068, -0.3054, -