In [1]:
!pip install pytorch-ignite

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
from torch import nn
from torch.optim import Adam
import torchaudio
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from skimage.util import img_as_ubyte
import pandas as pd
import os
import glob
import numpy as np
import librosa
import matplotlib.pyplot as plt
import sys
import ignite
from sklearn.metrics import roc_auc_score

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [4]:
class MimiiDataset(Dataset):
    def __init__(self,audio_dir, n_mel = 128):
        super(MimiiDataset, self).__init__()
        self.audio_dir = audio_dir
        self.n_mel = n_mel
    
    def get_data(self,device):
        
        self.train_files, self.train_labels = self._train_file_list(device)
        self.test_files, self.test_labels = self._test_file_list(device)
        
        self.train_data = self._derive_data(self.train_files.copy())
        self.test_data = self._derive_data(self.test_files.copy())
        
        return self.train_data, self.test_data, self.train_labels, self.test_labels
    
    def _train_file_list(self, device):
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/train/*_normal_*.wav"
            )
        train_normal_files = sorted(glob.glob(query))
        train_normal_labels = np.zeros(len(train_normal_files))
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/train/*_anomaly_*.wav"
            )
        train_anomaly_files = sorted(glob.glob(query))
        train_anomaly_labels = np.ones(len(train_anomaly_files))
        
        train_file_list = np.concatenate((train_normal_files, train_anomaly_files), axis=0)
        train_labels = np.concatenate((train_normal_labels, train_anomaly_labels), axis=0)
        
        return train_file_list, train_labels
    
    def _test_file_list(self, device):
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/target_test/*_normal_*.wav"
            )
        test_trg_normal_files = sorted(glob.glob(query))
        test_trg_normal_labels = np.zeros(len(test_trg_normal_files))
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/target_test/*_anomaly_*.wav"
            )
        test_trg_anomaly_files = sorted(glob.glob(query))
        test_trg_anomaly_labels = np.ones(len(test_trg_anomaly_files))
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/source_test/*_normal_*.wav"
            )
        test_src_normal_files = sorted(glob.glob(query))
        test_src_normal_labels = np.zeros(len(test_src_normal_files))
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/source_test/*_anomaly_*.wav"
            )
        test_src_anomaly_files = sorted(glob.glob(query))
        test_src_anomaly_labels = np.ones(len(test_src_anomaly_files))
        
        test_file_list = np.concatenate((test_trg_normal_files, 
                                         test_trg_anomaly_files, 
                                         test_src_normal_files,
                                         test_src_anomaly_files), axis=0)
        test_labels = np.concatenate((test_trg_normal_labels,
                                      test_trg_anomaly_labels, 
                                      test_src_normal_labels,
                                      test_src_anomaly_labels), axis=0)
        
        return test_file_list, test_labels

    def normalize(self,tensor):
        tensor_minusmean = tensor - tensor.mean()
        return tensor_minusmean/np.absolute(tensor_minusmean).max()

    def make0min(self,tensornd):
        tensor = tensornd.numpy()
        res = np.where(tensor == 0, 1E-19 , tensor)
        return torch.from_numpy(res)

    def spectrogrameToImage(self,waveform):
        specgram = torchaudio.transforms.MelSpectrogram(n_fft=1024, win_length=1024, 
                                                        hop_length=512, power=2, 
                                                        normalized=True, n_mels=128)(waveform )
        specgram= self.make0min(specgram)
        specgram = specgram.log2()[0,:,:].numpy()
        
        tr2image = transforms.Compose([transforms.ToPILImage()])

        specgram= self.normalize(specgram)
        # specgram = img_as_ubyte(specgram)
        specgramImage = tr2image(specgram)
        return specgramImage

    
    def _derive_data(self, file_list):
        tr2tensor = transforms.Compose([transforms.PILToTensor()])
        data = []
        for i in range(len(file_list)):
                y, sr = torchaudio.load(file_list[i])  
                spec = self.spectrogrameToImage(y)
                spec = spec.convert('RGB')
                vectors = tr2tensor(spec)
                
                data.append(vectors)
                
        return data

In [5]:
ds = MimiiDataset('/content/drive/MyDrive/mimii')

In [6]:
df_train, df_test, y_train, y_test = ds.get_data('fan')

In [7]:
train_ds = list(map(lambda x,y: (x.float(),torch.FloatTensor([y])) , df_train, y_train))
test_ds = list(map(lambda x,y: (x.float(),torch.FloatTensor([y])) , df_test, y_test))

In [8]:
train_data = DataLoader(train_ds, batch_size = 32, shuffle=True)
test_data = DataLoader(test_ds, batch_size = 32, shuffle=False)

In [9]:
class Block(nn.Module):

    def __init__(self, in_channels, out_channels, stride=1, padding = 1,downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels, kernel_size=3,
                               stride=stride, padding=padding)
        self.batchnorm1 = nn.BatchNorm2d(out_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels, kernel_size=3,
                               stride=stride,padding=padding)
        self.batchnorm2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU(inplace=True)

        self.downsample = downsample

    def forward(self, x):
        identity = x
        x1 = self.relu1(self.batchnorm1(self.conv1(x)))
        x2 = self.relu2(self.batchnorm2(self.conv2(x1)))
        print(x2.shape)

        if self.downsample is not None:
            identity = self.downsample(x)
        out = x2 + identity
        # print(out.shape)
        return out


class ResNet34(nn.Module):

    def layer(self, num_blocks, in_channels, out_channels, downsample=None,
              block=Block, stride=1):
        layers = []
        if in_channels != out_channels or stride != 1:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          stride=stride, kernel_size=1),
                nn.BatchNorm2d(out_channels))

        layers.append(block(in_channels=in_channels, out_channels=out_channels,
                            downsample=downsample, stride=stride))
        for i in range(num_blocks - 1):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def __init__(self, in_channels=3, out_channels=1000):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=64,
                               kernel_size=7, stride=2, padding = 3)
        self.batchnorm = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv2 = self.layer(num_blocks=3, in_channels=64, out_channels=64)
        self.conv3 = self.layer(num_blocks=4, in_channels=64, out_channels=128)
        self.conv4 = self.layer(num_blocks=6, in_channels=128,
                                out_channels=256)
        self.conv5 = self.layer(num_blocks=3, in_channels=256,
                                out_channels=512)

        self.avgpool = nn.AvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(in_features=512, out_features=out_channels)
        # self.fc1 = nn.Linear(in_features = out_channels, out_features = 2)

    def feature_extraction(self, x):
        x1 = self.maxpool(self.batchnorm(self.conv1(x)))
        # print(x1.shape)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)

        return self.avgpool(x5)

    def forward(self, x):
        features = self.feature_extraction(x)

        flat = torch.flatten(features, 1) # torch.flatten(features, 1)

        out = self.fc(flat)
        return out

In [10]:
def train(model,optimizer,criterion,train_data,valid_data,epochs,device='cpu',scheduler = None):
  train_losses = []

  for epoch in range(epochs):
    cur_loss = 0
    bigsum = 0
    overall = 0
    for batch in train_data:
      inputs = batch[0].to(device)
      labels = batch[1].to(device)
      optimizer.zero_grad()

      predictions = torch.sigmoid(model(inputs))
      # print(labels)
      # print('preds',prediction.shape,'labels',labels.shape)
      loss = criterion(predictions, labels)
      loss.backward()
      optimizer.step()
      if scheduler:
        scheduler.step()
      cur_loss += loss

      cur_preds = (predictions>0.5).float()
      bigsum += (cur_preds.cpu() == labels.cpu()).sum()
      overall += cur_preds.shape[0]


    
    cur_loss = cur_loss/len(train_data)
    train_losses.append(cur_loss)
    print("{}/{} loss: {}, accuracy: {}".format(epoch+1, epochs,cur_loss, bigsum/overall))


  return train_losses

In [11]:
# model = ResNet34(in_channels = 1, out_channels = 2)
# optimizer = Adam(params = model.parameters())
# criterion = nn.BCELoss()

In [12]:
# train(model,optimizer,criterion,train_data,test_data,epochs = 3)

In [13]:
model = torchvision.models.resnet34(pretrained=True)


num_features = model.fc.in_features
num_classes = 1
model.fc = nn.Linear(num_features, num_classes)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
myModel = model.to(device)

optimizer = Adam(params = model.parameters())
criterion = nn.BCELoss()



In [14]:
# losses = train(myModel,optimizer,criterion,train_data,test_data,epochs = 3, device = device)

In [15]:
def validate(model, valid_data, device = 'cpu'):
  predictions = []
  actuals = []
  with torch.no_grad():
        for batch in valid_data:
            inputs, labels = batch[0].to(device),batch[1].to(device)
            model.eval()
            outputs = torch.sigmoid(model(inputs).cpu())
            predictions.append(outputs)
            actuals.append(labels)

  return predictions, actuals


In [16]:
predictions, actuals = validate(myModel,test_data,device = device)

In [17]:
bigsum = 0
overall = 0
preds = []
y = []
for i in range(len(predictions)):
  cur_preds = (predictions[i]>0.5).float()
  bigsum += (cur_preds == actuals[i].cpu()).sum()
  overall += cur_preds.shape[0]

accuracy = bigsum/overall

print('accuracy:', float(accuracy))

accuracy: 0.5


In [18]:
acts = []
preds = []
for i in actuals:
  vec = i.detach().cpu().numpy().tolist()
  vec = list(map(lambda x: x[0], vec))
  acts += vec

for i in predictions:
  vec = i.detach().cpu().numpy().tolist()
  vec = list(map(lambda x: x[0], vec))
  preds += vec

acts, preds = np.array(acts), np.array(preds)

In [19]:
roc_auc_score(acts, preds)

0.514888888888889

In [20]:
from sklearn.mixture import GaussianMixture

In [22]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

myModel.fc = Identity()

In [31]:
def get_hidden_vecs(model,ds):
  vecs = []
  for i in ds:
    # print(i.shape)
    obj = torch.FloatTensor([i[0].numpy().tolist()]).cuda()
    hidden = model(obj).detach().cpu().numpy().tolist()
    vecs.append(hidden)
  
  return np.array(vecs)

In [43]:
train_gm = get_hidden_vecs(myModel,train_ds)
train_gm = train_gm.reshape(3014,512)

In [44]:
test_gm = get_hidden_vecs(myModel,test_ds)
test_gm = test_gm.reshape(1200,512)

In [69]:
gm = GaussianMixture(n_components=2)

In [70]:
gm.fit(train_gm, y_train)

In [71]:
predictions = gm.predict(train_gm)

In [72]:
(predictions == y_train).sum() / len(y_train)

0.5013271400132714

In [73]:
test_predictions = gm.predict(test_gm)

In [74]:
(test_predictions == y_test).sum() / len(y_test)

0.49333333333333335