In [None]:
from google.colab import drive
import sys
# Attach Google Drive to the Colab VM; an already-mounted drive is left as is (no forced remount).
drive.mount('/content/drive/', force_remount=False)

Mounted at /content/drive/


In [None]:
from random import sample
import torch
from torch.nn import functional as F
import torch.nn as nn
import numpy as np
import pandas as pd
import torchaudio.transforms as ta_trans

In [1]:

'''
hyperparams for VGGish and YAMNet, plus common configs
vggish params are retrieved from:
    https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/audioset/vggish/vggish_params.py
yamnet params are retrieved from:
    https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/audioset/yamnet/params.py
'''
import json

# project_dir = {
#     "sipl-gpu2-u.staff.technion.ac.il":"/home/chenka/VoiceDisorerIdentification"
# }

# Google Drive locations used by the project: the raw patient data folder,
# the patient spreadsheet stored inside it, and the folder for preprocessed data.
# NOTE(review): 'preproccessed_data' is presumably the folder's actual spelling on
# Drive -- do not "correct" it here without renaming the folder as well.
data_location = {
    'raw_data':
        '/content/drive/MyDrive/Study_materials/Voice_disorder_detection_project/data/raw_data/patients',
    'data_spreadsheet':
        '/content/drive/MyDrive/Study_materials/Voice_disorder_detection_project/data/raw_data/patients/CRF table.xlsx',
    'preprocessed_data':
        '/content/drive/MyDrive/Study_materials/Voice_disorder_detection_project/data/preproccessed_data',
}

class Pathologies():
  """Integer labels for the dysphonia diagnoses listed in the patient spreadsheet.

  The spreadsheet is read once at construction time. Each cell of the
  'dysphonia diagnosis' column may hold several diagnoses separated by '+';
  every distinct diagnosis gets one integer index, with index 0 reserved for
  the healthy class (cells spelled 'none', in any letter case, or 'Healthy').
  """

  def __init__(self, spreadsheet_path=None):
    """Read the spreadsheet and build the diagnosis -> index mapping.

    Args:
      spreadsheet_path: Excel file to read. When None (the default) the path
        stored under data_location['data_spreadsheet'] is used, which keeps
        the original zero-argument construction working.

    Raises:
      KeyError: if the sheet lacks the 'Age' or 'dysphonia diagnosis' column.
    """
    if spreadsheet_path is None:
      spreadsheet_path = data_location['data_spreadsheet']
    pat_data_sheet = pd.read_excel(spreadsheet_path)
    # Rows without an age are excluded before the diagnoses are collected.
    pat_data_sheet = pat_data_sheet[pat_data_sheet['Age'].notna()]
    self.PathologiesToIndex = self._build_index(pat_data_sheet['dysphonia diagnosis'])

  @staticmethod
  def _build_index(diagnoses):
    """Turn an iterable of raw diagnosis cells into a {lower-case name: index} dict.

    Non-string cells (e.g. NaN for an empty cell) are ignored. The returned
    dict is ordered by ascending index.
    """
    # A dict is used as an ordered set: first-seen order, O(1) membership test.
    seen = {}
    for diagnosis in diagnoses:
      if isinstance(diagnosis, str):
        for word in diagnosis.split("+"):
          seen.setdefault(word.strip(), None)

    list_of_diseases = ['Healthy' if disease.lower() == 'none' else disease for disease in seen]

    # NOTE(review): the index is (position + 1) and the healthy entry takes 0
    # instead of its positional value, so one integer is skipped whenever a
    # healthy entry is present -- the indices are not guaranteed contiguous.
    # Kept as is because existing labels presumably depend on these values.
    disease_to_int = {}
    for i, disease in enumerate(list_of_diseases):
      disease_to_int[disease] = 0 if disease == 'Healthy' else i + 1

    # Keys are lower-cased last; names that differ only in letter case collapse
    # into one key that keeps the larger of their indices.
    return {k.lower(): v for k, v in sorted(disease_to_int.items(), key=lambda item: item[1])}

  def get_pathologies_to_index(self):
    """Return the {lower-case diagnosis name: integer index} dict."""
    return self.PathologiesToIndex

  def print(self):
    """Print one 'name: index' line per known diagnosis."""
    for key,val in self.PathologiesToIndex.items():
      print(f'{key}: {val}')
class webrtcvadParams:
  """Constants for the voice-activity-detection step."""

  # NOTE(review): presumably the webrtcvad aggressiveness mode -- confirm against the caller.
  AGGRESSIVE = 3

  # Length of a single analysis frame, in ms.
  FRAME_DURATION = 10

  # NOTE(review): presumably also in ms, like FRAME_DURATION -- confirm.
  PADDING_DURATION = 150

  # NOTE(review): unit not stated here (presumably seconds) -- confirm.
  MIN_CHUNK_LENGTH = 0.90

class CommonParams:
    """Settings shared by the VGGish and YAMNet pipelines."""

    # --- STFT ---
    TARGET_SAMPLE_RATE = 16_000
    STFT_WINDOW_LENGTH_SECONDS = 0.025
    STFT_HOP_LENGTH_SECONDS = 0.010

    # --- log mel spectrogram ---
    NUM_MEL_BANDS = 64
    MEL_MIN_HZ = 125
    MEL_MAX_HZ = 7500
    # NOTE: VGGish uses 0.01 here, YAMNet uses 0.001.
    LOG_OFFSET = 0.001

    # --- segmentation of the input audio into patches ---
    PATCH_WINDOW_IN_SECONDS = 0.48

    # --- largest feed-forward chunk size at test time ---
    VGGISH_CHUNK_SIZE = 128
    YAMNET_CHUNK_SIZE = 256

    # --- number of data loading threads ---
    NUM_LOADERS = 4

    # NOTE(review): units are not stated in this file (presumably seconds and Hz) -- confirm.
    VOICE_SAMPLE_MIN_LENGTH = 0.96
    SVD_SAMPLE_RATE = 50_000

class YAMNetParams:
    # Copyright 2019 The TensorFlow Authors All Rights Reserved.
    #
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    # ==============================================================================

    """Hyperparameters for YAMNet."""

    # Every value below except PATCH_HOP_SECONDS was used to train YAMNet, so
    # changing one may shift model performance. The patch hop is free to vary:
    # a smaller hop yields more patches per clip (and possibly better results)
    # at a higher computational cost.

    # --- audio front end ---
    SAMPLE_RATE = 16_000
    STFT_WINDOW_SECONDS = 0.025
    STFT_HOP_SECONDS = 0.010

    # --- log mel spectrogram ---
    MEL_BANDS = 64
    MEL_MIN_HZ = 125
    MEL_MAX_HZ = 7500
    LOG_OFFSET = 0.001

    # --- patch framing ---
    PATCH_WINDOW_SECONDS = 0.48
    PATCH_HOP_SECONDS = 0.2
    # Patch size derived from the settings above: STFT frames per patch window
    # and one band per mel bin.
    PATCH_FRAMES = int(round(PATCH_WINDOW_SECONDS / STFT_HOP_SECONDS))
    PATCH_BANDS = MEL_BANDS

    # --- network ---
    NUM_CLASSES = 521
    CONV_PADDING = 'same'
    BATCHNORM_CENTER = True
    BATCHNORM_SCALE = False
    BATCHNORM_EPSILON = 1e-4
    CLASSIFIER_ACTIVATION = 'sigmoid'

    # --- layer names ---
    FEATURES_LAYER_NAME = 'features'
    EXAMPLE_PREDICTIONS_LAYER_NAME = 'predictions'


# NOTE: for our inference overlapping patch windows are not needed, so the
# class-level hop of 0.2 is overridden here for everything that reads it afterwards.
# Alternative kept for reference (hop equal to the patch window):
# YAMNetParams.PATCH_HOP_SECONDS = YAMNetParams.PATCH_WINDOW_SECONDS
# NOTE(review): 1.0 is larger than PATCH_WINDOW_SECONDS (0.48), so consecutive
# patches presumably leave part of the signal between them unused -- confirm
# this is intended rather than hop == window.
YAMNetParams.PATCH_HOP_SECONDS = 1.0
