In [61]:
%load_ext autoreload
%autoreload 2
import sys
import os
import numpy as np
sys.path.append(os.path.abspath('../software/AB_imports/'))
sys.path.append(os.path.abspath('../software/fitness_functions/'))

# Import the rest of the GpyT subpackage functions for the demo here
from Frontend.readWav import readWavFunc
from Frontend.tdFilter import tdFilterFunc
from Agc.dualLoopTdAgc import dualLoopTdAgcFunc
from WinBuf.winBuf import winBufFunc
from Filterbank.fftFilterbank import fftFilterbankFunc
from Filterbank.hilbertEnvelope import hilbertEnvelopeFunc
from Filterbank.channelEnergy import channelEnergyFunc
from NoiseReduction.noiseReduction import noiseReductionFunc
from PostFilterbank.specPeakLocator import specPeakLocatorFunc
from PostFilterbank.currentSteeringWeights import currentSteeringWeightsFunc
from PostFilterbank.carrierSynthesis import carrierSynthesisFunc
from Mapping.f120Mapping import f120MappingFunc
from Electrodogram.f120Electrodogram import f120ElectrodogramFunc
from Validation.validateOutput import validateOutputFunc
from Vocoder.vocoder import vocoderFunc

from delta_wav import convert_sample_rate

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Absolute path of the input recording, resolved from the notebook's working directory.
wavefile = os.path.abspath('../sample_data/sentence1_55_clean.wav')

# Single source of truth for the FFT length: the analysis window below and
# parStrat['nFft'] must always have the same size.
N_FFT = 256

# Analysis window: the mean of a Blackman and a Hann window, stored as a (1, N_FFT) row vector.
stratWindow = 0.5 * (np.blackman(N_FFT) + np.hanning(N_FFT))
stratWindow = stratWindow.reshape(1, stratWindow.size)

# Global strategy parameters; every stage-specific dict in this cell points back to
# this one through its 'parent' key.
parStrat = {
    'wavFile': wavefile,  # this should be a complete absolute path to your sound file of choice
    'fs': 17400,  # this value matches implant internal audio rate. incoming wav files resampled to match
    'nFft': N_FFT,
    'nHop': 20,
    'nChan': 15,  # do not change
    'startBin': 6,
    # one entry per channel (15 entries, summing to 114 bins)
    'nBinLims': np.array([2, 2, 1, 2, 2, 2, 3, 4, 4, 5, 6, 7, 8, 10, 56]),
    'window': stratWindow,
    'pulseWidth': 18,  # DO NOT CHANGE
    'verbose': 0
}

# Options handed to readWavFunc.
parReadWav = dict(
    parent=parStrat,
    tStartEnd=[],  # left empty here; presumably means "use the whole file" — confirm in readWavFunc
    iChannel=1,
)

# Options handed to tdFilterFunc: numerator and denominator coefficients of the time-domain filter.
parPre = dict(
    parent=parStrat,
    coeffNum=np.array([0.7688, -1.5376, 0.7688]),
    coeffDenom=np.array([1, -1.5299, 0.5453]),
)

# The 32 coefficients are symmetric: the second half is the first half reversed.
# Integer taps are scaled by 2**-16.
_envHalf = np.array([-19, 55, 153, 277, 426, 596, 784, 983,
                     1189, 1393, 1587, 1763, 1915, 2035, 2118, 2160])
envCoefs = np.concatenate((_envHalf, _envHalf[::-1])) / (2 ** 16)


def _agcTau(coef):
    """Evaluate the time-constant expression shared by all four AGC tau entries.

    The literals 8 and 17400 equal 'decFact' and parStrat['fs'] in this configuration;
    the trailing * 1000 presumably converts seconds to milliseconds — confirm against
    dualLoopTdAgcFunc.
    """
    return -8 / (17400 * np.log(coef)) * 1000


# Options handed to dualLoopTdAgcFunc.
parAgc = dict(
    parent=parStrat,
    kneePt=4.476,
    compRatio=12,
    tauRelFast=_agcTau(.9901),
    tauAttFast=_agcTau(.25),
    tauRelSlow=_agcTau(.9988),
    tauAttSlow=_agcTau(.9967),
    maxHold=1305,
    g0=6.908,
    fastThreshRel=8,
    cSlowInit=0.5e-3,
    cFastInit=0.5e-3,
    controlMode='naida',
    clipMode='limit',
    decFact=8,
    envBufLen=32,
    gainBufLen=16,
    envCoefs=envCoefs,
)

# Options handed to winBufFunc.
parWinBuf = dict(
    parent=parStrat,
    bufOpt=[],
)

# Options handed to fftFilterbankFunc.
parFft = dict(
    parent=parStrat,
    combineDcNy=False,
    compensateFftLength=False,
    includeNyquistBin=False,
)

# Options handed to hilbertEnvelopeFunc; the upper bound is infinite, the lower bound and offset are zero.
parHilbert = dict(
    parent=parStrat,
    outputOffset=0,
    outputLowerBound=0,
    outputUpperBound=np.inf,
)

# Options handed to channelEnergyFunc.
parEnergy = dict(
    parent=parStrat,
    gainDomain='linear',
)

# Options handed to noiseReductionFunc.
parNoiseReduction = {
    'parent': parStrat,
    'gainDomain': 'log2',
    'tau_speech': .0258,
    'tau_noise': .219,
    'threshHold': 3,
    'durHold': 1.6,
    'maxAtt': -12,
    'snrFloor': -2,
    'snrCeil': 45,
    'snrSlope': 6.5,
    'slopeFact': 0.2,
    'noiseEstDecimation': 1,
    'enableContinuous': False,
    # Initial state: one row per channel, each starting at -30. The row count is taken from
    # parStrat['nChan'] (15) instead of repeating the literal; each key gets its own array.
    # Assumes noiseReductionFunc expects nChan rows — its input in this notebook is (15, 4524).
    'initState': {key: -30 * np.ones((parStrat['nChan'], 1)) for key in ('V_s', 'V_n')},
}

# Options handed to specPeakLocatorFunc.
parPeak = {
    'parent': parStrat,
    # binToLocMap: 1 x nBin vector of nominal cochlear locations for the center frequencies
    # of each STFT bin; values from 0 .. 15 in Q9 format, corresponding to the nominal
    # steering location for each FFT bin.
    # Layout: 6 leading zeros, 69 tabulated entries, then 53 entries of 7679 (128 in total),
    # all divided by 512 (= 2**9).
    'binToLocMap': np.concatenate((np.zeros(6, ), np.array([256, 640, 896, 1280, 1664, 1920, 2176,
                                                            2432, 2688, 2944, 3157, 3328, 3499, 3648, 3776, 3904,
                                                            4032,
                                                            4160, 4288, 4416, 4544, 4659, 4762, 4864, 4966, 5069,
                                                            5163,
                                                            5248, 5333, 5419, 5504, 5589, 5669, 5742, 5815, 5888,
                                                            5961,
                                                            6034, 6107, 6176, 6240, 6304, 6368, 6432, 6496, 6560,
                                                            6624,
                                                            6682, 6733, 6784, 6835, 6886, 6938, 6989, 7040, 7091,
                                                            7142,
                                                            7189, 7232, 7275, 7317, 7360, 7403, 7445, 7488, 7531,
                                                            7573,
                                                            7616, 7659]), 7679 * np.ones((53,)))) / 512
}

# Options handed to currentSteeringWeightsFunc.
parSteer = dict(
    parent=parStrat,
    nDiscreteSteps=9,
    steeringRange=1.0,
)

# Options handed to carrierSynthesisFunc.
parCarrierSynth = dict(
    parent=parStrat,
    fModOn=.5,
    fModOff=1.0,
    maxModDepth=1.0,
    deltaPhaseMax=0.5,
)

# Options handed to f120MappingFunc. Every map vector has 16 entries and holds one
# constant value; chanToElecPair is simply 0..15.
parMapper = dict(
    parent=parStrat,
    mapM=np.full(16, 500.0),
    mapT=np.full(16, 50.0),
    mapIdr=np.full(16, 60.0),
    mapGain=np.full(16, 0.0),
    mapClip=np.full(16, 2048.0),
    chanToElecPair=np.arange(16),
    carrierMode=1,
)

# Options handed to f120ElectrodogramFunc.
parElectrodogram = dict(
    parent=parStrat,
    cathodicFirst=True,
    # DO NOT CHANGE (different order of pulses will have no effect in vocoder output)
    channelOrder=np.array([1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 4, 8, 12]),
    enablePlot=True,
    # DO NOT CHANGE (validation depends on matched output rate, vocoder would not produce
    # different results at higher or lower Fs when parameters match accordingly)
    outputFs=55556,
)

# Validation options (presumably consumed by validateOutputFunc, which this notebook
# imports but does not call in the cells shown).
parValidate = dict(
    parent=parStrat,
    lengthTolerance=50,
    saveIfSimilar=True,  # save even if they are too similar to default strategy
    differenceThreshold=1,
    maxSimilarChannels=8,
    # this is linked to the previous electrodogram generation step, it should always match [55556 Hz]
    elGramFs=parElectrodogram['outputFs'],
    # This should be the full path including filename to a location where electrode matrix
    # output will be saved after validation
    outFile=None,
)

# step by step 
In the default algorithm, there are two types of operations: those that simply rescale and modulate the original wave file, and those that perform more complex operations.

import
* readWavFunc (1, 90480)

simple preprocessing
* tdFilterFunc (1, 90480)
* dualLoopTdAgcFunc (1, 90479)


complex processes
* winBufFunc (256, 4524)
* fftFilterbankFunc (128, 4524)
* hilbertEnvelopeFunc (15, 4524)

new simple processes
* channelEnergyFunc (15, 4524)
* noiseReductionFunc (15, 4524)
*

synthesis function (bringing together the peak-finding operations and the Hilbert envelope)
* f120MappingFunc (30, 9629)


final result
* f120ElectrodogramFunc (16, 288872)

# Loading in simple wav

In [14]:
# Container for every intermediate signal of the walkthrough.
results = dict()

# Load the file specified in parReadWav and keep both the samples and the source name.
# The wav file is assumed to be scaled correctly already (111.6 dB SPL peak full-scale).
(results['sig_smp_wavIn'],
 results['sourceName']) = readWavFunc(parReadWav)


In [27]:
results['sig_smp_wavIn'].shape

(1, 90480)

# applying pre-emphasis

In [16]:
# Filter the loaded waveform with the numerator/denominator coefficients from parPre
# (the pre-emphasis step); the recorded run keeps the (1, 90480) shape.
results['sig_smp_wavPre'] = tdFilterFunc(parPre, results['sig_smp_wavIn'])

In [28]:
results['sig_smp_wavPre'].shape

(1, 90480)

# applying gain control

In [18]:
# Run the dual-loop AGC on the pre-emphasised signal. The whole returned dict is kept
# because later cells read both its 'wavOut' and its 'smpGain' entries.
results['agc'] = dualLoopTdAgcFunc(parAgc, results['sig_smp_wavPre'])  # agc


In [30]:
results['agc']['wavOut'].shape

(1, 90479)

In [20]:
# NOTE(review): scratch call. It feeds the raw waveform ('sig_smp_wavIn') instead of the
# pre-emphasised one, does not store the return value, and echoes the entire AGC dict as
# cell output. Consider removing it or inspecting a single key's shape instead.
dualLoopTdAgcFunc(parAgc, results['sig_smp_wavIn'])

{'Env': array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        5.69692674e-06, 5.96758486e-06, 5.55531295e-06]),
 'CSlow': array([0.00067914, 0.00067914, 0.00067914, ..., 0.00067914, 0.00067914,
        0.00067914]),
 'CFast': array([0.00049505, 0.00049015, 0.0004853 , ..., 0.00021476, 0.00021476,
        0.00021476]),
 'C': array([0.00067914, 0.00067914, 0.00067914, ..., 0.00067914, 0.00067914,
        0.00067914]),
 'G': array([120.09231315, 120.09231315, 120.09231315, ..., 120.09231315,
        120.09231315, 120.09231315]),
 'Hold': array([0., 0., 0., ..., 0., 0., 0.]),
 'State': array([0., 0., 0., ..., 0., 0., 0.]),
 'EnvFast': array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        2.26798738e-06, 2.37573832e-06, 2.21160992e-06]),
 'smpGain': array([[ 15.01153914,  22.51730872,  30.02307829, ..., 120.09231315,
         120.09231315, 120.09231315]]),
 'wavOut': array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -1.22693543e-03, -2.3894

In [32]:
# Buffer the AGC output into frames; the recorded run yields (256, 4524),
# i.e. parStrat['nFft'] rows per frame.
results['sig_frm_audBuffers'] = winBufFunc(parWinBuf, results['agc']['wavOut'])

In [34]:
results['sig_frm_audBuffers'].shape

(256, 4524)

In [24]:
results['agc']['wavOut'].shape

(1, 90479)

In [26]:
results['sig_smp_wavPre'].shape

(1, 90480)

In [33]:
# FFT filterbank over the buffered frames; the recorded run yields (128, 4524),
# i.e. nFft / 2 rows per frame.
results['sig_frm_fft'] = fftFilterbankFunc(parFft, results['sig_frm_audBuffers'])

In [35]:
results['sig_frm_fft'].shape

(128, 4524)

In [36]:
# Hilbert envelopes computed from the FFT frames; the recorded run yields (15, 4524),
# one row per channel. The warning excerpt shown under this cell points at an np.log2
# call inside the function — presumably log2 of a zero envelope value; confirm.
results['sig_frm_hilbert'] = hilbertEnvelopeFunc(parHilbert, results['sig_frm_fft'])

  env[i,:] = np.log2(env[i,:]);


In [37]:
results['sig_frm_hilbert'].shape

(15, 4524)

In [38]:
# Channel energies from the FFT frames and the per-sample AGC gain;
# the recorded run yields (15, 4524).
results['sig_frm_energy'] = channelEnergyFunc(
    parEnergy,
    results['sig_frm_fft'],
    results['agc']['smpGain'],
)

In [39]:
results['sig_frm_energy'].shape

(15, 4524)

In [41]:
# Only the first element of noiseReductionFunc's return value is kept (stored as the
# noise-reduction gain); the recorded run yields (15, 4524).
results['sig_frm_gainNr'] = noiseReductionFunc(parNoiseReduction, results['sig_frm_energy'])[0]

  logA = np.maximum(-100,20*np.log10(A))


In [42]:
results['sig_frm_gainNr'].shape

(15, 4524)

In [43]:
# Apply the noise-reduction gain to the envelopes by element-wise addition. The gain is
# configured with gainDomain 'log2', so this assumes the envelopes are in the log2
# domain as well — confirm against hilbertEnvelopeFunc.
results['sig_frm_hilbertMod'] = (
    results['sig_frm_hilbert'] + results['sig_frm_gainNr']
)

In [44]:
# Keep every third FFT frame, starting at frame index 2
# (4524 -> 1508 frames in the recorded run).
results['sig_3frm_fft'] = results['sig_frm_fft'][:, 2::3]

In [45]:
results['sig_3frm_fft'].shape

(128, 1508)

In [46]:
results['sig_frm_hilbertMod'].shape

(15, 4524)

In [48]:
# Peak analysis runs on a decimated frame grid: every PEAK_DECIM-th FFT frame, starting
# at frame index PEAK_OFFSET. The same two numbers drive the upsampling below, so they
# are named once instead of being repeated as literals.
PEAK_DECIM = 3
PEAK_OFFSET = 2


def _holdToFullRate(sig_3frm, nFrames, factor, nPad):
    """Bring a decimated 2-D signal back to the full frame rate.

    Each column of `sig_3frm` is repeated `factor` times, `nPad` zero columns are
    prepended, and the result is cut to at most `nFrames` columns.
    """
    held = np.repeat(sig_3frm, factor, axis=1)
    padded = np.concatenate((np.zeros((held.shape[0], nPad)), held), axis=1)
    return padded[:, :nFrames]


nFrames = results['sig_frm_fft'].shape[1]
results['sig_3frm_fft'] = results['sig_frm_fft'][:, PEAK_OFFSET::PEAK_DECIM]

# find spectral peaks
results['sig_3frm_peakFreq'], results['sig_3frm_peakLoc'] = specPeakLocatorFunc(parPeak, results['sig_3frm_fft'])

# upsample back to full framerate (and add padding)
results['sig_frm_peakFreq'] = _holdToFullRate(results['sig_3frm_peakFreq'], nFrames, PEAK_DECIM, PEAK_OFFSET)
results['sig_frm_peakLoc'] = _holdToFullRate(results['sig_3frm_peakLoc'], nFrames, PEAK_DECIM, PEAK_OFFSET)

# Calculate current steering weights and synthesize the carrier signals
# steer current based on peak location
results['sig_frm_steerWeights'] = currentSteeringWeightsFunc(parSteer, results['sig_frm_peakLoc'])
# carrier synthesis based on peak frequencies
results['sig_ft_carrier'], results['sig_ft_idxFtToFrm'] = carrierSynthesisFunc(
    parCarrierSynth, results['sig_frm_peakFreq'])

# map to f120 stimulation strategy:
# combine envelopes, carrier, current steering weights and compute outputs
results['sig_ft_ampWords'] = f120MappingFunc(
    parMapper,
    results['sig_ft_carrier'],
    results['sig_frm_hilbertMod'],
    results['sig_frm_steerWeights'],
    results['sig_ft_idxFtToFrm'],
)

# convert amplitude words to simulated electrodogram for vocoder input
results['elGram'] = f120ElectrodogramFunc(parElectrodogram, results['sig_ft_ampWords'])

In [49]:
results['elGram'].shape

(16, 288872)

In [50]:
results['sig_ft_ampWords'].shape

(30, 9629)

In [54]:
results['sig_ft_carrier'].shape

(15, 9629)

In [56]:
results['elGram'].shape

(16, 288872)

# How does the input shape transform to the output shape?

In [58]:
# Shapes at both ends of the pipeline: (1, 90480) input samples versus
# (16, 288872) electrodogram samples in the recorded run.
input_wav_shape=results['sig_smp_wavIn'].shape
output_elgram_shape=results['elGram'].shape

In [59]:
# Read both rates from the parameter dicts that drove the pipeline, so these values
# cannot drift from the configuration (17400 and 55556 in this notebook).
implant_internal_audio_rate = parStrat['fs']
output_elgram_rate = parElectrodogram['outputFs']

(16, 288872)

In [62]:
# NOTE(review): echoing the whole dict prints every intermediate array and the absolute
# source path stored under 'sourceName'. A summary of keys and shapes would be enough,
# and the saved output should be cleared before the notebook is shared.
results

{'sig_smp_wavIn': array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -1.77670478e-05, -6.26945669e-06, -9.32084686e-06]]),
 'sourceName': '/Users/nicholas.rossi/Documents/Personal/Cochlear_Implant_Hackathon/sample_data/sentence1_55_clean.wav',
 'sig_smp_wavPre': array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -3.23234051e-06,  7.20988513e-06,  1.60774174e-06]]),
 'agc': {'Env': array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         2.46788149e-06, 2.51419197e-06, 2.96150599e-06]),
  'CSlow': array([0.00067914, 0.00067914, 0.00067914, ..., 0.00067914, 0.00067914,
         0.00067914]),
  'CFast': array([0.00049505, 0.00049015, 0.0004853 , ..., 0.00021476, 0.00021476,
         0.00021476]),
  'C': array([0.00067914, 0.00067914, 0.00067914, ..., 0.00067914, 0.00067914,
         0.00067914]),
  'G': array([120.09231315, 120.09231315, 120.09231315, ..., 120.09231315,
         120.09231315, 120.09231315]),
  'Hold': array([0., 0., 

In [63]:
results['sig_smp_wavIn']

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -1.77670478e-05, -6.26945669e-06, -9.32084686e-06]])