In [None]:
import sys
import numpy as np
import os                                             # for reading files from disk.
import random as rd                                   # for the Gaussian Montecarlo.

from scipy.io import wavfile                          # for reading sound files.

def sq(x):
  """Return x multiplied by itself."""
  return x*x

# Notebook-wide settings for the spectral feature extraction.
deltaT = 0.2          # divisor applied to a recording's duration to get its number of chunks.
lim1, lim2 = 5, 305   # FFT bins lim1 (inclusive) .. lim2 (exclusive) are kept as features.
noisy = 0.05          # noise-gate fraction of the mean magnitude (we have a very clean speech sample!).

def getData(fil, chunkSeconds=None, band=None, noiseFraction=None):
  """Turn one WAV file into a list of magnitude-spectrum feature rows.

  The recording is cut into equal chunks, each chunk is Fourier transformed,
  chunks judged to be silence are dropped, and the FFT bins inside `band`
  are returned for every remaining chunk.

  Parameters
  ----------
  fil : path of the WAV file (read with scipy.io.wavfile.read).
  chunkSeconds : chunk-length setting; defaults to the module-level deltaT.
  band : (first, last) FFT bin, last exclusive; defaults to (lim1, lim2).
  noiseFraction : noise-gate fraction; defaults to the module-level noisy.

  Returns
  -------
  A list with one entry per kept chunk; each entry is a list of
  band[1]-band[0] spectral magnitudes.  The list is empty when the file
  holds no complete chunk or when nothing rises above the noise gate.

  Raises
  ------
  ValueError : the file is not single-channel.
  IndexError : the chunks are too short to contain FFT bin band[1]-1.
  """
  # Fall back to the notebook-wide settings when no override is given.
  if chunkSeconds is None: chunkSeconds = deltaT
  if band is None: band = (lim1, lim2)
  if noiseFraction is None: noiseFraction = noisy
  lo, hi = band

  sampFreq, snd = wavfile.read(fil)
  if snd.ndim != 1:
    raise ValueError("getData expects a mono recording: " + str(fil))

  # NOTE(review): the original comment here said "put in range (-1,1)";
  # for 16-bit samples that would be 2**15, not 2E15.  Kept unchanged so
  # the extracted feature values stay the same -- confirm.
  snd = snd/2E15

  # NOTE(review): with integer operands this division truncates under
  # Python 2 and is a true division under Python 3 -- confirm which
  # chunking is intended before changing it.
  duration = snd.shape[0]/sampFreq
  numChunks = int(duration/chunkSeconds)
  if numChunks == 0:
    return []                            # not even one complete chunk.
  sizeChunk = int(len(snd)/numChunks)

  # Magnitude spectrum of every chunk (conveniently normalized by 1E9).
  # All chunks have exactly sizeChunk samples, so the rows stack cleanly.
  spectra = np.array([1E9*np.abs(np.fft.rfft(snd[lp*sizeChunk:(lp+1)*sizeChunk]))
                      for lp in range(numChunks)])
  if spectra.shape[1] < hi:
    raise IndexError("chunks of %d samples give only %d FFT bins; need %d"
                     % (sizeChunk, spectra.shape[1], hi))
  inBand = spectra[:, lo:hi]

  # Reference level: mean of the in-band values that exceed a first,
  # coarse gate derived from the overall mean.
  mu = np.mean(spectra)
  loud = inBand[inBand > noiseFraction*mu]
  if loud.size == 0:
    return []                            # nothing but silence.
  mu = loud.mean()

  # Delete portions of the sound file when the user is not speaking:
  # a chunk is silent when its mean magnitude is below the gate.
  silent = spectra.mean(axis=1) < noiseFraction*mu
  return [list(row) for row in inBand[~silent]]

def writeData(data,outFile):
  """Write the speech waveform to file, for plotting purposes.

  Every value is written on its own line as "<row> <column> <value>", and a
  blank line follows each row.  The number of columns written for every
  row is the length of the first row.

  Parameters
  ----------
  data : sequence of equally long sequences of values.
  outFile : path of the text file to create (overwritten if it exists).

  An empty `data` produces an empty file.
  """
  # `with` guarantees the file is closed even if a write fails.
  with open(outFile, "w") as f:
    if len(data) == 0:
      return
    l = len(data[0])
    for i, row in enumerate(data):
      for j in range(l):
        f.write(" ".join((str(i), str(j), str(row[j]))) + "\n")
      f.write("\n")


def getSamples(folders):
  """Collect the feature rows of every speaker.

  Each folder holds the sound files of one speaker.  Every file is passed
  to getData and the rows it returns are concatenated per speaker.

  Parameters
  ----------
  folders : list of directory paths, one per speaker.

  Returns
  -------
  A list parallel to `folders`: entry k holds all rows of all files found
  in folders[k].  Files are visited in sorted name order so the result does
  not depend on the order in which the operating system lists a directory.
  """
  speechSpeakers = []
  for folder in folders:
    # ".DS_Store" is the only entry that is skipped.
    names = sorted(v for v in os.listdir(folder) if v != ".DS_Store")
    speechSpeaker = []
    for name in names:
      # os.path.join also copes with a folder given without a trailing slash.
      speechSpeaker.extend(getData(os.path.join(folder, name)))
    speechSpeakers.append(speechSpeaker)

  return speechSpeakers
      
def XY(speech,numFeatures,N):
  """Flatten per-speaker feature rows into a design matrix and label list.

  Parameters
  ----------
  speech : speech[i][j] is the j-th feature row of speaker i.
  numFeatures : number of leading values copied from every row.
  N : number of rows to allocate in the matrix.

  Returns
  -------
  (tX, tY) : tX is an N x numFeatures numpy matrix whose rows follow the
  order speaker 0, speaker 1, ...; tY lists the speaker index of each
  copied row.  Rows of tX beyond the copied ones stay zero.
  """
  # np.asmatrix builds the same np.matrix type as the older np.mat alias.
  tX = np.asmatrix(np.zeros((N,numFeatures)))
  tY = []
  row = 0
  for label, chunks in enumerate(speech):
    for features in chunks:
      tX[row, :] = [features[k] for k in range(numFeatures)]
      tY.append(label)
      row += 1

  return tX,tY
        

In [None]:

# One folder of recordings per speaker, for training and for testing.
trainingFoldersList = ["samples/train/speaker%d/" % n for n in range(1, 11)]
tstFoldersList      = ["samples/test/speaker%d/" % n for n in range(1, 11)]

# Choose which speakers to work with
whichSpeakers = [0,1,2,3,4,5,6,7,8,9]   # in this case, all 10.
numSpeakers = len(whichSpeakers)
numFeatures = lim2-lim1


def _countRows(speech):
  """Total number of feature rows over all speakers."""
  return sum(len(rows) for rows in speech)


def _minMaxScale(X, lo, hi):
  """Rescale every column of X with (X - lo)/(hi - lo), column by column.

  A column whose lo equals hi is mapped to zero instead of dividing by zero.
  """
  lo = np.asarray(lo, dtype=float)
  span = np.asarray(hi, dtype=float) - lo
  span[span == 0] = 1.0
  return (X - lo)/span


def _oneHot(labels, numClasses):
  """Return a len(labels) x numClasses matrix with a single 1 per row."""
  out = np.asmatrix(np.zeros((len(labels), numClasses)))
  for r, c in enumerate(labels):
    out[r, c] = 1
  return out


# ---- Training set ----
trainingFolders = [trainingFoldersList[speaker] for speaker in whichSpeakers]
speech = getSamples(trainingFolders)
N1 = _countRows(speech)
trainX,trainY = XY(speech,numFeatures,N1)

# Per-feature extrema of the training data; they define the scaling that is
# applied to both the training and the test features.
minArray = [np.min(trainX[:,i]) for i in range(numFeatures)]
maxArray = [np.max(trainX[:,i]) for i in range(numFeatures)]

with open("min_max_values.dat", "w") as f:
  for i in range(numFeatures):
    f.write(str(maxArray[i]) + " " + str(minArray[i]) + "\n")

trainX = _minMaxScale(trainX, minArray, maxArray)

# ---- Test set (scaled with the training extrema) ----
tstFolders = [tstFoldersList[speaker] for speaker in whichSpeakers]
speech = getSamples(tstFolders)
N2 = _countRows(speech)
tstX,tstY = XY(speech,numFeatures,N2)
tstX = _minMaxScale(tstX, minArray, maxArray)

# One-hot encoded speaker labels.
trainLabels = _oneHot(trainY, numSpeakers)
tstLabels = _oneHot(tstY, numSpeakers)


In [None]:
def tstClassifier(nn,tstX,tstY,ss=1,nSpeakers=None,nFeatures=None):
  """Score a network on the test rows using majority voting over windows.

  The rows of each speaker are cut into consecutive, non-overlapping windows
  of `ss` rows.  Every row of a window is classified with nn.activate, the
  most frequent prediction is the window's answer, and the answer is
  compared with the label of the window's first row.

  Parameters
  ----------
  nn : object with an activate(features) method returning one score per class.
  tstX : 2-D feature container indexed as tstX[row, column].
  tstY : label of every row; rows must be grouped by speaker in ascending
         label order (the order in which XY emits them).
  ss : rows per voting window (default 1, i.e. every row is scored alone).
  nSpeakers : number of speakers; defaults to the module-level numSpeakers.
  nFeatures : number of columns fed to the network; defaults to the
              module-level numFeatures.

  Returns
  -------
  A list with one value per speaker: the fraction of correctly classified
  windows among all windows processed so far.
  NOTE(review): the counters are not reset between speakers, so the values
  are cumulative rather than per-speaker accuracies -- confirm intended.
  The value is 0.0 while no window has been processed yet.
  """
  if nSpeakers is None: nSpeakers = numSpeakers
  if nFeatures is None: nFeatures = numFeatures
  step = ss

  # Rows limits[s] .. limits[s+1]-1 belong to speaker s.  Counting the
  # smaller labels also gives a correct (empty) range for a speaker that
  # has no rows at all.
  limits = [sum(1 for y in tstY if y < s) for s in range(nSpeakers)]
  limits.append(len(tstY))

  answers = []
  yes = 0; total = 0
  for speaker in range(nSpeakers):
    start, end = limits[speaker], limits[speaker+1]
    # +1 so that a window ending exactly on the speaker's last row is scored.
    for lp in range(start, end-step+1, step):
      votes = {}
      for x in range(lp, lp+step):
        A = [tstX[x,v] for v in range(nFeatures)]
        guess = int(np.argmax(nn.activate(A)))
        votes[guess] = votes.get(guess, 0) + 1

      # Majority vote; ties go to the smallest class index.
      best = max(sorted(votes), key=votes.get)

      if best == tstY[lp]: yes += 1
      total += 1

    answers.append(yes*1./total if total else 0.0)

  return answers



In [None]:
from pybrain.datasets            import ClassificationDataSet
from pybrain.tools.shortcuts     import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules   import SoftmaxLayer
from pybrain.tools.xml.networkreader import NetworkReader
from pybrain.tools.xml.networkwriter import NetworkWriter

from pybrain.structure import FullConnection
from pybrain.structure import SigmoidLayer
from pybrain.structure import FeedForwardNetwork
from pybrain.structure import LinearLayer
from pybrain.structure import SoftmaxLayer
from pybrain.structure import TanhLayer

numClasses = len(speech)  # one class per entry of `speech` (one entry per speaker folder).
numHiddenNodes = 10       # neurons in every hidden layer.
numHiddenLayers = 5       # number of hidden layers stacked between input and output.
possibleOutputs = 1       # the data set stores a single target column: the speaker index.
numTrainingEpochs = 30    # passes over the training data.
evalWindow = 1            # rows per voting window when scoring the net during training.

# Copy the training matrix into a PyBrain classification data set.
trndata = ClassificationDataSet(numFeatures, possibleOutputs, nb_classes=numClasses)
for row in range(0,N1):
  tempListIn = [trainX[row,i] for i in range(numFeatures)]
  tempListOut = [int(trainY[row])]
  trndata.addSample(tempListIn,tempListOut)
trndata._convertToOneOfMany()

first = False        # True: build a fresh network; False: continue from nnFile.
nnFile = "nn17.xml"
st = 18              # number used in the name of the next checkpoint file.
if first:
  nn = FeedForwardNetwork()
  inputLayer = LinearLayer(numFeatures)
  nn.addInputModule(inputLayer)
  hiddenLayers = []
  for x in range(numHiddenLayers):
    hiddenLayer = SigmoidLayer(numHiddenNodes)
    nn.addModule(hiddenLayer)
    hiddenLayers.append(hiddenLayer)
  outputLayer = SoftmaxLayer(numClasses)
  nn.addOutputModule(outputLayer)

  # Chain the layers: input -> hidden[0] -> ... -> hidden[-1] -> output.
  nn.addConnection(FullConnection(inputLayer,hiddenLayers[0]))
  for x in range(numHiddenLayers-1):
    # Each hidden layer feeds the NEXT one (x -> x+1).
    nn.addConnection(FullConnection(hiddenLayers[x],hiddenLayers[x+1]))
  nn.addConnection(FullConnection(hiddenLayers[numHiddenLayers-1],outputLayer))
  nn.sortModules()

else:
  nn = NetworkReader.readFrom(nnFile)

trainer = BackpropTrainer(nn, dataset=trndata, momentum=0., verbose=True, weightdecay=0.)
for i in range(numTrainingEpochs):
  trainer.trainOnDataset(dataset=trndata)
  if (i+1)%5 == 0:
    # Every fifth epoch: score on the test set and save a checkpoint.
    A = tstClassifier(nn,tstX,tstY,evalWindow)
    print(np.mean(A))
    NetworkWriter.writeToFile(nn, "nn" + str(st)+".xml")
    st += 1
# nn20


In [None]:
# Score the saved network for voting windows of 1 to 30 rows and print,
# for each, the window size multiplied by deltaT and the mean of the
# values returned by tstClassifier.
nn = NetworkReader.readFrom("trainedNet.xml")
for windowRows in range(1,31):
  A = tstClassifier(nn,tstX,tstY,windowRows)
  # Formatted so the same line is printed under Python 2 and Python 3.
  print("%s %s" % (windowRows*deltaT, np.mean(A)))
