In [2]:
import numpy as np
from hmmlearn import hmm
import copy
from collections import defaultdict
from sklearn.metrics import confusion_matrix

In [3]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
"""GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
      covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
      min_covar=0.001, n_components=10, n_iter=2000, params='stmc',
      random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
      verbose=False)"""

In [4]:
def GHMM_train(dataset, states = 4, trials = 10,cov = "diag",max_iter=1000, randomSeed = 100, min_cov = 1e-3):
    rndNum = 10
    prob = -np.inf
    best_g = None
    score = 0
    np.random.seed(randomSeed)
    random_state = np.random.random(trials)
    for i in range(len(random_state)):
        state = int(random_state[i]*1000)
        g = hmm.GaussianHMM(n_components=states, covariance_type = cov, n_iter=max_iter, min_covar = min_cov)
        #print g.random_state
        g.fit(dataset)
        score = g.score(dataset)
        if score > prob:
            prob = score
            best_g = copy.deepcopy(g)
        #print "score",sum(best_g.score(imcN))
        #t = best_g.predict(imcN)
    return best_g

In [33]:
#model = GHMM_train(dataset = birds, states = 6, trials = 1,cov = "full",max_iter=1000, randomSeed = 120)

In [6]:
def GHMM_evaluate(testset, GMMmodel):
    #test set is a list of sets of name-date collections
    #GMMmodel is a dictionary of test data
    truth = []
    pred = []
    for dataset in testset:
        truth.append(dataset[0])
        test_score = -np.inf
        pred_name = None
        for modelName, model in GMMmodel.items():
            score = model.score(dataset[1])
            if score > test_score:
                test_score = score
                pred_name = modelName
        pred.append(pred_name)
    label = []
    for item in truth:
        if item not in label:
            label.append(item)
    matrix = confusion_matrix(truth, pred, labels=label)
    print ("truth: ",truth)
    print ("prediction: ", pred)
    print ("item order in table", label)
    return matrix
        
        

####testing
first, training data with 7 dimensions, magnitude and its frequency, and frequencies of 20th, 60th and 80th quantile in magnitude

In [498]:
#load in data f
trainA = np.load("project_data/DataSetWithMAS/BirdATraining.npy")
trainB = np.load("project_data/DataSetWithMAS/BirdBTraining.npy")
trainC = np.load("project_data/DataSetWithMAS/BirdCTraining.npy")
trainD = np.load("project_data/DataSetWithMAS/BirdDTraining.npy")
trainE = np.load("project_data/DataSetWithMAS/BirdETraining.npy")
trainF = np.load("project_data/DataSetWithMAS/BirdFTraining.npy")
trainG = np.load("project_data/DataSetWithMAS/BirdGTraining.npy")
trainH = np.load("project_data/DataSetWithMAS/BirdHTraining.npy")
trainset =[('A',trainA),('B',trainB),('C',trainC),('D',trainD),('E',trainE),('F',trainF),('G',trainG),('H',trainH)]

In [None]:
modelDict = dict()
for dataset in trainset:
    model = GHMM_train(dataset = dataset[1], states = 15, trials = 1,cov = "diag",max_iter=1000, randomSeed = 120, min_cov = 1e-5)
    modelDict[dataset[0]] = model

In [499]:
testA1 = np.load("project_data/DataSetWithMAS/TestA09.npy")
testA2 = np.load("project_data/DataSetWithMAS/TestA10.npy")
testB = np.load("project_data/DataSetWithMAS/TestB05.npy")
testC = np.load("project_data/DataSetWithMAS/TestC06.npy")
testD1 = np.load("project_data/DataSetWithMAS/TestD09.npy")
testD2 = np.load("project_data/DataSetWithMAS/TestD10.npy")
testE1 = np.load("project_data/DataSetWithMAS/TestE11.npy")
testE2 = np.load("project_data/DataSetWithMAS/TestE12.npy")
testF1 = np.load("project_data/DataSetWithMAS/TestF09.npy")
testF2 = np.load("project_data/DataSetWithMAS/TestF10.npy")
testG = np.load("project_data/DataSetWithMAS/TestG07.npy")
testH1 = np.load("project_data/DataSetWithMAS/TestH16.npy")
testH2 = np.load("project_data/DataSetWithMAS/TestH17.npy")
testH3 = np.load("project_data/DataSetWithMAS/TestH18.npy")

In [None]:
GHMM_evaluate(testset,modelDict)

In [None]:
#try different settings hidden states

###testing with 2 dimensions, magnitude and its frequency

In [500]:
a = [[x[0],x[1]] for x in trainA]
b = [[x[0],x[1]] for x in trainB]
c = [[x[0],x[1]] for x in trainC]
d = [[x[0],x[1]] for x in trainD]
e = [[x[0],x[1]] for x in trainE]
f = [[x[0],x[1]] for x in trainF]
g = [[x[0],x[1]] for x in trainG]
h = [[x[0],x[1]] for x in trainH]
trainset = [('A',a),('B',b),('C',c),('D',d),('E',e),('F',f),('G',g),('H',h)]

In [527]:
modelDict = dict()
for dataset in trainset:
    model = GHMM_train(dataset = dataset[1], states = 15, trials = 1,cov = "diag",max_iter=100, randomSeed = 120, min_cov = 1e-5)
    modelDict[dataset[0]] = model

In [525]:
ta1 = [[x[0],x[1]] for x in testA1]
ta2 = [[x[0],x[1]] for x in testA2]
tb = [[x[0],x[1]] for x in testB]
tc = [[x[0],x[1]] for x in testC]
td1 = [[x[0],x[1]] for x in testD1]
td2 = [[x[0],x[1]] for x in testD2]
te1 = [[x[0],x[1]] for x in testE1]
te2 = [[x[0],x[1]] for x in testE2]
tf1 = [[x[0],x[1]] for x in testF1]
tf2 = [[x[0],x[1]] for x in testF2]
tg = [[x[0],x[1]] for x in testG]
th1 = [[x[0],x[1]] for x in testH1]
th2 = [[x[0],x[1]] for x in testH2]
th3 = [[x[0],x[1]] for x in testH3]
testset = [("A",ta1),("A",ta2),("B",tb),("C",tc),("D",td1),("D",td2),("E",te1),("E",te2),("F",tf1), ("F",tf2), ("G",tg),("H", th1),("H", th2),("H", th3)]

In [526]:
GHMM_evaluate(testset,modelDict)

truth:  ['A', 'A', 'B', 'C', 'D', 'D', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['A', 'A', 'B', 'C', 'H', 'F', 'E', 'E', 'F', 'H', 'F', 'H', 'H', 'H']
item order in table ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']


array([[2, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 1],
       [0, 0, 0, 0, 2, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 1],
       [0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 3]])

In [None]:
#try different states

#testing with 20 dimensions, pick five peaks in each frame time, for each peak, collect its magnitude and frequency,
#and the two frequencies of magnitudes at 80th percentile of the peak value GMM models

In [423]:
trainA = np.load("project_data/DataSetWithMultipeak/BirdATraining.npy")
trainB = np.load("project_data/DataSetWithMultipeak/BirdBTraining.npy")
trainC = np.load("project_data/DataSetWithMultipeak/BirdCTraining.npy")
trainD = np.load("project_data/DataSetWithMultipeak/BirdDTraining.npy")
trainE = np.load("project_data/DataSetWithMultipeak/BirdETraining.npy")
trainF = np.load("project_data/DataSetWithMultipeak/BirdFTraining.npy")
trainG = np.load("project_data/DataSetWithMultipeak/BirdGTraining.npy")
trainH = np.load("project_data/DataSetWithMultipeak/BirdHTraining.npy")
trainset =[('A',trainA),('B',trainB),('C',trainC),('D',trainD),('E',trainE),('F',trainF),('G',trainG),('H',trainH)]

In [441]:
modelDict = dict()
for dataset in trainset:
    model = GMMHMM_train(dataset = dataset[1], mix = 5, states = 15, trials = 1,cov = "diag",max_iter=5, randomSeed = 1024)
    modelDict[dataset[0]] = model

In [459]:
testA1 = np.load("project_data/DataSetWithMultipeak/TestA09.npy")
testA2 = np.load("project_data/DataSetWithMultipeak/TestA10.npy")
testB1 = np.load("project_data/DataSetWithMultipeak/TestB05.npy")
testC1 = np.load("project_data/DataSetWithMultipeak/TestC06.npy")
testD1 = np.load("project_data/DataSetWithMultipeak/TestD09.npy")
testD2 = np.load("project_data/DataSetWithMultipeak/TestD10.npy")
testE1 = np.load("project_data/DataSetWithMultipeak/TestE11.npy")
testE2 = np.load("project_data/DataSetWithMultipeak/TestE12.npy")
testF1 = np.load("project_data/DataSetWithMultipeak/TestF09.npy")
testF2 = np.load("project_data/DataSetWithMultipeak/TestF10.npy")
testG1 = np.load("project_data/DataSetWithMultipeak/TestG07.npy")
testH1 = np.load("project_data/DataSetWithMultipeak/TestH16.npy")
testH2 = np.load("project_data/DataSetWithMultipeak/TestH17.npy")
testH3 = np.load("project_data/DataSetWithMultipeak/TestH18.npy")
testset = [('A',testA1),('A',testA2),('B',testB1),("C",testC1),("D",testD1),("D",testD2),("E",testE1),("E",testE2),("F",testF1), ("F",testF2), ("G",testG1),("H", testH1),("H", testH2),("H", testH3)]

In [443]:
GHMM_evaluate(testset,modelDict)

truth:  ['A', 'A', 'B', 'C', 'D', 'D', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['A', 'A', 'B', 'G', 'H', 'F', 'E', 'E', 'F', 'B', 'H', 'F', 'H', 'H']
item order in table ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']


array([[2, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0, 1],
       [0, 0, 0, 0, 2, 0, 0, 0],
       [0, 1, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0, 2]])

In [445]:
#####testing with 10 dimensions, pick five peaks in each frame time, magnitude and frequency

In [446]:
trainA[1]

array([  85.        ,   11.6632142 ,   11.51303269,   11.38571693,
         61.        ,    8.29771025,    8.0202668 ,    7.78114792,
         68.        ,    7.44013258,    7.38778344,    7.08121227,
        121.        ,    6.06926918,    5.63240736,    5.24867266,
        103.        ,    5.74161046,    5.64747872,    5.533117  ])

In [465]:
a = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainA]
b = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainB]
c = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainC]
d = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainD]
e = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainE]
f = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainF]
g = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainG]
h = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in trainH]
trainset = [('A',a),('B',b),('C',c),('D',d),('E',e),('F',f),('G',g),('H',h)]

In [473]:
print e[10:20]

[[0.0, 161.34150888612274, 640.0, 20.67457418939124, 652.0, 14.984776583449952, 648.0, 12.679452475689653, 4.0, 12.578563618606053], [0.0, 197.86671283602729, 648.0, 1.7228789905533801, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 199.99933096207931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 200.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 200.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 200.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 200.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 200.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 199.41960069274273, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 196.91615730004878, 96.0, 2.7200174188596176, 44.0, 2.3718836834777348, 52.0, 2.3647450137082489, 58.0, 2.1432631841638741]]


In [477]:
ta1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testA1]
ta2 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testA2]
tb1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testB1]
tc1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testC1]
td1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testD1]
td2 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testD2]
te1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testE1]
te2 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testE2]
tf1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testF1]
tf2 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testF2]
tg1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testG1]
th1 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testH1]
th2 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testH2]
th3 = [[x[0],x[1],x[4],x[5],x[8],x[9],x[12],x[13],x[16],x[17]] for x in testH3]
testset = [("A",ta1),("A",ta2),("B",tb1),("C",tc1),("D",td1),("D",td2),("E",te1),("E",te2),("F",tf1), ("F",tf2), ("G",tg1),("H", th1),("H", th2),("H", th3)]

In [479]:
#testset

In [480]:
modelDict = dict()
for dataset in trainset:
    model = GMMHMM_train(dataset = dataset[1], mix = 5, states = 15, trials = 1,cov = "diag",max_iter=5, randomSeed = 1024)
    modelDict[dataset[0]] = model

In [475]:
modelDict

{'A': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=15, n_iter=5, n_mix=5,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.1,
     transmat_prior=1.0, verbose=False),
 'B': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=15, n_iter=5, n_mix=5,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.1,
     transmat_prior=1.0, verbose=False),
 'C': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=15, n_iter=5, n_mix=5,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.1,
     transmat_prior=1.0, verbose=False),
 'D': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=15, n_iter=5, n_mix=5,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.1,
     transmat_prior=1.0, verbose=False),


In [316]:
a = [[x[1],x[2],x[6]] for x in trainA]
b = [[x[1],x[2],x[6]] for x in trainB]
c = [[x[1],x[2],x[6]] for x in trainC]
d = [[x[1],x[2],x[6]] for x in trainD]
e = [[x[1],x[2],x[6]] for x in trainE]
f = [[x[1],x[2],x[6]] for x in trainF]
g = [[x[1],x[2],x[6]] for x in trainG]
h = [[x[1],x[2],x[6]] for x in trainH]



In [346]:
a = [[x[0],x[1]] for x in trainA]
b = [[x[0],x[1]] for x in trainB]
c = [[x[0],x[1]] for x in trainC]
d = [[x[0],x[1]] for x in trainD]
e = [[x[0],x[1]] for x in trainE]
f = [[x[0],x[1]] for x in trainF]
g = [[x[0],x[1]] for x in trainG]
h = [[x[0],x[1]] for x in trainH]

In [347]:
trainset = [('A',a),('B',b),('C',c),('D',d),('E',e),('F',f),('G',g),('H',h)]

In [222]:
trainset =[('A',trainA),('B',trainB),('C',trainC),('D',trainD),('E',trainE),('F',trainF),('G',trainG),('H',trainH)]

In [311]:
modela = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(a)

In [315]:
modela.score(a)

-52166.264746875953

In [145]:
"""testB = np.load("project_data/DataSet2/BirdBTesting.npy")
testD1 = np.load("project_data/DataSet2/BirdDTesting.npy")
testD2 = np.load("project_data/DataSet2/BirdDTesting(2).npy")
testF1 = np.load("project_data/DataSet2/BirdFTesting(1).npy")
testF2 = np.load("project_data/DataSet2/BirdFTesting(2).npy")
testG = np.load("project_data/DataSet2/BirdGTesting.npy")
testH1 = np.load("project_data/DataSet2/BirdHTesting(1).npy")
testH2 = np.load("project_data/DataSet2/BirdHTesting(2).npy")
testH3 = np.load("project_data/DataSet2/BirdHTesting(3).npy")"""

In [226]:
testA1 = np.load("project_data/DataSetWithMAS/TestA09.npy")
testA2 = np.load("project_data/DataSetWithMAS/TestA10.npy")
testB = np.load("project_data/DataSetWithMAS/TestB05.npy")
testC = np.load("project_data/DataSetWithMAS/TestC06.npy")
testD1 = np.load("project_data/DataSetWithMAS/TestD09.npy")
testD2 = np.load("project_data/DataSetWithMAS/TestD10.npy")
testE1 = np.load("project_data/DataSetWithMAS/TestE11.npy")
testE2 = np.load("project_data/DataSetWithMAS/TestE12.npy")
testF1 = np.load("project_data/DataSetWithMAS/TestF09.npy")
testF2 = np.load("project_data/DataSetWithMAS/TestF10.npy")
testG = np.load("project_data/DataSetWithMAS/TestG07.npy")
testH1 = np.load("project_data/DataSetWithMAS/TestH16.npy")
testH2 = np.load("project_data/DataSetWithMAS/TestH17.npy")
testH3 = np.load("project_data/DataSetWithMAS/TestH18.npy")

In [321]:
ta1 = [[x[1],x[2],x[6]] for x in testA1]
ta2 = [[x[1],x[2],x[6]] for x in testA2]
tb = [[x[1],x[2],x[6]] for x in testB]
tc = [[x[1],x[2],x[6]] for x in testC]
td1 = [[x[1],x[2],x[6]] for x in testD1]
td2 = [[x[1],x[2],x[6]] for x in testD2]
te1 = [[x[1],x[2],x[6]] for x in testE1]
te2 = [[x[1],x[2],x[6]] for x in testE2]
tf1 = [[x[1],x[2],x[6]] for x in testF1]
tf2 = [[x[1],x[2],x[6]] for x in testF2]
tg = [[x[1],x[2],x[6]] for x in testG]
th1 = [[x[1],x[2],x[6]] for x in testH1]
th2 = [[x[1],x[2],x[6]] for x in testH2]
th3 = [[x[1],x[2],x[6]] for x in testH3]



In [350]:
ta1 = [[x[0],x[1]] for x in testA1]
ta2 = [[x[0],x[1]] for x in testA2]
tb = [[x[0],x[1]] for x in testB]
tc = [[x[0],x[1]] for x in testC]
td1 = [[x[0],x[1]] for x in testD1]
td2 = [[x[0],x[1]] for x in testD2]
te1 = [[x[0],x[1]] for x in testE1]
te2 = [[x[0],x[1]] for x in testE2]
tf1 = [[x[0],x[1]] for x in testF1]
tf2 = [[x[0],x[1]] for x in testF2]
tg = [[x[0],x[1]] for x in testG]
th1 = [[x[0],x[1]] for x in testH1]
th2 = [[x[0],x[1]] for x in testH2]
th3 = [[x[0],x[1]] for x in testH3]

In [351]:
testset = [("A",ta1),("A",ta2),("B",tb),("C",tc),("D",td1),("D",td2),("E",te1),("E",te2),("F",tf1), ("F",tf2), ("G",tg),("H", th1),("H", th2),("H", th3)]

In [227]:
testset = [("A",testA1),("A",testA2),("B",testB),("C",testC),("D",testD1),("D",testD2),("E",testE1),("E",testE2),("F",testF1), ("F",testF2), ("G",testG),("H", testH1),("H", testH2),("H", testH3)]

In [370]:
modelDict = dict()
for dataset in trainset:
    model = GHMM_train(dataset = dataset[1], states = 15, trials = 1,cov = "diag",max_iter=1000, randomSeed = 120, min_cov = 1e-5)
    modelDict[dataset[0]] = model

In [369]:
GHMM_evaluate(testset,modelDict)

truth:  ['A', 'A', 'B', 'C', 'D', 'D', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['A', 'A', 'B', 'C', 'F', 'G', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
item order in table ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']


array([[2, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 0],
       [0, 0, 0, 0, 2, 0, 0, 0],
       [0, 0, 0, 0, 0, 2, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 3]])

In [18]:
def inverse_logLike_ratio(testset, GMMmodel):
    #test set is a set of name-date collections
    #GMMmodel is a dictionary of test data
    truth = testset[0]
    test_score = []
    inverseLLR = dict()
    pred_name = []
    for modelName, model in GMMmodel.items():
        score = model.score(testset[1])
        test_score.append(score)
        pred_name.append(modelName)
    #print test_score
    test_score = np.reciprocal(test_score)
    #print test_score
    for i in range(len(test_score)):
        inverseLLR[pred_name[i]] = test_score[i]*1./sum(test_score)
    return truth, inverseLLR

In [355]:
for test in testset:
    print inverse_logLike_ratio(test, modelDict)
    print 

('A', {'A': 0.31007220090030013, 'C': 0.026053744816223006, 'B': 0.2058311129303112, 'E': 0.13695679729858626, 'D': 0.00050567823371857105, 'G': 0.15834725405188668, 'F': 5.6559393254539184e-08, 'H': 0.16223315520958093})

('A', {'A': 0.2714082293134305, 'C': 0.15335308175703516, 'B': 0.10716024277427623, 'E': 0.10119020799291874, 'D': 0.077157068957780628, 'G': 0.14309610621612387, 'F': 1.5680257138322098e-05, 'H': 0.14661938273129657})

('B', {'A': 0.086648786148046178, 'C': 0.11517798584170734, 'B': 0.25506715651591477, 'E': 0.1016589360967271, 'D': 0.090879544419188746, 'G': 0.12461072843163774, 'F': 0.10283657995847237, 'H': 0.12312028258830574})

('C', {'A': 0.03105732972811091, 'C': 0.24263564159905798, 'B': 0.022904774414175506, 'E': 0.20955387224359159, 'D': 0.081376674636476742, 'G': 0.22406117791888061, 'F': 1.502469762692489e-05, 'H': 0.18839550476207975})

('D', {'A': 0.024534626723163769, 'C': 0.21684773103626098, 'B': 0.0062399159534284685, 'E': 0.20995279255105653, 'D':

In [126]:
modelF = GHMM_train(trainF, states = 6, trials = 1,cov = "full",max_iter=1000, randomSeed = 3, min_cov = 1e-4)

In [87]:
modelB = GHMM_train(trainB, states = 30, trials = 1,cov = "",max_iter=1000, randomSeed = 1)

In [92]:
modelG = GHMM_train(trainG, states = 2, trials = 1,cov = "full",max_iter=1000, randomSeed = 1)

In [433]:
def GMMHMM_train(dataset, mix = 3, states = 4, trials = 10,cov = "diag",max_iter=1000, randomSeed = 100):
    rndNum = 10
    prob = -np.inf
    best_g = None
    score = 0
    np.random.seed(randomSeed)
    random_state = np.random.random(trials)
    for i in range(len(random_state)):
        state = int(random_state[i]*1000)
        g = hmm.GMMHMM(n_components=states, n_mix = mix, covariance_type = cov, n_iter=max_iter, tol=0.1)
        #print g.random_state
        g.fit(dataset)
        score = g.score(dataset)
        if score > prob:
            prob = score
            best_g = copy.deepcopy(g)
        #print "score",sum(best_g.score(imcN))
        #t = best_g.predict(imcN)
    return best_g

In [213]:
#

In [193]:
modelGMM_A = GMMHMM_train(dataset = trainA, mix = 3, states = 4, trials = 1,cov = "diag",max_iter=1000, randomSeed = 120)

In [202]:
modelGHMM_A = GHMM_train(dataset=trainA, states = 4, trials = 1,cov = "diag",max_iter=1000, randomSeed = 100, min_cov = 1e-3)

In [194]:
dir(modelGMM_A)

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getstate__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accumulate_sufficient_statistics',
 '_check',
 '_compute_log_likelihood',
 '_compute_posteriors',
 '_decode_map',
 '_decode_viterbi',
 '_do_backward_pass',
 '_do_forward_pass',
 '_do_mstep',
 '_do_viterbi_pass',
 '_generate_sample_from_state',
 '_get_param_names',
 '_init',
 '_initialize_sufficient_statistics',
 'algorithm',
 'covariance_type',
 'covars_prior',
 'decode',
 'fit',
 'get_params',
 'gmms_',
 'init_params',
 'monitor_',
 'n_components',
 'n_iter',
 'n_mix',
 'params',
 'predict',
 'predict_proba',
 'random_state',
 'sample',
 'score',
 'score_samples',
 'set_params',
 'startprob_',
 'startprob_prior',
 'tol',
 'transmat_',
 'transmat_prior',
 'verbose']

In [205]:
modelGMM_A.score(test)

-3144.8947375598059

In [204]:
modelGHMM_A.score(testA2)

-3592.8816959576939

In [218]:
modelDict = dict()
for dataset in trainset:
    model = GMMHMM_train(dataset = dataset[1], mix = 6, states = 6, trials = 1,cov = "diag",max_iter=2000, randomSeed = 1024)
    modelDict[dataset[0]] = model

In [219]:
GHMM_evaluate(trainset,modelDict)

truth:  ['A', 'B', 'D', 'E', 'F', 'G', 'H']
prediction:  ['A', 'B', 'D', 'E', 'F', 'G', 'H']
item order in table ['A', 'B', 'D', 'E', 'F', 'G', 'H']


array([[1, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 1]])

In [220]:
GHMM_evaluate(testset,modelDict)

truth:  ['A', 'A', 'B', 'D', 'D', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['B', 'A', 'H', 'F', 'F', 'F', 'D', 'E', 'F', 'H', 'H']
item order in table ['A', 'B', 'D', 'F', 'G', 'H']


array([[1, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 2, 0, 0],
       [0, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 2]])

In [212]:
modelDict

{'A': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=6, n_iter=1000, n_mix=3,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.01,
     transmat_prior=1.0, verbose=False),
 'B': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=6, n_iter=1000, n_mix=3,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.01,
     transmat_prior=1.0, verbose=False),
 'D': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=6, n_iter=1000, n_mix=3,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.01,
     transmat_prior=1.0, verbose=False),
 'E': GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
     init_params='stmcw', n_components=6, n_iter=1000, n_mix=3,
     params='stmcw', random_state=None, startprob_prior=1.0, tol=0.01,
     transmat_prior=1.0, verb

In [52]:
testB = np.load("project_data/DataSet/BirdBTesting.npy")
testD1 = np.load("project_data/DataSet/BirdDTesting1.npy")
testD2 = np.load("project_data/DataSet/BirdDTesting2.npy")
testE1 = np.load("project_data/DataSet/samples11.npy")
testE2 = np.load("project_data/DataSet/samples12.npy")

In [56]:
testset = [('B',testB),('D',testD1),('D',testD2),('E',testE1),('E',testE2)]

In [88]:
GHMM_evaluate(testset,modelDict)

truth:  ['B', 'D', 'D', 'E', 'E']
prediction:  ['B', 'B', 'B', 'D', 'D']
item order in table ['B', 'D', 'E']


array([[1, 0, 0],
       [2, 0, 0],
       [0, 2, 0]])

In [81]:
sum([1,2,3])

6

In [59]:
list(set(a))

['a', 'b', 'd']

In [None]:
#load in data
birdA
birdB
birdC
birdD
birdE
birdF
birdG
birdH
#train model for each bird


In [230]:
ModelA_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainA)
ModelA_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainA)
ModelA_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainA)
ModelA_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainA)
ModelA_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainA)

In [231]:
ModelB_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainB)
ModelB_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainB)
ModelB_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainB)
ModelB_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainB)
ModelB_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainB)

In [232]:
ModelC_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainC)
ModelC_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainC)
ModelC_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainC)
ModelC_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainC)
ModelC_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainC)

In [233]:
ModelD_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainD)
ModelD_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainD)
ModelD_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainD)
ModelD_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainD)
ModelD_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainD)

In [234]:
ModelE_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainE)
ModelE_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainE)
ModelE_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainE)
ModelE_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainE)
ModelE_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainE)

In [235]:
ModelF_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainF)
ModelF_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainF)
ModelF_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainF)
ModelF_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainF)
ModelF_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainF)

In [236]:
ModelG_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainG)
ModelG_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainG)
ModelG_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainG)
ModelG_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainG)
ModelG_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainG)

In [237]:
ModelH_s2 = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=2000).fit(trainH)
ModelH_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(trainH)
ModelH_s6 = hmm.GaussianHMM(n_components=6, covariance_type="diag", n_iter=2000).fit(trainH)
ModelH_s8 = hmm.GaussianHMM(n_components=8, covariance_type="diag", n_iter=2000).fit(trainH)
ModelH_s10 = hmm.GaussianHMM(n_components=10, covariance_type="diag", n_iter=2000).fit(trainH)

In [238]:
modelDict2 = {'A': ModelA_s2,'B': ModelB_s2,'C': ModelC_s2,'D': ModelD_s2,'E': ModelE_s2,'F':ModelF_s2,'G':ModelG_s2,'H':ModelH_s2}
modelDict4 = {'A': ModelA_s4,'B': ModelB_s4,'C': ModelC_s4,'D': ModelD_s4,'E': ModelE_s4,'F':ModelF_s4,'G':ModelG_s4,'H':ModelH_s4}
modelDict6 = {'A': ModelA_s6,'B': ModelB_s6,'C': ModelC_s6,'D': ModelD_s6,'E': ModelE_s6,'F':ModelF_s6,'G':ModelG_s6,'H':ModelH_s6}
modelDict8 = {'A': ModelA_s8,'B': ModelB_s8,'C': ModelC_s8,'D': ModelD_s8,'E': ModelE_s8,'F':ModelF_s8,'G':ModelG_s8,'H':ModelH_s8}
modelDict10 = {'A': ModelA_s10,'B': ModelB_s10,'C': ModelC_s10,'D': ModelD_s10,'E': ModelE_s10,'F':ModelF_s10,'G':ModelG_s10,'H':ModelH_s10}

In [239]:
GHMM_evaluate(testset,modelDict2)

truth:  ['A', 'A', 'B', 'C', 'D', 'D', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['A', 'A', 'B', 'C', 'H', 'H', 'E', 'E', 'H', 'H', 'H', 'H', 'H', 'H']
item order in table ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']


array([[2, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 2],
       [0, 0, 0, 0, 2, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 2],
       [0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 3]])

In [295]:
ModelF_s4 = hmm.GaussianHMM(n_components=4, covariance_type="diag", n_iter=500)
ModelF_s4.start_prob_ = np.array([0.6, 0.3, 0.1, 0.0])
ModelF_s4.fit(trainF)

GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
      covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
      min_covar=0.001, n_components=4, n_iter=500, params='stmc',
      random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
      verbose=False)

In [390]:
trainA = np.load("project_data/DataSetWithMultipeak/BirdATraining.npy")
trainB = np.load("project_data/DataSetWithMultipeak/BirdBTraining.npy")
trainC = np.load("project_data/DataSetWithMultipeak/BirdCTraining.npy")
trainD = np.load("project_data/DataSetWithMultipeak/BirdDTraining.npy")
trainE = np.load("project_data/DataSetWithMultipeak/BirdETraining.npy")
trainF = np.load("project_data/DataSetWithMultipeak/BirdFTraining.npy")
trainG = np.load("project_data/DataSetWithMultipeak/BirdGTraining.npy")
trainH = np.load("project_data/DataSetWithMultipeak/BirdHTraining.npy")

In [391]:
len(trainH)#('A',trainA),('B',trainB),

13604

In [392]:
trainset =[('C',trainC),('D',trainD),('E',trainE),('F',trainF),('G',trainG),('H',trainH)]

In [393]:
modelDict = dict()
for dataset in trainset:
    model = GMMHMM_train(dataset = dataset[1], mix = 5, states = 2, trials = 1,cov = "diag",max_iter=1000, randomSeed = 1024)
    modelDict[dataset[0]] = model

In [394]:
testA1 = np.load("project_data/DataSetWithMultipeak/TestA09.npy")
testA2 = np.load("project_data/DataSetWithMultipeak/TestA10.npy")
testB1 = np.load("project_data/DataSetWithMultipeak/TestB05.npy")
testC1 = np.load("project_data/DataSetWithMultipeak/TestC06.npy")
testD1 = np.load("project_data/DataSetWithMultipeak/TestD09.npy")
testD2 = np.load("project_data/DataSetWithMultipeak/TestD10.npy")
testE1 = np.load("project_data/DataSetWithMultipeak/TestE11.npy")
testE2 = np.load("project_data/DataSetWithMultipeak/TestE12.npy")
testF1 = np.load("project_data/DataSetWithMultipeak/TestF09.npy")
testF2 = np.load("project_data/DataSetWithMultipeak/TestF10.npy")
testG1 = np.load("project_data/DataSetWithMultipeak/TestG07.npy")
testH1 = np.load("project_data/DataSetWithMultipeak/TestH16.npy")
testH2 = np.load("project_data/DataSetWithMultipeak/TestH17.npy")
testH3 = np.load("project_data/DataSetWithMultipeak/TestH18.npy")

In [397]:
testset = [("C",testC1),("D",testD1),("D",testD2),("E",testE1),("E",testE2),("F",testF1), ("F",testF2), ("G",testG1),("H", testH1),("H", testH2),("H", testH3)]

In [382]:
#testset = [('F',testF1),('F',testF2),('H',testH1),('H',testH2),('H',testH3)] ("A",testA1),("A",testA2),("B",testB1),

In [398]:
GHMM_evaluate(testset,modelDict)

truth:  ['C', 'D', 'D', 'E', 'E', 'F', 'F', 'G', 'H', 'H', 'H']
prediction:  ['G', 'G', 'H', 'E', 'E', 'F', 'C', 'H', 'H', 'G', 'H']
item order in table ['C', 'D', 'E', 'F', 'G', 'H']


array([[0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1, 1],
       [0, 0, 2, 0, 0, 0],
       [1, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 2]])

In [400]:
for test in trainset:
    print inverse_logLike_ratio(test, modelDict)
    print 

('C', {'C': 0.21160435396762661, 'E': 0.15268102749216575, 'D': 0.1250347705462401, 'G': 0.21385977992548055, 'F': 0.16681953216834028, 'H': 0.13000053590014679})

('D', {'C': 0.13269170024918947, 'E': 0.068848024558604615, 'D': 0.21784088239870045, 'G': 0.19506816460411541, 'F': 0.18556148730190691, 'H': 0.19998974088748311})

('E', {'C': 0.84885213033657059, 'E': -0.019444551737001815, 'D': 0.013721217396212694, 'G': 0.11592691989927613, 'F': 0.021106720901140501, 'H': 0.019837563203801992})

('F', {'C': 0.14486772094705133, 'E': 0.075871377392778189, 'D': 0.19718517267727484, 'G': 0.19718986029946253, 'F': 0.19228591935041561, 'H': 0.19259994933301758})

('G', {'C': 0.16657112685568853, 'E': 0.082684605321982546, 'D': 0.15906771141005235, 'G': 0.23091391625754717, 'F': 0.18792628662212396, 'H': 0.17283635353260549})

('H', {'C': 0.11300089398864428, 'E': 0.044272921650065085, 'D': 0.19486673375752261, 'G': 0.21537072544573208, 'F': 0.1776774901818664, 'H': 0.25481123497616964})



In [404]:
prob = dict()

for t, model in modelDict.items():
    prob[t] = model.predict_proba(testE1)

In [412]:
positive = [max(x) for x in prob['E']]

In [422]:
len(testE1)

4007

In [418]:
dir(modelDict['C'])

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getstate__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accumulate_sufficient_statistics',
 '_check',
 '_compute_log_likelihood',
 '_compute_posteriors',
 '_decode_map',
 '_decode_viterbi',
 '_do_backward_pass',
 '_do_forward_pass',
 '_do_mstep',
 '_do_viterbi_pass',
 '_generate_sample_from_state',
 '_get_param_names',
 '_init',
 '_initialize_sufficient_statistics',
 'algorithm',
 'covariance_type',
 'covars_prior',
 'decode',
 'fit',
 'get_params',
 'gmms_',
 'init_params',
 'monitor_',
 'n_components',
 'n_iter',
 'n_mix',
 'params',
 'predict',
 'predict_proba',
 'random_state',
 'sample',
 'score',
 'score_samples',
 'set_params',
 'startprob_',
 'startprob_prior',
 'tol',
 'transmat_',
 'transmat_prior',
 'verbose']