-
Notifications
You must be signed in to change notification settings - Fork 66
/
evaluation.py
123 lines (94 loc) · 4.43 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2013 Music Technology Group - Universitat Pompeu Fabra
#
# This file is part of Gaia
#
# Gaia is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/
from __future__ import absolute_import
from gaia2 import DataSet
from gaia2.utils import TextProgress
from .groundtruth import GroundTruth
from .confusionmatrix import ConfusionMatrix
import random
import logging
log = logging.getLogger('gaia2.classification.Evaluation')
def evaluate(classifier, dataset, groundTruth, confusion = None, nfold=None, verbose=True):
    """Evaluate the classifier on the given dataset and return the confusion matrix.

    Only the points listed in the groundTruth parameter are used for the
    evaluation; results are accumulated into the given confusion matrix
    (a fresh one is created when none is passed in).

    Parameters
    ----------
    classifier : a function which given a point returns its class
    dataset : the dataset from which to get the points
    groundTruth : a map from the points to classify to their respective class
    """
    confusion = confusion or ConfusionMatrix()
    progress = TextProgress(len(groundTruth))

    for position, (pointId, expected) in enumerate(groundTruth.items(), start=1):
        try:
            predicted = classifier(dataset.point(pointId))
            # when evaluating inside a cross-validation, record which fold
            # produced this result so per-fold statistics stay available
            if nfold is None:
                confusion.add(expected, predicted, pointId)
            else:
                confusion.addNfold(expected, predicted, pointId, nfold)
        except Exception as e:
            # log context before propagating: the caller decides how to recover
            log.warning('Could not classify point "%s" because %s' % (pointId, str(e)))
            raise

        if verbose:
            progress.update(position)

    return confusion
def evaluateNfold(nfold, dataset, groundTruth, trainingFunc, seed=None, *args, **kwargs):
    """Evaluate the classifier on the given dataset and return the confusion matrix.

    The evaluation is performed using n-fold cross validation, stratified by
    class: each class's points are shuffled and sliced independently, so every
    fold gets (approximately) the same class distribution.
    Uses only the points that are in the groundTruth parameter for the evaluation.

    Parameters
    ----------
    nfold : the number of folds to use for the cross-validation
    dataset : the dataset from which to get the points
    groundTruth : a map from the points to classify to their respective class
    trainingFunc : a function which will train and return a classifier given a dataset,
                   the groundtruth, and the *args and **kwargs arguments
    seed : optional seed used when shuffling the points, for reproducible folds

    Returns
    -------
    The confusion matrix accumulated over all nfold evaluation runs.
    """
    log.info('Doing %d-fold cross validation' % nfold)
    classes = set(groundTruth.values())
    progress = TextProgress(nfold, 'Evaluating fold %(current)d/%(total)d')

    # get map from class to (shuffled) point names
    iclasses = {}
    for c in classes:
        iclasses[c] = [ p for p in groundTruth.keys() if groundTruth[p] == c ]
        # NOTE: reseeding before each class means every class is shuffled from
        # the same seed state when a seed is given; this keeps fold assignment
        # reproducible (with seed=None the generator is reseeded from entropy)
        random.seed(a=seed)
        random.shuffle(iclasses[c])

    # get folds: fold i takes the i-th slice of every class's shuffled points
    folds = {}
    for i in range(nfold):
        folds[i] = []
        for c in iclasses.values():
            foldsize = (len(c)-1)//nfold + 1 # -1/+1 so we take all instances into account, last fold might have fewer instances
            folds[i] += c[ foldsize * i : foldsize * (i+1) ]

    # build sub-datasets and run evaluation on them
    confusion = None
    pnames = [ p.name() for p in dataset.points() ]

    for i in range(nfold):
        if log.isEnabledFor(logging.INFO):
            progress.update(i+1)

        # membership is tested once per point for train and test splits; a set
        # makes each test O(1) instead of O(foldsize) list scans
        testPoints = set(folds[i])

        trainds = DataSet()
        trainds.addPoints([ dataset.point(pname) for pname in pnames if pname not in testPoints ])
        traingt = GroundTruth(groundTruth.className, { p: c for p, c in groundTruth.items() if p not in testPoints })

        testds = DataSet()
        # str() coercion kept from the original — presumably guards against
        # non-str point-name types from the bindings; TODO confirm
        testds.addPoints([ dataset.point(str(pname)) for pname in folds[i] ])
        testgt = GroundTruth(groundTruth.className, { p: c for p, c in groundTruth.items() if p in testPoints })

        classifier = trainingFunc(trainds, traingt, *args, **kwargs)
        # chain the confusion matrix through the folds, tagging results with
        # the fold index so per-fold statistics stay available
        confusion = evaluate(classifier, testds, testgt, confusion, nfold=i, verbose=False)

    return confusion