forked from WeatherGod/NNforZR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
histtools.py
61 lines (43 loc) · 2.37 KB
/
histtools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy
import scipy.stats # for scipy.stats.scoreatpercentile()
def OptimalBinSize(vals):
    """Estimate a histogram bin width for the one-dimensional data `vals`.

    For more than four values the width is 2 * IQR * n**(-1/3)
    (the Freedman-Diaconis rule).  For four or fewer values, or when
    the first and third quartiles coincide because many elements share
    the same value, the width is 3.49 * std * n**(-1/3) instead
    (Scott's rule), so a zero interquartile range no longer produces a
    zero bin size.

    The estimate is then re-adjusted so that a whole number of
    equally-spaced bins covers the range max(vals) - min(vals).

    Parameters
    ----------
    vals : sequence or 1-D array of numbers

    Returns
    -------
    The adjusted bin width.  It is 0.0 when every value is identical,
    because the data then has no extent to divide into bins.

    Raises
    ------
    ValueError
        If `vals` is empty.
    """
    count = len(vals)
    if count == 0:
        raise ValueError("OptimalBinSize() needs at least one value")

    span = max(vals) - min(vals)
    if span == 0:
        # All values are identical; there is no range to split up.
        return 0.0

    scale = count ** (-1.0 / 3.0)
    binSize = 0.0
    if count > 4:
        upperQuartile = scipy.stats.scoreatpercentile(vals, 75)
        lowerQuartile = scipy.stats.scoreatpercentile(vals, 25)
        binSize = 2.0 * (upperQuartile - lowerQuartile) * scale

    if not binSize > 0:
        # Either there are too few values for a quartile estimate, or
        # the quartiles coincide.  The standard deviation is non-zero
        # here because the range is non-zero.
        binSize = 3.49 * numpy.std(vals) * scale

    # Don't forget to re-adjust the binsize estimate to make sure the
    # size will produce equally-spaced bins for the data.
    return span / max(numpy.ceil(span / binSize), 1)
def OptimalBinCount(vals):
    """Return how many bins of width OptimalBinSize(vals) span the data.

    Parameters
    ----------
    vals : sequence or 1-D array of numbers
        Converted with numpy.asarray(), so plain lists are accepted in
        addition to arrays.

    Returns
    -------
    The bin count as a floating-point whole number (the result of
    numpy.ceil), never less than one.  When the bin size is zero or
    NaN a single bin is reported instead of dividing by it.
    """
    data = numpy.asarray(vals)
    binSize = OptimalBinSize(data)

    # "not > 0" also catches NaN, which compares false against anything.
    if not binSize > 0:
        return 1.0

    return max(numpy.ceil((data.max() - data.min()) / binSize), 1)
def MakeBins(vals, binSize):
    """Return equally-spaced bin edges covering [min(vals), max(vals)].

    numpy.arange() leaves out the stop value, so edges built with it
    either miss the maximum or need the last edge overwritten, which
    produces an oversized final bin (or a single edge when the data
    range is smaller than `binSize`).  The edges are therefore built
    with numpy.linspace(), which includes both end points.

    Parameters
    ----------
    vals : non-empty sequence or 1-D array of numbers
    binSize : positive number
        Requested bin width.  When it does not divide the data range
        evenly, the number of bins is rounded up and the actual width
        is slightly smaller than requested.

    Returns
    -------
    1-D float array of (number of bins + 1) edges; the first edge is
    min(vals) and the last is max(vals).  When all values are equal
    the two edges [min, max] are returned regardless of `binSize`.

    Raises
    ------
    ValueError
        If the data range is non-zero and `binSize` is not positive.
    """
    lowest = min(vals)
    highest = max(vals)
    span = float(highest - lowest)

    if span == 0:
        # Degenerate data: one zero-width bin holding every value.
        return numpy.array([lowest, highest], dtype=float)

    if not binSize > 0:
        raise ValueError("binSize must be positive, got %r" % (binSize,))

    # The tiny relative slack keeps floating-point noise in an exact
    # ratio (e.g. 3.0000000001) from adding a spurious extra bin.
    ratio = (span / binSize) * (1.0 - 1e-9)
    numBins = max(int(numpy.ceil(ratio)), 1)

    return numpy.linspace(lowest, highest, numBins + 1)
def _BinIndices(vals, bins):
    """Return, for each value, the index of the bin that contains it."""
    edges = numpy.asarray(bins, dtype=float)
    data = numpy.asarray(vals, dtype=float)

    if edges.size < 2:
        raise IndexError("at least two bin edges are required")

    # The last bin is closed on the right (with a 100-epsilon margin)
    # so that the maximum value lands in it instead of in no bin.
    upper = edges[-1] + 100 * numpy.finfo(float).eps
    inside = numpy.logical_and(data >= edges[0], data <= upper)
    if not inside.all():
        # NaN fails both comparisons, so it is rejected here as well.
        raise IndexError("value outside the range covered by the bin edges")

    found = numpy.searchsorted(edges, data, side='right') - 1
    # Values sitting on the final edge come back one past the last bin.
    return numpy.minimum(found, edges.size - 2)


def Hist2d(vals1, bins1, vals2, bins2):
    """Count paired values into a two-dimensional histogram.

    Bin k of an edge array covers [edges[k], edges[k + 1]); the last
    bin also includes its upper edge.  The edge arrays are not
    modified.

    Parameters
    ----------
    vals1, vals2 : sequences or 1-D arrays of numbers
        Paired samples.  If the lengths differ, only the leading pairs
        up to the shorter length are counted.
    bins1, bins2 : sequences or 1-D arrays of bin edges
        Assumed to be in ascending order -- TODO confirm with callers.

    Returns
    -------
    (n_2d, binLocs)
        n_2d is a float array of shape (len(bins2) - 1, len(bins1) - 1)
        whose element [j, i] counts the pairs with vals2 in bin j and
        vals1 in bin i.  binLocs is a list holding the (j, i) index
        location of each pair, in input order.

    Raises
    ------
    IndexError
        If a value lies outside the range covered by its bin edges, or
        fewer than two edges are given.
    """
    cols = _BinIndices(vals1, bins1)
    rows = _BinIndices(vals2, bins2)

    # A real list (not a one-shot iterator), so that it is still usable
    # by the caller after the counting loop below has walked over it.
    binLocs = list(zip(rows.tolist(), cols.tolist()))

    n_2d = numpy.zeros((len(bins2) - 1, len(bins1) - 1))
    for aCoord in binLocs:
        n_2d[aCoord] += 1

    return (n_2d, binLocs)