Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 0 additions & 27 deletions Package.resolved

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ let package = Package(
dependencies: [
// Dependencies declare other packages that this package depends on.
.package(url: "https://github.com/apple/swift-numerics", from: "1.0.0"),
.package(url: "https://github.com/ordo-one/TextTable", .upToNextMajor(from: "0.0.1")),
.package(url: "https://github.com/apple/swift-docc-plugin", from: "1.1.0"),
.package(url: "https://github.com/SwiftPackageIndex/SPIManifest.git", from: "0.12.0"),
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
Expand All @@ -33,7 +31,6 @@ let package = Package(
name: "Histogram",
dependencies: [
.product(name: "Numerics", package: "swift-numerics"),
.product(name: "TextTable", package: "TextTable"),
]),
.executableTarget(
name: "HistogramExample",
Expand Down
158 changes: 85 additions & 73 deletions Sources/Histogram/Histogram.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
// http://www.apache.org/licenses/LICENSE-2.0
//

// swiftlint:disable file_length type_body_length line_length identifier_name

import Foundation
import Numerics
import TextTable

/**
* Number of significant digits for values recorded in histogram.
Expand Down Expand Up @@ -49,7 +51,6 @@ public enum HistogramOutputFormat {
* they are encountered. Note that recording calls that cause auto-resizing may take longer to execute, as resizing
* incurs allocation and copying of internal data structures.
*/

public struct Histogram<Count: FixedWidthInteger> {
/// The lowest value that can be discerned (distinguished from 0) by the histogram.
public let lowestDiscernibleValue: UInt64
Expand All @@ -69,17 +70,13 @@ public struct Histogram<Count: FixedWidthInteger> {
// Biggest value that can fit in bucket 0
let subBucketMask: UInt64

@usableFromInline
var maxValue: UInt64 = 0
@usableFromInline var maxValue: UInt64 = 0

@usableFromInline
var minNonZeroValue: UInt64 = .max
@usableFromInline var minNonZeroValue: UInt64 = .max

@usableFromInline
var counts: [Count]
@usableFromInline var counts: [Count]

@usableFromInline
var _totalCount: UInt64 = 0
@usableFromInline var _totalCount: UInt64 = 0

/// Total count of all recorded values in the histogram
public var totalCount: UInt64 { _totalCount }
Expand All @@ -88,7 +85,7 @@ public struct Histogram<Count: FixedWidthInteger> {
public let numberOfSignificantValueDigits: SignificantDigits

/// Control whether or not the histogram can auto-resize and auto-adjust its ``highestTrackableValue``.
public var autoResize: Bool = false
public var autoResize = false

let subBucketHalfCountMagnitude: UInt8

Expand Down Expand Up @@ -167,7 +164,7 @@ public struct Histogram<Count: FixedWidthInteger> {
// fits in 62 bits is debatable, and it makes it harder to work through the logic.
// Sums larger than 64 are totally broken as leadingZeroCountBase would go negative.
precondition(unitMagnitude + subBucketHalfCountMagnitude <= 61,
"Invalid arguments: Cannot represent numberOfSignificantValueDigits worth of values beyond lowestDiscernibleValue")
"Invalid arguments: Cannot represent numberOfSignificantValueDigits worth of values beyond lowestDiscernibleValue")

// Establish leadingZeroCountBase, used in bucketIndexForValue() fast path:
// subtract the bits that would be used by the largest value in bucket 0.
Expand Down Expand Up @@ -226,8 +223,12 @@ public struct Histogram<Count: FixedWidthInteger> {
return false
}

if index >= counts.count && autoResize {
resize(newHighestTrackableValue: value)
if index >= counts.count {
if autoResize {
resize(newHighestTrackableValue: value)
} else {
return false
}
}

incrementCountForIndex(index, by: count)
Expand Down Expand Up @@ -472,7 +473,7 @@ public struct Histogram<Count: FixedWidthInteger> {
* - Returns: The mean value (in value units) of the histogram data.
*/
public var mean: Double {
if (totalCount == 0) {
if totalCount == 0 {
return 0.0
}
var totalValue: Double = 0
Expand All @@ -488,7 +489,7 @@ public struct Histogram<Count: FixedWidthInteger> {
* - Returns: The standard deviation (in value units) of the histogram data.
*/
public var stdDeviation: Double {
if (totalCount == 0) {
if totalCount == 0 {
return 0.0
}

Expand All @@ -515,7 +516,7 @@ public struct Histogram<Count: FixedWidthInteger> {
/**
* Represents a value point iterated through in a Histogram, with associated stats.
*/
public struct IterationValue {
public struct IterationValue: Equatable {
/**
* The actual value level that was iterated to by the iterator.
*/
Expand All @@ -537,6 +538,15 @@ public struct Histogram<Count: FixedWidthInteger> {
*/
public let percentile: Double

/**
* The percentile level that the iterator returning this ``IterationValue`` had iterated to.
* Generally, `percentileLevelIteratedTo` will be equal to or smaller than `percentile`,
* but the same value point can contain multiple iteration levels for some iterators. E.g. a
* percentile iterator can stop multiple times in the exact same value point (if the count at
* that value covers a range of multiple percentiles in the requested percentile iteration points).
*/
public let percentileLevelIteratedTo: Double

/**
* The count of recorded values in the histogram that were added to the ``totalCountToThisValue`` as a result
* on this iteration step. Since multiple iteration steps may occur with overlapping equivalent value ranges,
Expand Down Expand Up @@ -577,7 +587,7 @@ public struct Histogram<Count: FixedWidthInteger> {

var countAtThisValue: Count = 0

private var freshSubBucket: Bool = true
private var freshSubBucket = true

init(histogram: Histogram) {
self.histogram = histogram
Expand All @@ -600,16 +610,19 @@ public struct Histogram<Count: FixedWidthInteger> {
}
}

mutating func makeIterationValueAndUpdatePrev(value: UInt64? = nil) -> IterationValue {
mutating func makeIterationValueAndUpdatePrev(value: UInt64? = nil, percentileIteratedTo: Double? = nil) -> IterationValue {
let valueIteratedTo = value ?? self.valueIteratedTo

defer {
prevValueIteratedTo = valueIteratedTo
totalCountToPrevIndex = totalCountToCurrentIndex
}

return IterationValue(value: valueIteratedTo, prevValue: prevValueIteratedTo, count: countAtThisValue,
percentile: (100.0 * Double(totalCountToCurrentIndex)) / Double(arrayTotalCount),
let percentile = (100.0 * Double(totalCountToCurrentIndex)) / Double(arrayTotalCount)

return IterationValue(
value: valueIteratedTo, prevValue: prevValueIteratedTo, count: countAtThisValue,
percentile: percentile, percentileLevelIteratedTo: percentileIteratedTo ?? percentile,
countAddedInThisIterationStep: totalCountToCurrentIndex - totalCountToPrevIndex,
totalCountToThisValue: totalCountToCurrentIndex, totalValueToThisValue: totalValueToCurrentIndex)
}
Expand Down Expand Up @@ -665,7 +678,7 @@ public struct Histogram<Count: FixedWidthInteger> {
defer {
incrementIterationLevel()
}
return impl.makeIterationValueAndUpdatePrev()
return impl.makeIterationValueAndUpdatePrev(percentileIteratedTo: percentileLevelToIterateTo)
}
impl.incrementSubBucket()
}
Expand Down Expand Up @@ -1012,69 +1025,66 @@ public struct Histogram<Count: FixedWidthInteger> {
outputValueUnitScalingRatio: Double,
percentileTicksPerHalfDistance ticks: Int = 5,
format: HistogramOutputFormat = .plainText) {
// small helper to pad strings to specific widths, for some reason "%10s"/"%10@" doesn't work in String.init(format:)
func padded(_ s: String, to: Int) -> String {
if s.count < to {
return String(repeating: " ", count: to - s.count) + s
}
return s
}

if format == .csv {
return outputPercentileDistributionCsv(to: &stream, outputValueUnitScalingRatio: outputValueUnitScalingRatio, percentileTicksPerHalfDistance: ticks)
stream.write("\"Value\",\"Percentile\",\"TotalCount\",\"1/(1-Percentile)\"\n")
} else {
stream.write("\(padded("Value", to: 12)) \(padded("Percentile", to: 14)) \(padded("TotalCount", to: 10)) \(padded("1/(1-Percentile)", to: 14))\n\n")
}

let table = TextTable<IterationValue> {
let lastLine = ($0.percentile == 100.0)
let percentileFormatString = format == .csv ?
"%.\(numberOfSignificantValueDigits.rawValue)f,%.12f,%d,%.2f\n" :
"%12.\(numberOfSignificantValueDigits.rawValue)f %2.12f %10d %14.2f\n"

return [
Column("Value" <- "%.\(self.numberOfSignificantValueDigits.rawValue)f".format(Double($0.value) / outputValueUnitScalingRatio), width: 12, align: .right),
Column("Percentile" <- "%.12f".format($0.percentile / 100.0), width: 14, align: .right),
Column("TotalCount" <- $0.totalCountToThisValue, width: 10, align: .right),
Column("1/(1-Percentile)" <- (lastLine ? "" : "%.2f".format(1.0 / (1.0 - ($0.percentile / 100.0)))), align: .right)
]
}

let data = [IterationValue](percentiles(ticksPerHalfDistance: ticks))
stream.write(table.string(for: data) ?? "unable to render percentile table")

// Calculate and output mean and std. deviation.
// Note: mean/std. deviation numbers are very often completely irrelevant when
// data is extremely non-normal in distribution (e.g. in cases of strong multi-modal
// response time distribution associated with GC pauses). However, reporting these numbers
// can be very useful for contrasting with the detailed percentile distribution
// reported by outputPercentileDistribution(). It is not at all surprising to find
// percentile distributions where results fall many tens or even hundreds of standard
// deviations away from the mean - such results simply indicate that the data sampled
// exhibits a very non-normal distribution, highlighting situations for which the std.
// deviation metric is a useless indicator.

let mean = self.mean / outputValueUnitScalingRatio
let stdDeviation = self.stdDeviation / outputValueUnitScalingRatio

stream.write(("#[Mean = %12.\(numberOfSignificantValueDigits.rawValue)f," +
" StdDeviation = %12.\(numberOfSignificantValueDigits.rawValue)f]\n").format(mean, stdDeviation))
stream.write(("#[Max = %12.\(numberOfSignificantValueDigits.rawValue)f," +
" Total count = %12d]\n").format(Double(max) / outputValueUnitScalingRatio, totalCount))
stream.write("#[Buckets = %12d, SubBuckets = %12d]\n".format(bucketCount, subBucketCount))
}

private func outputPercentileDistributionCsv<Stream: TextOutputStream>(
to stream: inout Stream,
outputValueUnitScalingRatio: Double,
percentileTicksPerHalfDistance ticks: Int = 5) {
stream.write("\"Value\",\"Percentile\",\"TotalCount\",\"1/(1-Percentile)\"\n")

let percentileFormatString = "%.\(numberOfSignificantValueDigits)f,%.12f,%d,%.2f\n"
let lastLinePercentileFormatString = "%.\(numberOfSignificantValueDigits)f,%.12f,%d,Infinity\n"
let lastLinePercentileFormatString = format == .csv ?
"%.\(numberOfSignificantValueDigits.rawValue)f,%.12f,%d,Infinity\n" :
"%12.\(numberOfSignificantValueDigits.rawValue)f %2.12f %10d\n"

for iv in percentiles(ticksPerHalfDistance: ticks) {
if iv.percentile != 100.0 {
stream.write(percentileFormatString.format(
if iv.percentileLevelIteratedTo != 100.0 {
stream.write(String(
format: percentileFormatString,
Double(iv.value) / outputValueUnitScalingRatio,
iv.percentile / 100.0,
iv.percentileLevelIteratedTo / 100.0,
iv.totalCountToThisValue,
1.0 / (1.0 - (iv.percentile / 100.0))))
1.0 / (1.0 - (iv.percentileLevelIteratedTo / 100.0))))
} else {
stream.write(lastLinePercentileFormatString.format(
stream.write(String(
format: lastLinePercentileFormatString,
Double(iv.value) / outputValueUnitScalingRatio,
iv.percentile / 100.0,
iv.percentileLevelIteratedTo / 100.0,
iv.totalCountToThisValue))
}
}

if format != .csv {
// Calculate and output mean and std. deviation.
// Note: mean/std. deviation numbers are very often completely irrelevant when
// data is extremely non-normal in distribution (e.g. in cases of strong multi-modal
// response time distribution associated with GC pauses). However, reporting these numbers
// can be very useful for contrasting with the detailed percentile distribution
// reported by outputPercentileDistribution(). It is not at all surprising to find
// percentile distributions where results fall many tens or even hundreds of standard
// deviations away from the mean - such results simply indicate that the data sampled
// exhibits a very non-normal distribution, highlighting situations for which the std.
// deviation metric is a useless indicator.
//

let mean = self.mean / outputValueUnitScalingRatio
let stdDeviation = self.stdDeviation / outputValueUnitScalingRatio
stream.write(String(format: "#[Mean = %12.\(numberOfSignificantValueDigits.rawValue)f," +
" StdDeviation = %12.\(numberOfSignificantValueDigits.rawValue)f]\n", mean, stdDeviation))
stream.write(String(format: "#[Max = %12.\(numberOfSignificantValueDigits.rawValue)f," +
" Total count = %12d]\n", Double(max) / outputValueUnitScalingRatio, totalCount))
stream.write(String(format: "#[Buckets = %12d, SubBuckets = %12d]\n", bucketCount, subBucketCount))
}
}

// MARK: Structure querying support.
Expand Down Expand Up @@ -1257,8 +1267,8 @@ public struct Histogram<Count: FixedWidthInteger> {
private static func bucketsNeededToCoverValue(_ value: UInt64, subBucketCount: Int, unitMagnitude: UInt8) -> Int {
var smallestUntrackableValue = UInt64(subBucketCount) << unitMagnitude
var bucketsNeeded = 1
while (smallestUntrackableValue <= value) {
if (smallestUntrackableValue > UInt64.max / 2) {
while smallestUntrackableValue <= value {
if smallestUntrackableValue > UInt64.max / 2 {
return bucketsNeeded + 1
}
smallestUntrackableValue <<= 1
Expand Down Expand Up @@ -1301,6 +1311,7 @@ extension Histogram: Equatable {
// resizing.
if lhs.counts.count == rhs.counts.count {
for i in 0..<lhs.counts.count {
// swiftlint:disable:next for_where
if lhs.counts[i] != rhs.counts[i] {
return false
}
Expand All @@ -1309,6 +1320,7 @@ extension Histogram: Equatable {
// Comparing the values is valid here because we have already confirmed the histograms have the same total
// count. It would not be correct otherwise.
for iv in lhs.recordedValues() {
// swiftlint:disable:next for_where
if rhs.countForValue(iv.value) != iv.count {
return false
}
Expand Down
Loading