AudioAnalysing.swift
//
// Copyright © 2024 Stream.io Inc. All rights reserved.
//

import AVFoundation
/// Describes an object that, given an `AudioAnalysisContext`, analyses and processes it in order to
/// generate a set of data points that describe some characteristics of the audio track provided by the
/// context.
protocol AudioAnalysing {
    /// Analyses and processes the provided context and returns data points limited to the number of
    /// `targetSamples`.
    /// - Parameters:
    ///   - context: The `AudioAnalysisContext` we want to analyse.
    ///   - targetSamples: The number of data points we expect the analyser to output.
    /// - Returns: The processed samples, limited by the number of `targetSamples`.
    func analyse(
        audioAnalysisContext context: AudioAnalysisContext,
        for targetSamples: Int
    ) throws -> [Float]
}
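
// A minimal calling sketch for context. `context` below is an assumption (building an
// `AudioAnalysisContext` happens outside this file) and `100` is an arbitrary number of
// waveform bars:
//
//     let analyser: AudioAnalysing = ...
//     let dataPoints = try analyser.analyse(audioAnalysisContext: context, for: 100)
//     // dataPoints.count <= 100, one value per rendered waveform bar.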

/// An implementation of `AudioAnalysing` that processes an `AudioAnalysisContext` in order
/// to provide information for the visualisation of the audio's waveform.
final class StreamAudioWaveformAnalyser: AudioAnalysing {
    private let audioSamplesExtractor: AudioSamplesExtractor
    private let audioSamplesProcessor: AudioSamplesProcessor
    private let audioSamplesPercentageNormaliser: AudioValuePercentageNormaliser
    private let outputSettings: [String: Any]

    init(
        audioSamplesExtractor: AudioSamplesExtractor,
        audioSamplesProcessor: AudioSamplesProcessor,
        audioSamplesPercentageNormaliser: AudioValuePercentageNormaliser,
        outputSettings: [String: Any]
    ) {
        self.audioSamplesExtractor = audioSamplesExtractor
        self.audioSamplesProcessor = audioSamplesProcessor
        self.audioSamplesPercentageNormaliser = audioSamplesPercentageNormaliser
        self.outputSettings = outputSettings
    }

    func analyse(
        audioAnalysisContext context: AudioAnalysisContext,
        for targetSamples: Int
    ) throws -> [Float] {
        guard
            let reader = try? AVAssetReader(asset: context.asset)
        else {
            throw AudioAnalysingError.failedToReadAsset()
        }

        let totalSamples = context.totalSamples
        let sampleRange = 0..<totalSamples
        let startTime = CMTime(value: Int64(sampleRange.lowerBound), timescale: context.asset.duration.timescale)
        let duration = CMTime(value: Int64(sampleRange.count), timescale: context.asset.duration.timescale)

        guard
            let assetTrack = context.assetTrack
        else {
            throw AudioAnalysingError.failedToReadAsset()
        }

        let readerOutput = AVAssetReaderTrackOutput(
            track: assetTrack,
            outputSettings: outputSettings
        )
        readerOutput.alwaysCopiesSampleData = false
        reader.timeRange = CMTimeRange(start: startTime, duration: duration)
        reader.add(readerOutput)

        /// Calculate the downsampling rate, i.e. the factor by which the number of source samples
        /// will be reduced to reach the desired number of output data points. `sampleRange.count`
        /// is the number of audio samples in the selected range. The `max(1, ...)` part ensures
        /// that the downsampling rate is always at least 1, which means the audio is processed at
        /// the original sample rate if `targetSamples` exceeds the number of available samples.
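        /// For example, a range of 1_000_000 samples with 100 target data points yields a
        /// downsamplingRate of 10_000, so every 10_000 consecutive source samples collapse
        /// into a single output value.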
        let downsamplingRate = max(1, sampleRange.count / targetSamples)

        /// The filter array is a low-pass filter kernel that emphasises lower frequencies and
        /// attenuates higher frequencies, to remove high-frequency noise and avoid aliasing
        /// artifacts during the downsampling process.
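        /// For instance, a downsamplingRate of 4 produces [0.25, 0.25, 0.25, 0.25]: a simple
        /// moving-average (box) kernel whose taps sum to 1, preserving the overall amplitude.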
        let filter = [Float](repeating: 1.0 / Float(downsamplingRate), count: downsamplingRate)

        var outputSamples = [Float]()
        var sampleBuffer = Data() // 16-bit samples

        reader.startReading()
        defer { reader.cancelReading() }

        while reader.status == .reading {
            /// Extract the audio samples from the next sample buffer read by the reader, downsample
            /// them using the specified downsampling rate, and store them in the `sampleBuffer`
            /// variable. The returned `extractionResult` contains the number of samples extracted
            /// and the length of the downsampled audio.
            let extractionResult = audioSamplesExtractor.extractSamples(
                from: readerOutput.copyNextSampleBuffer(),
                sampleBuffer: &sampleBuffer,
                downsamplingRate: downsamplingRate
            )

            /// Skip the current iteration of the loop if no samples were extracted from the current
            /// sample buffer.
            guard extractionResult.samplesToProcess > 0 else { continue }

            /// Process the audio samples stored in `sampleBuffer`, apply the low-pass filter,
            /// downsample them further, and store them in the `outputSamples` array.
            audioSamplesProcessor.processSamples(
                fromData: &sampleBuffer,
                outputSamples: &outputSamples,
                samplesToProcess: extractionResult.samplesToProcess,
                downSampledLength: extractionResult.downSampledLength,
                downsamplingRate: downsamplingRate,
                filter: filter
            )
        }

        /// Process the remaining samples at the end which didn't fit into `samplesPerPixel`. This is
        /// necessary to ensure that all audio data is processed.
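        /// For example, 6 leftover bytes in `sampleBuffer` correspond to 3 remaining 16-bit
        /// samples, which are averaged into a single final data point below.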
        let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
        if samplesToProcess > 0 {
            let downSampledLength = 1
            let samplesPerPixel = samplesToProcess
            let filter = [Float](
                repeating: 1.0 / Float(samplesPerPixel),
                count: samplesPerPixel
            )

            audioSamplesProcessor.processSamples(
                fromData: &sampleBuffer,
                outputSamples: &outputSamples,
                samplesToProcess: samplesToProcess,
                downSampledLength: downSampledLength,
                downsamplingRate: samplesPerPixel,
                filter: filter
            )
        }

        // A stray `|| true` previously made this guard unconditionally succeed, silently
        // swallowing read failures; the status check is now enforced.
        guard reader.status == .completed else {
            throw AudioAnalysingError.failedToReadAudioFile()
        }

        /// Return the output samples after applying a final transformation into percentages.
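        /// (Presumably each value ends up in the 0...1 range, i.e. a percentage of the track's
        /// peak amplitude; the normaliser itself is defined outside this file.)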
        return audioSamplesPercentageNormaliser.normalise(outputSamples)
    }
}
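
// MARK: - Usage
//
// A minimal construction sketch, assuming the collaborator types expose parameterless
// initialisers (they are defined outside this file). The output settings ask the
// `AVAssetReader` for interleaved 16-bit integer PCM, matching the `Int16` arithmetic above:
//
//     let analyser = StreamAudioWaveformAnalyser(
//         audioSamplesExtractor: AudioSamplesExtractor(),
//         audioSamplesProcessor: AudioSamplesProcessor(),
//         audioSamplesPercentageNormaliser: AudioValuePercentageNormaliser(),
//         outputSettings: [
//             AVFormatIDKey: kAudioFormatLinearPCM,
//             AVLinearPCMBitDepthKey: 16,
//             AVLinearPCMIsBigEndianKey: false,
//             AVLinearPCMIsFloatKey: false,
//             AVLinearPCMIsNonInterleaved: false
//         ]
//     )
//     let waveform = try analyser.analyse(audioAnalysisContext: context, for: 100)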

// MARK: - Errors

final class AudioAnalysingError: ClientError {
    /// Failed to read the asset provided by the `AudioAnalysisContext`.
    static func failedToReadAsset(file: StaticString = #file, line: UInt = #line) -> AudioAnalysingError {
        .init("Failed to read AVAsset.", file, line)
    }

    /// Failed to read the data from the provided audio file.
    static func failedToReadAudioFile(file: StaticString = #file, line: UInt = #line) -> AudioAnalysingError {
        .init("Failed to read audio file.", file, line)
    }
}