-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.kt
305 lines (268 loc) · 10.3 KB
/
main.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
@file:Suppress("DEPRECATION")
package com.jakewharton.videoswatch
import com.github.ajalt.clikt.core.CliktCommand
import com.github.ajalt.clikt.parameters.arguments.argument
import com.github.ajalt.clikt.parameters.options.convert
import com.github.ajalt.clikt.parameters.options.flag
import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.types.int
import com.jakewharton.videoswatch.ffmpeg.AVCodec
import com.jakewharton.videoswatch.ffmpeg.AVERROR_EOF
import com.jakewharton.videoswatch.ffmpeg.AVFormatContext
import com.jakewharton.videoswatch.ffmpeg.AVMEDIA_TYPE_VIDEO
import com.jakewharton.videoswatch.ffmpeg.AVPacket
import com.jakewharton.videoswatch.ffmpeg.AV_PIX_FMT_RGB0
import com.jakewharton.videoswatch.ffmpeg.SWS_BILINEAR
import com.jakewharton.videoswatch.ffmpeg.av_dump_format
import com.jakewharton.videoswatch.ffmpeg.av_find_best_stream
import com.jakewharton.videoswatch.ffmpeg.av_frame_alloc
import com.jakewharton.videoswatch.ffmpeg.av_free
import com.jakewharton.videoswatch.ffmpeg.av_image_fill_arrays
import com.jakewharton.videoswatch.ffmpeg.av_image_get_buffer_size
import com.jakewharton.videoswatch.ffmpeg.av_packet_unref
import com.jakewharton.videoswatch.ffmpeg.av_read_frame
import com.jakewharton.videoswatch.ffmpeg.avcodec_alloc_context3
import com.jakewharton.videoswatch.ffmpeg.avcodec_free_context2
import com.jakewharton.videoswatch.ffmpeg.avcodec_open2
import com.jakewharton.videoswatch.ffmpeg.avcodec_parameters_to_context
import com.jakewharton.videoswatch.ffmpeg.avcodec_receive_frame
import com.jakewharton.videoswatch.ffmpeg.avcodec_send_packet
import com.jakewharton.videoswatch.ffmpeg.avformat_close_input2
import com.jakewharton.videoswatch.ffmpeg.avformat_find_stream_info
import com.jakewharton.videoswatch.ffmpeg.avformat_open_input
import com.jakewharton.videoswatch.ffmpeg.sws_getContext
import com.jakewharton.videoswatch.ffmpeg.sws_scale
import kotlin.system.getTimeNanos
import kotlin.time.Duration.Companion.nanoseconds
import kotlin.time.measureTime
import kotlinx.cinterop.alloc
import kotlinx.cinterop.allocArray
import kotlinx.cinterop.allocPointerTo
import kotlinx.cinterop.get
import kotlinx.cinterop.memScoped
import kotlinx.cinterop.pointed
import kotlinx.cinterop.ptr
import kotlinx.cinterop.value
import kotlinx.datetime.Clock
import kotlinx.datetime.TimeZone
import kotlinx.datetime.toLocalDateTime
import okio.FileSystem
import okio.Path.Companion.toPath
import platform.posix.EAGAIN
import platform.posix.uint8_tVar
/**
 * Program entry point.
 *
 * Builds a [SwatchCommand] wired to the system clock, the current system time zone, and the
 * system file system, then hands it the raw command-line [args] for parsing and execution.
 */
fun main(vararg args: String) {
  val command = SwatchCommand(
    clock = Clock.System,
    timeZone = TimeZone.currentSystemDefault(),
    outputFs = FileSystem.SYSTEM,
  )
  command.main(args)
}
/**
 * Clikt command which decodes the best video stream of the `VIDEO` argument through FFmpeg,
 * converts every decoded frame to RGB0, and accumulates per-frame sums of the squared red,
 * green, and blue values into a [SliceSummarizer]. The summarized colors are then optionally
 * rendered to a PNG file and/or a text file.
 *
 * @param clock source of the timestamps that prefix debug log lines.
 * @param timeZone zone used to turn [clock] instants into local times for debug logging.
 * @param outputFs file system which receives the rendered PNG and text outputs.
 */
private class SwatchCommand(
  private val clock: Clock,
  private val timeZone: TimeZone,
  private val outputFs: FileSystem,
) : CliktCommand(name = "video-swatch") {
  /** Path of the video file handed to FFmpeg. */
  private val fileName by argument(name = "VIDEO")

  /** Optional destination for the rendered PNG; nothing is written when absent. */
  private val outputPng by option(metavar = "FILE").convert { it.toPath() }

  /** Optional destination for the rendered text; nothing is written when absent. */
  private val outputTxt by option(metavar = "FILE").convert { it.toPath() }

  /** Optional height, in pixels, of the vertically-centered region to sample. */
  private val cropHeight by option(metavar = "PIXELS").int()

  /** Optional width, in pixels, of the horizontally-centered region to sample. */
  private val cropWidth by option(metavar = "PIXELS").int()

  /** Enables timestamped diagnostic output and the FFmpeg format dump. */
  private val debug by option().flag()

  /**
   * Prints [message] prefixed with the current local time when [debug] is set.
   *
   * Continuation lines are indented so they align under the first line. The [message] lambda
   * is only invoked when debugging is enabled.
   */
  private fun debugLog(message: () -> String) {
    if (debug) {
      val time = clock.now().toLocalDateTime(timeZone).time.toString()
      // "[", "]" and the trailing space account for the three extra columns.
      val indented = message()
        .replace("\n", "\n" + " ".repeat(time.length + 3))
      println("[$time] $indented")
    }
  }

  /**
   * Opens the input, decodes every frame of its video stream, summarizes the sampled pixels,
   * and writes the requested outputs.
   *
   * @throws IllegalArgumentException if a crop dimension is not positive or exceeds the frame.
   * @throws IllegalStateException if the decoder reports an unexpected error.
   */
  override fun run(): Unit = closeFinallyScope {
    memScoped {
      val formatContextVar = allocPointerTo<AVFormatContext>()
      avformat_open_input(formatContextVar.ptr, fileName, null, null).checkReturn {
        "Unable to open $fileName"
      }
      val formatContext = formatContextVar.value!!
      closer += { avformat_close_input2(formatContext) }
      debugLog { "Opened input" }

      avformat_find_stream_info(formatContext, null).checkReturn {
        "Unable to get stream info for $fileName"
      }
      debugLog { "Got stream info" }

      if (debug) {
        av_dump_format(formatContext, 0, fileName, 0)
      }

      val codecVar = allocPointerTo<AVCodec>()
      val videoIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_VIDEO, -1, -1, codecVar.ptr, 0).checkReturn {
        "Didn't find a video stream"
      }
      val codec = codecVar.value!!
      debugLog { "Found video stream (index: $videoIndex)" }

      val codecParameters = formatContext.pointed.streams!![videoIndex]!!.pointed.codecpar!!

      val codecContext = avcodec_alloc_context3(codec)
        .checkAlloc("codecContext")
        .scopedUseWithClose(::avcodec_free_context2)
      avcodec_parameters_to_context(codecContext, codecParameters).checkReturn {
        "Cannot copy parameters to context"
      }
      debugLog { "Parameters copied to context" }

      avcodec_open2(codecContext, codec, null).checkReturn {
        "Cannot open codec"
      }
      debugLog { "Opened codec" }

      val frameWidth = codecContext.pointed.width
      val frameHeight = codecContext.pointed.height
      val encodedFormat = codecContext.pointed.pix_fmt
      val decodedFormat = AV_PIX_FMT_RGB0

      // Same dimensions on both sides: the context only converts the pixel format.
      // NOTE(review): this context is never explicitly released — confirm whether that is intended.
      val swsContext = sws_getContext(
        frameWidth,
        frameHeight,
        encodedFormat,
        frameWidth,
        frameHeight,
        decodedFormat,
        SWS_BILINEAR,
        null,
        null,
        null,
      ).checkAlloc("swsContext")

      val frameRate = codecParameters.pointed.framerate.run { num.toFloat() / den }

      val bufferSize = av_image_get_buffer_size(decodedFormat, frameWidth, frameHeight, 1)
      // The pixel scan below indexes the buffer assuming tightly-packed 4-byte pixels.
      check(bufferSize == frameWidth * frameHeight * 4) {
        "Unexpected decoded buffer size $bufferSize for ${frameWidth}x$frameHeight frame"
      }
      val decodedBuffer = allocArray<uint8_tVar>(bufferSize)
        .checkAlloc("decodedBuffer")

      val decodedFrame = av_frame_alloc()
        .checkAlloc("decodedFrame")
        .scopedUseWithClose(::av_free)
        .pointed
      av_image_fill_arrays(decodedFrame.data, decodedFrame.linesize, decodedBuffer, decodedFormat, frameWidth, frameHeight, 1)

      val encodedFrame = av_frame_alloc()
        .checkAlloc("encodedFrame")
        .scopedUseWithClose(::av_free)
        .pointed

      // Reject non-positive crops up front: they would otherwise yield empty scan ranges and a
      // zero (or nonsensical) pixel count for the summarizer.
      cropWidth?.let { require(it > 0) { "Expected crop width $it > 0" } }
      cropHeight?.let { require(it > 0) { "Expected crop height $it > 0" } }

      // The sampled region is centered within the frame; without a crop it is the whole frame.
      val sampleWidth = cropWidth ?: frameWidth
      val sampleHeight = cropHeight ?: frameHeight
      val frameXStart = (frameWidth - sampleWidth) / 2
      val frameXEnd = frameXStart + sampleWidth
      val frameYStart = (frameHeight - sampleHeight) / 2
      val frameYEnd = frameYStart + sampleHeight
      require(frameXStart >= 0) { "Expected crop width $cropWidth <= frame width $frameWidth" }
      require(frameYStart >= 0) { "Expected crop height $cropHeight <= frame height $frameHeight" }
      debugLog {
        "Sampling pixel rows $frameYStart..${frameYEnd - 1}, columns $frameXStart..${frameXEnd - 1}"
      }

      val framePixelCount = sampleWidth * sampleHeight
      val sliceSummarizer = SliceSummarizer(framePixelCount)

      // A slice spans `frameRate` frames; the countdown is fractional so that non-integer
      // frame rates do not drift.
      var sliceRemainingFrames = frameRate
      var sliceIndex = 0
      var frameIndex = 0

      var lastFrameIndex = frameIndex
      val firstFrameTime = getTimeNanos()
      var lastFrameTime = firstFrameTime

      val avPacket = alloc<AVPacket>()

      /** Receives and processes every frame the decoder currently has available. */
      fun receiveAvailableFrames() {
        while (true) {
          val receiveFrameResult: Int
          val receiveFrameTook = measureTime {
            receiveFrameResult = avcodec_receive_frame(codecContext, encodedFrame.ptr)
          }
          when (receiveFrameResult) {
            0 -> {
              val conversionTook = measureTime {
                sws_scale(
                  swsContext,
                  encodedFrame.data,
                  encodedFrame.linesize,
                  0,
                  frameHeight,
                  decodedFrame.data,
                  decodedFrame.linesize,
                )
              }

              val scanPixelsTook = measureTime {
                val data = decodedFrame.data[0]!!
                var frameRedSum = 0L
                var frameGreenSum = 0L
                var frameBlueSum = 0L
                for (y in frameYStart until frameYEnd) {
                  val yOffset = y * frameWidth * 4
                  for (x in frameXStart until frameXEnd) {
                    val offset = yOffset + x * 4
                    // Channels are summed as squares; bytes 0..2 are R, G, B and byte 3 is unused.
                    val red = data[offset].toInt()
                    frameRedSum += red * red
                    val green = data[offset + 1].toInt()
                    frameGreenSum += green * green
                    val blue = data[offset + 2].toInt()
                    frameBlueSum += blue * blue
                  }
                }
                sliceSummarizer += FrameSummary(
                  slice = sliceIndex,
                  red = frameRedSum,
                  green = frameGreenSum,
                  blue = frameBlueSum,
                )
              }

              // Report throughput at most about once per second.
              val timeNanos = getTimeNanos()
              val timeDelta = timeNanos - lastFrameTime
              if (timeDelta > NANOS_PER_SECOND) {
                lastFrameTime = timeNanos
                val frames = framesPerSecond(frameIndex - lastFrameIndex, timeDelta)
                val avg = framesPerSecond(frameIndex, timeNanos - firstFrameTime)
                println("${frameIndex + 1} frames processed, $frames fps ($avg average)")
                lastFrameIndex = frameIndex
              }

              debugLog {
                """
                |FRAME $frameIndex
                | slice index: $sliceIndex
                | slice frames remaining: $sliceRemainingFrames
                | receiveFrame: $receiveFrameTook
                | conversion: $conversionTook
                | scanPixels: $scanPixelsTook
                """.trimMargin()
              }

              // NOTE(review): with an integer frame rate the `< 0` test lets the first slice
              // take one extra frame — confirm whether `<= 0` was intended.
              sliceRemainingFrames--
              if (sliceRemainingFrames < 0) {
                sliceIndex++
                // Add instead of assigning to retain fractional remainder.
                sliceRemainingFrames += frameRate
              }
              frameIndex++
            }
            // Nothing more to read right now (or ever, once the decoder has been flushed).
            AVERROR_EOF, -EAGAIN -> return
            else -> throw IllegalStateException("Error receiving frame $receiveFrameResult")
          }
        }
      }

      /**
       * Hands the current packet to the decoder, or a null packet when [flush] is true to
       * switch the decoder into draining mode. Retries after emptying the decoder's output
       * whenever it reports that its buffers are full.
       */
      fun sendToDecoder(flush: Boolean) {
        while (true) {
          val packet = if (flush) null else avPacket.ptr
          when (val sendPacketResult = avcodec_send_packet(codecContext, packet)) {
            // Packet was accepted by decoder.
            0 -> return
            // Decoder buffers are full. Drain the available frames before retrying this one.
            -EAGAIN -> receiveAvailableFrames()
            else -> throw IllegalStateException("Error sending packet to decoder: $sendPacketResult")
          }
        }
      }

      while (av_read_frame(formatContext, avPacket.ptr) >= 0) {
        if (avPacket.stream_index == videoIndex) {
          sendToDecoder(flush = false)
        }
        av_packet_unref(avPacket.ptr)
      }

      // End of input: enter draining mode so frames still buffered inside the decoder are
      // summarized instead of being silently dropped.
      sendToDecoder(flush = true)
      receiveAvailableFrames()

      val totalNanos = getTimeNanos() - firstFrameTime
      val totalDuration = totalNanos.nanoseconds
      val totalFps = framesPerSecond(frameIndex, totalNanos)
      println()
      // frameIndex was incremented after every processed frame, so it already is the total.
      println("$frameIndex frames, $totalFps fps, $totalDuration")

      val colors = sliceSummarizer.summarize()

      outputPng?.let { outputPng ->
        val png = renderPng(colors)
        outputFs.write(outputPng) {
          write(png)
        }
      }
      outputTxt?.let { outputTxt ->
        val txt = renderTxt(colors)
        outputFs.write(outputTxt) {
          writeUtf8(txt)
        }
      }
    }
  }

  private companion object {
    const val NANOS_PER_SECOND = 1_000_000_000L

    /**
     * Returns the whole number of [frames] per second over [elapsedNanos], or 0 when no time
     * has elapsed. Scaling the frame count rather than truncating the elapsed time to whole
     * seconds keeps sub-second runs from dividing by zero.
     */
    fun framesPerSecond(frames: Int, elapsedNanos: Long): Long =
      if (elapsedNanos > 0L) frames * NANOS_PER_SECOND / elapsedNanos else 0L
  }
}