forked from go-gitea/gitea
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtypesniffer_test.go
166 lines (142 loc) · 6.8 KB
/
typesniffer_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package typesniffer
import (
"bytes"
"encoding/base64"
"encoding/hex"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDetectContentTypeLongerThanSniffLen checks that an SVG root element is
// reported as SVG when it follows a short comment, but not when the preceding
// comment is padded with sniffLen filler bytes.
func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
	// Baseline: with a short leading comment the content type is SVG.
	shortDoc := []byte(`<!-- Comment --><svg></svg>`)
	assert.Equal(t, "image/svg+xml", DetectContentType(shortDoc).contentType)

	// The padding alone is sniffLen bytes long, so the <svg> tag starts past
	// that offset; the result must no longer be SVG.
	padding := strings.Repeat("x", sniffLen)
	longDoc := []byte(`<!-- ` + padding + ` --><svg></svg>`)
	assert.NotEqual(t, "image/svg+xml", DetectContentType(longDoc).contentType)
}
func TestIsTextFile(t *testing.T) {
assert.True(t, DetectContentType([]byte{}).IsText())
assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
}
func TestIsSvgImage(t *testing.T) {
assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Multiline
Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
// the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
assert.False(t, DetectContentType([]byte{}).IsSvgImage())
assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`
<!-- comment1 -->
<div>
<!-- comment2 -->
<svg></svg>
</div>
`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`
<!-- comment1
-->
<div>
<!-- comment2
-->
<svg></svg>
</div>
`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
}
// TestIsPDF checks that IsPDF is true for data starting with a PDF header and
// false for plain text.
func TestIsPDF(t *testing.T) {
	// Base64 of the leading bytes of a PDF document (the payload starts with "%PDF-1.6").
	pdf, err := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
	if err != nil {
		// A corrupted fixture should be reported as such instead of showing up
		// later as a misleading detection failure.
		t.Fatalf("decoding PDF fixture: %v", err)
	}
	assert.True(t, DetectContentType(pdf).IsPDF())
	assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
}
// TestIsVideo checks that IsVideo is true for the start of an MP4 file and
// false for plain text.
func TestIsVideo(t *testing.T) {
	// Base64 of the leading bytes of an MP4 sample (the payload contains "ftypmp42").
	mp4, err := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
	if err != nil {
		// A corrupted fixture should be reported as such instead of showing up
		// later as a misleading detection failure.
		t.Fatalf("decoding MP4 fixture: %v", err)
	}
	assert.True(t, DetectContentType(mp4).IsVideo())
	assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
}
// TestIsAudio checks that IsAudio is true for data starting with an ID3 tag,
// and that plain text which merely begins with "ID3" is still treated as text.
func TestIsAudio(t *testing.T) {
	// Base64 of the leading bytes of an MP3 sample (the payload starts with "ID3").
	mp3, err := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
	if err != nil {
		// A corrupted fixture should be reported as such instead of showing up
		// later as a misleading detection failure.
		t.Fatalf("decoding MP3 fixture: %v", err)
	}
	assert.True(t, DetectContentType(mp3).IsAudio())
	assert.False(t, DetectContentType([]byte("plain text")).IsAudio())

	// "ID3" prefix followed by a NUL byte is expected to be classified as audio.
	assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text
	// "🌛"[0:2] keeps only the first two bytes of a multi-byte UTF-8 sequence,
	// i.e. the input ends in a truncated character.
	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}
// TestDetectContentTypeFromReader checks that sniffing through an io.Reader
// succeeds and classifies an MP3 sample as audio.
func TestDetectContentTypeFromReader(t *testing.T) {
	// Base64 of the leading bytes of an MP3 sample (the payload starts with "ID3").
	mp3, err := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
	if err != nil {
		// A corrupted fixture should be reported as such instead of showing up
		// later as a misleading detection failure.
		t.Fatalf("decoding MP3 fixture: %v", err)
	}
	st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
	assert.NoError(t, err)
	assert.True(t, st.IsAudio())
}
// TestDetectContentTypeOgg checks that two Ogg samples are told apart: the one
// carrying a "vorbis" header is audio, the one carrying a "theora" header is
// video.
func TestDetectContentTypeOgg(t *testing.T) {
	// Hex of an Ogg page ("OggS") whose payload contains "vorbis".
	oggAudio, err := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
	if err != nil {
		// A corrupted fixture should be reported as such instead of showing up
		// later as a misleading detection failure.
		t.Fatalf("decoding Ogg audio fixture: %v", err)
	}
	st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
	assert.NoError(t, err)
	assert.True(t, st.IsAudio())

	// Hex of an Ogg page ("OggS") whose payload contains "theora".
	oggVideo, err := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
	if err != nil {
		t.Fatalf("decoding Ogg video fixture: %v", err)
	}
	st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
	assert.NoError(t, err)
	assert.True(t, st.IsVideo())
}
// TestDetectFileTypeBox feeds detectFileTypeBox hand-built "ftyp" headers with
// different declared sizes (the first four bytes) and checks whether a box is
// reported and which brand strings are returned.
func TestDetectFileTypeBox(t *testing.T) {
	// probe runs the detector on a string-literal payload.
	probe := func(raw string) ([]string, bool) {
		return detectFileTypeBox([]byte(raw))
	}

	// Declared size 0xffff is far larger than the data supplied: not found.
	_, ok := probe("\x00\x00\xff\xffftypAAAA....")
	assert.False(t, ok)

	// Declared size 0x0c (12) ends right after "AAAA".
	got, ok := probe("\x00\x00\x00\x0cftypAAAA")
	assert.True(t, ok)
	assert.Equal(t, []string{"AAAA"}, got)

	// Declared size 0x10 (16) ends before "BBBB", which must not be reported.
	got, ok = probe("\x00\x00\x00\x10ftypAAAA....BBBB")
	assert.True(t, ok)
	assert.Equal(t, []string{"AAAA"}, got)

	// Declared size 0x14 (20) includes "BBBB".
	got, ok = probe("\x00\x00\x00\x14ftypAAAA....BBBB")
	assert.True(t, ok)
	assert.Equal(t, []string{"AAAA", "BBBB"}, got)

	// Declared size 0x14 (20) but only 19 bytes are present: not found.
	_, ok = probe("\x00\x00\x00\x14ftypAAAA....BBB")
	assert.False(t, ok)

	// Declared size 0x13 (19) leaves a three-byte tail "BBB", which is not
	// reported as a brand.
	got, ok = probe("\x00\x00\x00\x13ftypAAAA....BBB")
	assert.True(t, ok)
	assert.Equal(t, []string{"AAAA"}, got)
}
// TestDetectContentTypeAvif checks that a payload made of the bytes 0x00000020,
// "ftyp", "avif" and filler is reported with the AVIF MIME type.
func TestDetectContentTypeAvif(t *testing.T) {
	sniffed := DetectContentType([]byte("\x00\x00\x00\x20ftypavif......................."))
	assert.Equal(t, MimeTypeImageAvif, sniffed.contentType)
}