22
33const { URL } = require ( 'url' )
44
// HTTP/2 pseudo-header names used to look up the parts of a request URL
const HTTP2_HEADER_AUTHORITY = ':authority'
const HTTP2_HEADER_SCHEME = ':scheme'
const HTTP2_HEADER_PATH = ':path'

// Optional `scheme://authority` prefix, then the path (named group `path`, must start with `/`),
// then an optional query string introduced by `?`
const PATH_REGEX = /^(?:[a-z]+:\/\/(?:[^?/]+))?(?<path>\/[^?]*)(?:(\?).*)?$/

const INT_SEGMENT = /^[1-9][0-9]+$/ // Integer of size at least 2 (>=10)
const INT_ID_SEGMENT = /^(?=.*[0-9].*)[0-9._-]{3,}$/ // Mixed string with digits and delimiters
const HEX_SEGMENT = /^(?=.*[0-9].*)[A-Fa-f0-9]{6,}$/ // Hexadecimal digits of size at least 6 with at least one decimal digit
const HEX_ID_SEGMENT = /^(?=.*[0-9].*)[A-Fa-f0-9._-]{6,}$/ // Mixed string with hex digits and delimiters
const STRING_SEGMENT = /^.{20,}|.*[%&'()*+,:=@].*$/ // Long string or a string containing special characters
16+
/**
 * Extract full URL from HTTP request
 *
 * When `req.stream` is set, the URL is assembled from the HTTP/2 pseudo-headers
 * (`:scheme`, `:authority`, `:path`). Otherwise it is built from the protocol
 * reported by `getProtocol`, the `host` header and the request URL, where
 * `req.originalUrl` takes precedence over `req.url` when it is truthy.
 *
 * @param {import('http').IncomingMessage} req
 * @returns {string} Full URL
 */
function extractURL (req) {
  const headers = req.headers

  if (req.stream) {
    return `${headers[HTTP2_HEADER_SCHEME]}://${headers[HTTP2_HEADER_AUTHORITY]}${headers[HTTP2_HEADER_PATH]}`
  }

  const protocol = getProtocol(req)
  return `${protocol}://${headers.host}${req.originalUrl || req.url}`
}
32+
/**
 * Determine the request protocol from the `encrypted` flag of the request's socket.
 *
 * Both `req.socket` and `req.connection` are checked; either may be missing.
 *
 * @param {import('http').IncomingMessage} req
 * @returns {'https' | 'http'} `https` when either socket reports `encrypted`, `http` otherwise
 */
function getProtocol (req) {
  const isEncrypted = req.socket?.encrypted || req.connection?.encrypted
  return isEncrypted ? 'https' : 'http'
}
36+
/**
 * Obfuscate query string
 *
 * Behaviour depends on `config.queryStringObfuscation`:
 * - `false`: the URL is returned unchanged
 * - `true`: the query string (including the `?`) is dropped entirely
 * - anything else: used as the pattern for `String.prototype.replace` on the
 *   query string, each match being replaced by `<redacted>`
 *
 * URLs without a `?` are always returned unchanged.
 *
 * @param {object} config
 * @param {string} url
 * @returns {string} obfuscated URL
 */
function obfuscateQs (config, url) {
  const { queryStringObfuscation } = config

  if (queryStringObfuscation === false) return url

  const queryStart = url.indexOf('?')
  if (queryStart === -1) return url

  const path = url.slice(0, queryStart)
  if (queryStringObfuscation === true) return path

  // Only the part after `?` is obfuscated; the path is never touched
  const qs = url.slice(queryStart + 1).replace(queryStringObfuscation, '<redacted>')

  return `${path}?${qs}`
}
61+
/**
 * Extract URL path from URL using regex pattern instead of Node.js URL API because:
 *
 * - Handles edge cases like malformed URLs
 * - Works with relative paths
 * - Cross tracers compatibility
 *
 * Falls back to `/` when the URL is empty or does not match `PATH_REGEX`.
 *
 * @param {string} url
 * @returns {string} Url path
 */
function extractPathFromUrl (url) {
  if (!url) return '/'

  const match = url.match(PATH_REGEX)

  return match?.groups?.path || '/'
}
78+
/**
 * Calculate http.endpoint from URL path
 *
 * The path is split on `/`, empty segments are dropped, only the first 8
 * segments are kept, and each segment matching one of the segment patterns is
 * replaced by a `{param:*}` placeholder. An empty path yields `/`.
 *
 * @param {string} url
 * @returns {string} The normalized endpoint
 */
function calculateHttpEndpoint (url) {
  const segments = extractPathFromUrl(url)
    .split('/')
    .filter(Boolean) // drop empty segments from leading, trailing or repeated slashes
    .slice(0, 8) // keep only the first 8 non-empty segments

  const normalized = segments.map(segment => {
    // Order matters: the patterns overlap (every int is also a valid hex string),
    // so the most specific pattern is tested first.
    if (INT_SEGMENT.test(segment)) return '{param:int}'
    if (INT_ID_SEGMENT.test(segment)) return '{param:int_id}'
    if (HEX_SEGMENT.test(segment)) return '{param:hex}'
    if (HEX_ID_SEGMENT.test(segment)) return '{param:hex_id}'
    if (STRING_SEGMENT.test(segment)) return '{param:str}'

    // No match: keep the segment verbatim
    return segment
  })

  // Joining an empty list gives '', so this also covers the root path
  return `/${normalized.join('/')}`
}
116+
5117function filterSensitiveInfoFromRepository ( repositoryUrl ) {
6118 if ( ! repositoryUrl ) {
7119 return ''
@@ -25,4 +137,10 @@ function filterSensitiveInfoFromRepository (repositoryUrl) {
25137 }
26138}
27139
28- module . exports = { filterSensitiveInfoFromRepository }
140+ module . exports = {
141+ extractURL,
142+ obfuscateQs,
143+ calculateHttpEndpoint,
144+ filterSensitiveInfoFromRepository,
145+ extractPathFromUrl // test only
146+ }
0 commit comments