|
function Get-MathML
{
    <#
    .SYNOPSIS
        Gets MathML
    .DESCRIPTION
        Gets MathML from a file, a page, or a literal XML string.

        Each source is fetched once and kept in a script-scoped cache keyed by the
        `-Url` value. Every `<math>...</math>` element found in the source is parsed
        into an XML document and decorated with the `MathML` type name.

        When called without `-Url`, all MathML cached so far is output.
    .EXAMPLE
        MathML https://dlmf.nist.gov/2.1
    .EXAMPLE
        MathML 'https://en.wikipedia.org/wiki/Rose_(mathematics)'
    .EXAMPLE
        MathML "<math xmlns='http://www.w3.org/1998/Math/MathML'>
            <semantics>
                <mrow>
                    <mn>1</mn>
                    <mo>+</mo>
                    <mn>1</mn>
                    <mo>=</mo>
                    <mn>2</mn>
                </mrow>
            </semantics>
        </math>"
    .OUTPUTS
        One XML document per `<math>` element found, each decorated as `MathML`.
    #>
    [Alias('MathML')]
    param(
    # A url or file path that hopefully contains MathML.
    # A literal XML string is also accepted and used directly as the source.
    # The response from this URL will be cached.
    [Parameter(ValueFromPipelineByPropertyName)]
    [Alias('Uri','FilePath','Fullname')]
    [string]
    $Url,

    # If set, will request the URL, even if it has been cached.
    [Parameter(ValueFromPipelineByPropertyName)]
    [switch]
    $Force,

    # If set, will use chromium (headless, with `--dump-dom`) to request the page,
    # and will use the dumped DOM as the response.
    [Parameter(ValueFromPipelineByPropertyName)]
    [switch]
    $UseChromium,

    # The path to a chromium browser.
    [Parameter(ValueFromPipelineByPropertyName)]
    [string]
    $ChromiumPath = 'chromium'
    )

    begin {
        # The cache lives at script scope so it survives between calls.
        if (-not $script:MathMLCache) {
            $script:MathMLCache = [Ordered]@{}
        }

        # Lazily matches from an opening <math to the nearest closing </math>.
        $mathMlPattern = [Regex]::new('<math[\s\S]+?</math>','IgnoreCase')
    }

    process {
        # If we have no URL, output every non-null MathML document already cached.
        if (-not $PSBoundParameters.Url) {
            foreach ($cachedMathML in @($script:MathMLCache.Values.MathML)) {
                if ($cachedMathML) { $cachedMathML }
            }
            return
        }

        $cacheKey = "$Url"

        # If we have not yet cached this URL, or we are using `-Force`, (re)load the source.
        if (-not $script:MathMLCache[$cacheKey] -or $Force) {
            # Parse once; the result is reused if the "URL" is really XML text.
            $urlAsXml = $Url -as [xml]

            $response =
                if ($urlAsXml) {
                    # The URL was XML: use that as the source.
                    $urlAsXml.OuterXml
                }
                elseif (Test-Path $Url) {
                    # The URL was actually a file path: get its content.
                    Get-Content -Raw $Url
                }
                elseif (-not $UseChromium) {
                    # Invoke-RestMethod deserializes XML responses into XML nodes.
                    # Those must be turned back into markup, otherwise the
                    # stringified response would not contain any <math> text.
                    Invoke-RestMethod $Url | ForEach-Object {
                        if ($_ -is [System.Xml.XmlNode]) { $_.OuterXml } else { $_ }
                    }
                }
                else {
                    # Call chromium in headless mode and dump the DOM,
                    & $ChromiumPath --headless --disable-gpu --no-sandbox --dump-dom "$Url" *>&1 |
                        # strip out any chromium trace messages
                        Where-Object { $_ -notmatch '^\[\d+:\d+' } |
                        # and stringify the whole response.
                        Out-String -Width 1mb
                }

            # Replacing the whole entry also discards any previously parsed MathML.
            $script:MathMLCache[$cacheKey] = [Ordered]@{ Response = $response }
        }

        $cacheEntry = $script:MathMLCache[$cacheKey]

        # If we have a response but have never scanned it, extract the MathML.
        # Checking for the key (not its truthiness) avoids re-scanning sources
        # that simply contain no MathML.
        if ($cacheEntry.Response -and -not $cacheEntry.Contains('MathML')) {
            $responseText = "$($cacheEntry.Response)"
            $cacheEntry.MathML = @(
                foreach ($match in $mathMlPattern.Matches($responseText)) {
                    # Cast each match into XML, skipping anything that is not well-formed.
                    $matchXml = $match.Value -as [xml]
                    if (-not $matchXml) { continue }

                    # If the element does not declare the MathML namespace, add it.
                    if (-not $matchXml.math.xmlns) {
                        $matchXml.math.SetAttribute('xmlns', 'http://www.w3.org/1998/Math/MathML')
                    }

                    # Decorate the return as MathML
                    $matchXml.pstypenames.Insert(0, 'MathML')
                    # and output it to the cache.
                    $matchXml
                }
            )
        }

        # Last but not least, output any MathML objects in the cache for this URL.
        $cacheEntry.MathML
    }
}
| 140 | + |
0 commit comments