diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..36bd853 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [StartAutomating] diff --git a/.github/workflows/BuildOpenXML.yml b/.github/workflows/BuildOpenXML.yml new file mode 100644 index 0000000..ce9e3bf --- /dev/null +++ b/.github/workflows/BuildOpenXML.yml @@ -0,0 +1,498 @@ + +name: Build OpenXML Module +on: + push: + pull_request: + workflow_dispatch: +jobs: + TestPowerShellOnLinux: + runs-on: ubuntu-latest + steps: + - name: InstallPester + id: InstallPester + shell: pwsh + run: | + $Parameters = @{} + $Parameters.PesterMaxVersion = ${env:PesterMaxVersion} + foreach ($k in @($parameters.Keys)) { + if ([String]::IsNullOrEmpty($parameters[$k])) { + $parameters.Remove($k) + } + } + Write-Host "::debug:: InstallPester $(@(foreach ($p in $Parameters.GetEnumerator()) {'-' + $p.Key + ' ' + $p.Value}) -join ' ')" + & {<# + .Synopsis + Installs Pester + .Description + Installs Pester + #> + param( + # The maximum pester version. Defaults to 4.99.99. 
+ [string] + $PesterMaxVersion = '4.99.99' + ) + [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 + Install-Module -Name Pester -Repository PSGallery -Force -Scope CurrentUser -MaximumVersion $PesterMaxVersion -SkipPublisherCheck -AllowClobber + Import-Module Pester -Force -PassThru -MaximumVersion $PesterMaxVersion} @Parameters + - name: Check out repository + uses: actions/checkout@v4 + - name: RunPester + id: RunPester + shell: pwsh + run: | + $Parameters = @{} + $Parameters.ModulePath = ${env:ModulePath} + $Parameters.PesterMaxVersion = ${env:PesterMaxVersion} + $Parameters.NoCoverage = ${env:NoCoverage} + $Parameters.NoCoverage = $parameters.NoCoverage -match 'true'; + foreach ($k in @($parameters.Keys)) { + if ([String]::IsNullOrEmpty($parameters[$k])) { + $parameters.Remove($k) + } + } + Write-Host "::debug:: RunPester $(@(foreach ($p in $Parameters.GetEnumerator()) {'-' + $p.Key + ' ' + $p.Value}) -join ' ')" + & {<# + .Synopsis + Runs Pester + .Description + Runs Pester tests after importing a PowerShell module + #> + param( + # The module path. If not provided, will default to the second half of the repository ID. + [string] + $ModulePath, + # The Pester max version. By default, this is pinned to 4.99.99. + [string] + $PesterMaxVersion = '4.99.99', + + # If set, will not collect code coverage. 
+ [switch] + $NoCoverage + ) + + $global:ErrorActionPreference = 'continue' + $global:ProgressPreference = 'silentlycontinue' + + $orgName, $moduleName = $env:GITHUB_REPOSITORY -split "/" + if (-not $ModulePath) { $ModulePath = ".\$moduleName.psd1" } + $importedPester = Import-Module Pester -Force -PassThru -MaximumVersion $PesterMaxVersion + $importedModule = Import-Module $ModulePath -Force -PassThru + $importedPester, $importedModule | Out-Host + + $codeCoverageParameters = @{ + CodeCoverage = "$($importedModule | Split-Path)\*-*.ps1" + CodeCoverageOutputFile = ".\$moduleName.Coverage.xml" + } + + if ($NoCoverage) { + $codeCoverageParameters = @{} + } + + + $result = + Invoke-Pester -PassThru -Verbose -OutputFile ".\$moduleName.TestResults.xml" -OutputFormat NUnitXml @codeCoverageParameters + + if ($result.FailedCount -gt 0) { + "::debug:: $($result.FailedCount) tests failed" + foreach ($r in $result.TestResult) { + if (-not $r.Passed) { + "::error::$($r.describe, $r.context, $r.name -join ' ') $($r.FailureMessage)" + } + } + throw "::error:: $($result.FailedCount) tests failed" + } + } @Parameters + - name: PublishTestResults + uses: actions/upload-artifact@main + with: + name: PesterResults + path: '**.TestResults.xml' + if: ${{always()}} + TagReleaseAndPublish: + runs-on: ubuntu-latest + if: ${{ success() }} + steps: + - name: Check out repository + uses: actions/checkout@v2 + - name: TagModuleVersion + id: TagModuleVersion + shell: pwsh + run: | + $Parameters = @{} + $Parameters.ModulePath = ${env:ModulePath} + $Parameters.UserEmail = ${env:UserEmail} + $Parameters.UserName = ${env:UserName} + $Parameters.TagVersionFormat = ${env:TagVersionFormat} + $Parameters.TagAnnotationFormat = ${env:TagAnnotationFormat} + foreach ($k in @($parameters.Keys)) { + if ([String]::IsNullOrEmpty($parameters[$k])) { + $parameters.Remove($k) + } + } + Write-Host "::debug:: TagModuleVersion $(@(foreach ($p in $Parameters.GetEnumerator()) {'-' + $p.Key + ' ' + $p.Value}) -join ' 
')" + & {param( + [string] + $ModulePath, + + # The user email associated with a git commit. + [string] + $UserEmail, + + # The user name associated with a git commit. + [string] + $UserName, + + # The tag version format (default value: 'v$($imported.Version)') + # This can expand variables. $imported will contain the imported module. + [string] + $TagVersionFormat = 'v$($imported.Version)', + + # The tag annotation format (default value: '$($imported.Name) $($imported.Version)') + # This can expand variables. $imported will contain the imported module. + [string] + $TagAnnotationFormat = '$($imported.Name) $($imported.Version)' + ) + + + $gitHubEvent = if ($env:GITHUB_EVENT_PATH) { + [IO.File]::ReadAllText($env:GITHUB_EVENT_PATH) | ConvertFrom-Json + } else { $null } + + + @" + ::group::GitHubEvent + $($gitHubEvent | ConvertTo-Json -Depth 100) + ::endgroup:: + "@ | Out-Host + + if (-not ($gitHubEvent.head_commit.message -match "Merge Pull Request #(?<PRNumber>\d+)") -and + (-not $gitHubEvent.psobject.properties['inputs'])) { + "::warning::Pull Request has not merged, skipping Tagging" | Out-Host + return + } + + + + $imported = + if (-not $ModulePath) { + $orgName, $moduleName = $env:GITHUB_REPOSITORY -split "/" + Import-Module ".\$moduleName.psd1" -Force -PassThru -Global + } else { + Import-Module $modulePath -Force -PassThru -Global + } + + if (-not $imported) { return } + + $targetVersion =$ExecutionContext.InvokeCommand.ExpandString($TagVersionFormat) + $existingTags = git tag --list + + @" + Target Version: $targetVersion + + Existing Tags: + $($existingTags -join [Environment]::NewLine) + "@ | Out-Host + + $versionTagExists = $existingTags | Where-Object { $_ -match $targetVersion } + + if ($versionTagExists) { + "::warning::Version $($versionTagExists)" + return + } + + if (-not $UserName) { $UserName = $env:GITHUB_ACTOR } + if (-not $UserEmail) { $UserEmail = "$UserName@github.com" } + git config --global user.email $UserEmail + git config --global user.name $UserName + 
+ git tag -a $targetVersion -m $ExecutionContext.InvokeCommand.ExpandString($TagAnnotationFormat) + git push origin --tags + + if ($env:GITHUB_ACTOR) { + exit 0 + }} @Parameters + - name: ReleaseModule + id: ReleaseModule + shell: pwsh + run: | + $Parameters = @{} + $Parameters.ModulePath = ${env:ModulePath} + $Parameters.UserEmail = ${env:UserEmail} + $Parameters.UserName = ${env:UserName} + $Parameters.TagVersionFormat = ${env:TagVersionFormat} + $Parameters.ReleaseNameFormat = ${env:ReleaseNameFormat} + $Parameters.ReleaseAsset = ${env:ReleaseAsset} + $Parameters.ReleaseAsset = $parameters.ReleaseAsset -split ';' -replace '^[''"]' -replace '[''"]$' + foreach ($k in @($parameters.Keys)) { + if ([String]::IsNullOrEmpty($parameters[$k])) { + $parameters.Remove($k) + } + } + Write-Host "::debug:: ReleaseModule $(@(foreach ($p in $Parameters.GetEnumerator()) {'-' + $p.Key + ' ' + $p.Value}) -join ' ')" + & {param( + [string] + $ModulePath, + + # The user email associated with a git commit. + [string] + $UserEmail, + + # The user name associated with a git commit. + [string] + $UserName, + + # The tag version format (default value: 'v$(imported.Version)') + # This can expand variables. $imported will contain the imported module. + [string] + $TagVersionFormat = 'v$($imported.Version)', + + # The release name format (default value: '$($imported.Name) $($imported.Version)') + [string] + $ReleaseNameFormat = '$($imported.Name) $($imported.Version)', + + # Any assets to attach to the release. Can be a wildcard or file name. 
+ [string[]] + $ReleaseAsset + ) + + + $gitHubEvent = if ($env:GITHUB_EVENT_PATH) { + [IO.File]::ReadAllText($env:GITHUB_EVENT_PATH) | ConvertFrom-Json + } else { $null } + + + @" + ::group::GitHubEvent + $($gitHubEvent | ConvertTo-Json -Depth 100) + ::endgroup:: + "@ | Out-Host + + if (-not ($gitHubEvent.head_commit.message -match "Merge Pull Request #(?<PRNumber>\d+)") -and + (-not $gitHubEvent.psobject.properties['inputs'])) { + "::warning::Pull Request has not merged, skipping GitHub release" | Out-Host + return + } + + + + $imported = + if (-not $ModulePath) { + $orgName, $moduleName = $env:GITHUB_REPOSITORY -split "/" + Import-Module ".\$moduleName.psd1" -Force -PassThru -Global + } else { + Import-Module $modulePath -Force -PassThru -Global + } + + if (-not $imported) { return } + + $targetVersion =$ExecutionContext.InvokeCommand.ExpandString($TagVersionFormat) + $targetReleaseName = $targetVersion + $releasesURL = 'https://api.github.com/repos/${{github.repository}}/releases' + "Release URL: $releasesURL" | Out-Host + $listOfReleases = Invoke-RestMethod -Uri $releasesURL -Method Get -Headers @{ + "Accept" = "application/vnd.github.v3+json" + "Authorization" = 'Bearer ${{ secrets.GITHUB_TOKEN }}' + } + + $releaseExists = $listOfReleases | Where-Object tag_name -eq $targetVersion + + if ($releaseExists) { + "::warning::Release '$($releaseExists.Name )' Already Exists" | Out-Host + $releasedIt = $releaseExists + } else { + $releasedIt = Invoke-RestMethod -Uri $releasesURL -Method Post -Body ( + [Ordered]@{ + owner = '${{github.owner}}' + repo = '${{github.repository}}' + tag_name = $targetVersion + name = $ExecutionContext.InvokeCommand.ExpandString($ReleaseNameFormat) + body = + if ($env:RELEASENOTES) { + $env:RELEASENOTES + } elseif ($imported.PrivateData.PSData.ReleaseNotes) { + $imported.PrivateData.PSData.ReleaseNotes + } else { + "$($imported.Name) $targetVersion" + } + draft = if ($env:RELEASEISDRAFT) { [bool]::Parse($env:RELEASEISDRAFT) } else { $false } + 
prerelease = if ($env:PRERELEASE) { [bool]::Parse($env:PRERELEASE) } else { $false } + } | ConvertTo-Json + ) -Headers @{ + "Accept" = "application/vnd.github.v3+json" + "Content-type" = "application/json" + "Authorization" = 'Bearer ${{ secrets.GITHUB_TOKEN }}' + } + } + + + + + + if (-not $releasedIt) { + throw "Release failed" + } else { + $releasedIt | Out-Host + } + + $releaseUploadUrl = $releasedIt.upload_url -replace '\{.+$' + + if ($ReleaseAsset) { + $fileList = Get-ChildItem -Recurse + $filesToRelease = + @(:nextFile foreach ($file in $fileList) { + foreach ($relAsset in $ReleaseAsset) { + if ($relAsset -match '[\*\?]') { + if ($file.Name -like $relAsset) { + $file; continue nextFile + } + } elseif ($file.Name -eq $relAsset -or $file.FullName -eq $relAsset) { + $file; continue nextFile + } + } + }) + + $releasedFiles = @{} + foreach ($file in $filesToRelease) { + if ($releasedFiles[$file.Name]) { + Write-Warning "Already attached file $($file.Name)" + continue + } else { + $fileBytes = [IO.File]::ReadAllBytes($file.FullName) + $releasedFiles[$file.Name] = + Invoke-RestMethod -Uri "${releaseUploadUrl}?name=$($file.Name)" -Headers @{ + "Accept" = "application/vnd.github+json" + "Authorization" = 'Bearer ${{ secrets.GITHUB_TOKEN }}' + } -Body $fileBytes -ContentType Application/octet-stream + $releasedFiles[$file.Name] + } + } + + "Attached $($releasedFiles.Count) file(s) to release" | Out-Host + } + + + + } @Parameters + - name: PublishPowerShellGallery + id: PublishPowerShellGallery + shell: pwsh + run: | + $Parameters = @{} + $Parameters.ModulePath = ${env:ModulePath} + $Parameters.Exclude = ${env:Exclude} + $Parameters.Exclude = $parameters.Exclude -split ';' -replace '^[''"]' -replace '[''"]$' + foreach ($k in @($parameters.Keys)) { + if ([String]::IsNullOrEmpty($parameters[$k])) { + $parameters.Remove($k) + } + } + Write-Host "::debug:: PublishPowerShellGallery $(@(foreach ($p in $Parameters.GetEnumerator()) {'-' + $p.Key + ' ' + $p.Value}) -join ' ')" 
+ & {param( + [string] + $ModulePath, + + [string[]] + $Exclude = @('*.png', '*.mp4', '*.jpg','*.jpeg', '*.gif', 'docs[/\]*') + ) + + $gitHubEvent = if ($env:GITHUB_EVENT_PATH) { + [IO.File]::ReadAllText($env:GITHUB_EVENT_PATH) | ConvertFrom-Json + } else { $null } + + if (-not $Exclude) { + $Exclude = @('*.png', '*.mp4', '*.jpg','*.jpeg', '*.gif','docs[/\]*') + } + + + @" + ::group::GitHubEvent + $($gitHubEvent | ConvertTo-Json -Depth 100) + ::endgroup:: + "@ | Out-Host + + @" + ::group::PSBoundParameters + $($PSBoundParameters | ConvertTo-Json -Depth 100) + ::endgroup:: + "@ | Out-Host + + if (-not ($gitHubEvent.head_commit.message -match "Merge Pull Request #(?<PRNumber>\d+)") -and + (-not $gitHubEvent.psobject.properties['inputs'])) { + "::warning::Pull Request has not merged, skipping Gallery Publish" | Out-Host + return + } + + + $imported = + if (-not $ModulePath) { + $orgName, $moduleName = $env:GITHUB_REPOSITORY -split "/" + Import-Module ".\$moduleName.psd1" -Force -PassThru -Global + } else { + Import-Module $modulePath -Force -PassThru -Global + } + + if (-not $imported) { return } + + # -ModulePath skips the repository-name split above, so derive the name from the imported module. + if (-not $moduleName) { $moduleName = $imported.Name } + + $foundModule = try { Find-Module -Name $imported.Name -ErrorAction SilentlyContinue} catch {} + + if ($foundModule -and (([Version]$foundModule.Version) -ge ([Version]$imported.Version))) { + "::warning::Gallery Version of $moduleName is more recent ($($foundModule.Version) >= $($imported.Version))" | Out-Host + } else { + + $gk = '${{secrets.GALLERYKEY}}' + + $rn = Get-Random + $moduleTempFolder = Join-Path $pwd "$rn" + $moduleTempPath = Join-Path $moduleTempFolder $moduleName + New-Item -ItemType Directory -Path $moduleTempPath -Force | Out-Host + + Write-Host "Staging Directory: $ModuleTempPath" + + $imported | Split-Path | + Get-ChildItem -Force | + Where-Object Name -NE $rn | + Copy-Item -Destination $moduleTempPath -Recurse + + $moduleGitPath = Join-Path $moduleTempPath '.git' + Write-Host "Removing .git directory" + if (Test-Path $moduleGitPath) { + Remove-Item -Recurse 
-Force $moduleGitPath + } + + if ($Exclude) { + "::notice::Attempting to Exclude $exclude" | Out-Host + Get-ChildItem $moduleTempPath -Recurse | + Where-Object { + foreach ($ex in $exclude) { + if ($_.FullName -like $ex) { + "::notice::Excluding $($_.FullName)" | Out-Host + return $true + } + } + } | + Remove-Item + } + + Write-Host "Module Files:" + Get-ChildItem $moduleTempPath -Recurse + Write-Host "Publishing $moduleName [$($imported.Version)] to Gallery" + Publish-Module -Path $moduleTempPath -NuGetApiKey $gk + if ($?) { + Write-Host "Published to Gallery" + } else { + Write-Host "Gallery Publish Failed" + exit 1 + } + } + } @Parameters + BuildOpenXML: + runs-on: ubuntu-latest + if: ${{ success() }} + steps: + - name: Check out repository + uses: actions/checkout@main + - name: UseEZOut + uses: StartAutomating/EZOut@master +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} diff --git a/Build/OpenXML.ezout.ps1 b/Build/OpenXML.ezout.ps1 new file mode 100644 index 0000000..480ccb0 --- /dev/null +++ b/Build/OpenXML.ezout.ps1 @@ -0,0 +1,39 @@ +#requires -Module EZOut +# Install-Module EZOut or https://github.com/StartAutomating/EZOut +$myFile = $MyInvocation.MyCommand.ScriptBlock.File +$myRoot = $myFile | Split-Path | Split-Path +$myModuleName = $myFile | Split-Path | Split-Path | Split-Path -Leaf +Push-Location $myRoot +$formatting = @( + # Add your own Write-FormatView here, + # or put them in a Formatting or Views directory + foreach ($potentialDirectory in 'Formatting','Views','Types') { + Join-Path $myRoot $potentialDirectory | + Get-ChildItem -ea ignore | + Import-FormatView -FilePath {$_.Fullname} + } +) + +$destinationRoot = $myRoot + +if ($formatting) { + $myFormatFilePath = Join-Path $destinationRoot "$myModuleName.format.ps1xml" + # You can also output to multiple paths by passing a hashtable to -OutputPath. 
+ $formatting | Out-FormatData -Module $MyModuleName -OutputPath $myFormatFilePath +} + +$types = @( + # Add your own Write-TypeView statements here + # or declare them in the 'Types' directory + Join-Path $myRoot Types | + Get-Item -ea ignore | + Import-TypeView + +) + +if ($types) { + $myTypesFilePath = Join-Path $destinationRoot "$myModuleName.types.ps1xml" + # You can also output to multiple paths by passing a hashtable to -OutputPath. + $types | Out-TypeData -OutputPath $myTypesFilePath +} +Pop-Location diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d2663c7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,43 @@ +## OpenXML 0.1 + +* Initial Build of OpenXML Module (#1) +* Commands: + * `Get-OpenXML` (#2) + * `Import-OpenXML` (#14) + * `Export-OpenXML` (#15) + * `Close-OpenXML` (#16) + * `Copy-OpenXML` (#18) + * `Set-OpenXML` (#19) + * `Start-OpenXML` (#28) + * `Stop-OpenXML` (#29) +* Initial Extended Types + * `OpenXML` + * `OpenXML.get_Parts` (#17) + * `OpenXML.get_Created` (#23) + * `OpenXML.get_Modified` (#24) + * `OpenXML.File` + * `OpenXML.File.get_DocumentProperty` (#13) + * `OpenXML.File` default display (#7) + * `OpenXML.Excel.File` + * `OpenXML.Excel.File.get_Worksheets` (#5) + * `OpenXML.Excel.File.get_SharedString` (#25) + * `OpenXML.Excel.Worksheet` + * `OpenXML.Excel.Worksheet.get_Cell` (#6) + * `OpenXML.Excel.Worksheet.get_Formula` (#26) + * `OpenXML.PowerPoint.File` + * `OpenXML.PowerPoint.File.get_Slides` (#8) + * `OpenXML.PowerPoint.File.get_Text` (#9) + * `OpenXML.PowerPoint.Slide` + * `OpenXML.PowerPoint.get_Text` (#10) + * `OpenXML.Word.File` + * `OpenXML.Word.File.get_Text` (#11) +* Sample Documents (#3) +* Initial Tests (#27) +* Build workflow + * Building types with [EZOut](https://github.com/StartAutomating/EZOut) (#4) + * Building GitHub Workflow with [PSDevOps](https://github.com/StartAutomating/PSDevOps) (#12) +* Core Documentation + * README (#1) + * CODE_OF_CONDUCT (#20) + * CONTRIBUTING (#21) + * SECURITY 
(#22) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..a132093 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Code of Conduct + +We have a simple subjective code of conduct: + +1. Be Respectful +2. Be Helpful +3. Do No Harm + +Failure to follow the code of conduct may result in blocks or banishment. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..234457d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,17 @@ +# Contributing + +Contributing code is very welcome! So is contributing [an issue](https://github.com/PowerShellWeb/OpenXML/issues) or starting a [discussion](https://github.com/PowerShellWeb/OpenXML/discussions). + +All projects are easier with community help, and this project is no different. + +## Contributing Examples + +Examples are more than welcome! To contribute an example, please open an issue describing your example and create a pull request. + +## Contributing Code + +If you would like to contribute code, please describe what you intend to do in an issue or two first. 
+ + + + diff --git a/Commands/Close-OpenXML.ps1 b/Commands/Close-OpenXML.ps1 new file mode 100644 index 0000000..30cb9e6 --- /dev/null +++ b/Commands/Close-OpenXML.ps1 @@ -0,0 +1,32 @@ +function Close-OpenXML { + <# + .SYNOPSIS + Closes OpenXML files + .DESCRIPTION + Closes OpenXML files and streams + #> + param( + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject + ) + + process { + if (-not $InputObject) { return } + if ($InputObject -isnot [IO.Packaging.Package]) { return } + if ($InputObject.MemoryStream) { + try { + $InputObject.MemoryStream.Close() + } catch { + $PSCmdlet.WriteError($_) + } + } + + try { + $InputObject.Close() + } catch { + $PSCmdlet.WriteError($_) + } + } + +} diff --git a/Commands/Copy-OpenXML.ps1 b/Commands/Copy-OpenXML.ps1 new file mode 100644 index 0000000..0a166a5 --- /dev/null +++ b/Commands/Copy-OpenXML.ps1 @@ -0,0 +1,108 @@ +function Copy-OpenXML +{ + <# + .SYNOPSIS + Copies OpenXML + .DESCRIPTION + Copies content from one OpenXML file to another + #> + param( + # The destination path + [Parameter(ValueFromPipelineByPropertyName)] + [Alias('Destination')] + [string] + $DestinationPath, + + # The input object + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject, + + # If set, will update existing packages. + [switch] + $Force + ) + + process { + # If the input was not a package + if ($inputObject -isnot [IO.Packaging.Package]) { + $loadedPackage = # see if it is a file we can load + if ($InputObject -is [IO.FileInfo]) { + Get-OpenXML $InputObject.FullName + } elseif ($inputFile = Get-Item -ErrorAction Ignore -Path "$InputObject") { + Get-OpenXML $inputFile + } + + # If it was not, return. + if ($loadedPackage -isnot [IO.Packaging.Package]) { return } + $InputObject = $loadedPackage + } + + # Get the absolute path of the destination, without creating the file, + $unresolvedDestination = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($DestinationPath) + + # then see if the file exists. 
+ $fileExists = Test-Path $unresolvedDestination + # If it does and we are not using the -Force + if ($fileExists -and -not $force) { + # write an error that names the destination path + Write-Error "$unresolvedDestination already exists, use -Force to update" -Category ResourceExists + return + } + # If it did not exist, create it with New-Item -Force + elseif (-not $fileExists) + { + # this will create intermediate paths. + $newFile = New-Item -ItemType File -Path $unresolvedDestination -Force + if (-not $newFile) { return } + } + + # Try to open or create our package for read and write. + $destinationPackage = [IO.Packaging.Package]::Open($unresolvedDestination, 'OpenOrCreate', 'ReadWrite') + + # If we could not, we are done. + if (-not $destinationPackage) { return } + + # Get the input parts and relationships + $inputPackageParts = $InputObject.GetParts() + $inputPackageRelationships = $InputObject.GetRelationships() + + # For each part in the input + foreach ($inputPart in $inputPackageParts) { + # Create or open a part in the destination + $destinationPart = + if (-not $destinationPackage.PartExists($inputPart.Uri)) { + $destinationPackage.CreatePart($inputPart.Uri, $inputPart.ContentType) + } else { + $destinationPackage.GetPart($inputPart.Uri) + } + + # and copy the streams. + $inputStream = $inputPart.GetStream() + $destinationStream = $destinationPart.GetStream() + $inputStream.CopyTo($destinationStream) + $inputStream.Close() + $destinationStream.Close() + } + + # Then, create any relationships that do not exist. + foreach ($inputRelationship in $inputPackageRelationships) { + if ($inputRelationship) { + if (-not $destinationPackage.RelationshipExists($inputRelationship.id)) { + $null = $destinationPackage.CreateRelationship( + $inputRelationship.targetUri, + $inputRelationship.targetMode, + $inputRelationship.relationshipType, + $inputRelationship.id + ) + } + } + } + + # We can now close our package, writing the file. 
+ $destinationPackage.Close() + + # We want to open it right back up again as we output the updated file. + Get-OpenXML -FilePath $unresolvedDestination + } +} \ No newline at end of file diff --git a/Commands/Export-OpenXML.ps1 b/Commands/Export-OpenXML.ps1 new file mode 100644 index 0000000..550ebb9 --- /dev/null +++ b/Commands/Export-OpenXML.ps1 @@ -0,0 +1,38 @@ +function Export-OpenXML { + <# + .SYNOPSIS + Exports OpenXML + .DESCRIPTION + Exports loaded OpenXML to a file. + #> + [Alias('Save-OpenXML')] + param( + # The file path where the OpenXML package will be saved. + [Parameter(Mandatory,ValueFromPipelineByPropertyName)] + [Alias('Path')] + [string] + $FilePath, + + # The input object. + # This must be a package loaded with this module. + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject, + + # If set, will force the export even if a file already exists. + [switch] + $Force + ) + + process { + # If there is no input return + if (-not $InputObject) { return } + # If the input is not a package, pass it thru + if ($InputObject -isnot [IO.Packaging.Package]) { + return $InputObject + } + + Copy-OpenXML -DestinationPath $FilePath -InputObject $inputObject -force:$Force + } +} + diff --git a/Commands/Get-OpenXML.ps1 b/Commands/Get-OpenXML.ps1 new file mode 100644 index 0000000..735371c --- /dev/null +++ b/Commands/Get-OpenXML.ps1 @@ -0,0 +1,101 @@ +function Get-OpenXML +{ + <# + .SYNOPSIS + Gets Open Office XML files (Excel, PowerPoint, and Word) + .DESCRIPTION + Gets Open Office XML files (Excel, PowerPoint, and Word) as a structured object. + + The object contains the file path, parts, and relationships of the OpenXML document. 
+ + This cmdlet can be used to read the contents of .docx, .pptx, .xps, .xlsx files + (or any files that are readable with [`IO.Packaging.Package`](https://learn.microsoft.com/en-us/dotnet/api/system.io.packaging.package?wt.mc_id=MVP_321542)) + .EXAMPLE + # Get an OpenXML document + Get-OpenXML -FilePath './Sample.docx' + #> + [CmdletBinding()] + [Alias('OpenXML')] + param( + # The path to the OpenXML file to read + [Parameter(ValueFromPipelineByPropertyName=$true)] + [Alias('Fullname')] + [string] + $FilePath + ) + + begin { + + filter openXMLFromFile { + $filePath = $_ + # Get the file info and read the file as a byte stream. + $fileInfo = $FilePath -as [IO.FileInfo] + # By reading the file with Get-Content -AsByteStream, we avoid locking the file + # (or the file being locked by another process) + $packageBytes = Get-Content -Path $FilePath -AsByteStream -Raw + + # If there were no bytes, return + if (-not $packageBytes) { return } + + # Create a memory stream from the byte array + $memoryStream = [IO.MemoryStream]::new($packageBytes) + # and open the package from the memory stream + $filePackage = [IO.Packaging.Package]::Open($memoryStream, "Open", "ReadWrite") + # If that did not work, return. + if (-not $filePackage) { return } + + $filePackage.pstypenames.insert(0,'OpenXML') + $filePackage.pstypenames.insert(0,'OpenXML.File') + $packageContent = $filePackage.Parts + $openXMLObject = $filePackage | + Add-Member NoteProperty FilePath $filePath -Force -PassThru | + Add-Member NoteProperty MemoryStream $memoryStream -Force -PassThru + + # Now we can get more specific about what type of OpenXML file this is. + # By looking for certain key parts, we can determine if this is a PowerPoint, Excel, or Word file. 
+ # For example, if the package contains a part with `/ppt/` in the URI, + if ($filePackage.Parts.Keys -match '/ppt/') { + # it is an `OpenXML.PowerPoint.File` + $openXmlObject.pstypenames.insert(0, 'OpenXML.PowerPoint.File') + } + + # If the package contains a part with `/xl/` in the URI, + if ($filePackage.Parts.Keys -match '/xl/') { + # it is an `OpenXML.Excel.File` + $openXmlObject.pstypenames.insert(0, 'OpenXML.Excel.File') + } + + # If the package contains a part with `/word/` in the URI, it is a Word file. + if ($filePackage.Parts.Keys -match '/word/') { + # it is an `OpenXML.Word.File` + $openXmlObject.pstypenames.insert(0, 'OpenXML.Word.File') + } + + # If the package contains a part with `/Documents/` in the URI, + if ($filePackage.Parts.Keys -match '/Documents/') { + # it is an `OpenXML.XPS.File` + $openXmlObject.pstypenames.insert(0, 'OpenXML.XPS.File') + } + + # Now we output our openXML object + $OpenXMLObject + } + } + + process { + if ($filePath) { + # Try to resolve the file path + $resolvedPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($FilePath) + # If we could not resolve the path, exit + if (-not $resolvedPath ) { return } + + $resolvedPath | openXMLFromFile + } else { + $memoryStream = [IO.MemoryStream]::new() + $EmptyPackage = [io.packaging.package]::Open($memoryStream ,'Create') + $EmptyPackage | Add-Member NoteProperty -Name MemoryStream -Value $memoryStream -Force + $EmptyPackage.pstypenames.insert(0, 'OpenXML') + $EmptyPackage + } + } +} diff --git a/Commands/Import-OpenXML.ps1 b/Commands/Import-OpenXML.ps1 new file mode 100644 index 0000000..0ead91e --- /dev/null +++ b/Commands/Import-OpenXML.ps1 @@ -0,0 +1,23 @@ +function Import-OpenXML { + <# + .SYNOPSIS + Imports OpenXML + .DESCRIPTION + Imports OpenXML packages in PowerShell. 
+ .EXAMPLE + $excelFile = Import-OpenXML ./a.xlsx + .LINK + Get-OpenXML + #> + [Alias('Restore-OpenXML','Open-OpenXML')] + param( + # The path to the file + [Parameter(Mandatory,ValueFromPipelineByPropertyName)] + [string] + $FilePath + ) + + process { + Get-OpenXML @PSBoundParameters + } +} diff --git a/Commands/Set-OpenXML.ps1 b/Commands/Set-OpenXML.ps1 new file mode 100644 index 0000000..de5c4e7 --- /dev/null +++ b/Commands/Set-OpenXML.ps1 @@ -0,0 +1,108 @@ +function Set-OpenXML +{ + <# + .SYNOPSIS + Sets OpenXML content + .DESCRIPTION + Sets content in an OpenXML file. + .EXAMPLE + Get-OpenXML ./Examples/Sample.docx | + Set-OpenXML -Uri '/index.html' -Content ([xml]"

<h1>Hello World</h1>

") -ContentType text/html | + Set-OpenXML -Uri '/404.html' -Content ([xml]"

<h1>File Not Found</h1>

") -ContentType text/html | + Export-OpenXML ./Examples/Sample2.docx + .LINK + Get-OpenXML + #> + param( + # The uri to set + [Parameter(Mandatory,ParameterSetName='Uri',ValueFromPipelineByPropertyName)] + [Alias('Url')] + [uri] + $Uri, + + # The content type. By default, `text/plain` + [Parameter(ValueFromPipelineByPropertyName)] + [string] + $ContentType = 'text/plain', + + # The content to set. + [Parameter(ValueFromPipelineByPropertyName)] + [PSObject] + $Content, + + # The input object. + # This must be a package, and it must be writeable. + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject + ) + + process { + # If there is no input, there is nothing to do + if (-not $InputObject) { return } + # If the input is not a package, pass it thru. + if ($InputObject -isnot [IO.Packaging.Package]) { + return $InputObject + } + + # If the uri is not prefixed, + if ($uri -notmatch '^/') { + $uri = "/$uri" # add it to avoid easy errors. + } + + # Get or create the part + $part = + if ($InputObject.PartExists($uri)) { + $InputObject.GetPart($uri) + } else { + $InputObject.CreatePart($uri, $ContentType) + } + + if (-not $?) { return } + + # Get the stream + $partStream = $part.GetStream() + # First see if the content is a byte[] + if ($content -is [byte[]]) { + # if so, just write it + $partStream.Write($content, 0, $content.Length) + } + # If the content is a stream, + elseif ($content -is [IO.Stream]) { + # copy it in. + $content.CopyTo($partStream) + } + # If the content was xml or could be, + elseif ($content -is [xml] -or ($contentXml = $content -as [xml])) { + if ($contentXml) { $content = $contentXml } + $buffer = $OutputEncoding.GetBytes($content.OuterXml) + # write it to the package. + $partStream.Write($buffer, 0, $buffer.Length) + } elseif ($content -is [string]) { + # Put strings in as a byte array. 
+ $buffer = $OutputEncoding.GetBytes($content) + $partStream.Write($buffer, 0, $buffer.Length) + } elseif ($contentBytes = $content -as [byte[]]) { + # Bytes are obviously a byte array + $partStream.Write($contentBytes, 0, $contentBytes.Length) + } + elseif ($ContentType -match '[/\+]json') { + # Explicitly typed json can be converted to json + $buffer = $OutputEncoding.GetBytes((ConvertTo-Json -InputObject $content -Depth 10)) + $partStream.Write($buffer, 0, $buffer.Length) + } + else { + # and everything else is stringified + $buffer = $OutputEncoding.GetBytes("$content") + $partStream.Write($buffer, 0, $buffer.Length) + } + + # Close the part stream + $partStream.Close() + + # and invalidate the parts cache on the object + $inputObject.PSObject.Properties.Remove('.Parts') + # then pass it thru so we can keep piping. + $inputObject + } +} \ No newline at end of file diff --git a/Commands/Start-OpenXML.ps1 b/Commands/Start-OpenXML.ps1 new file mode 100644 index 0000000..097a78d --- /dev/null +++ b/Commands/Start-OpenXML.ps1 @@ -0,0 +1,228 @@ +function Start-OpenXML { + <# + .SYNOPSIS + Starts an OpenXML Server + .DESCRIPTION + Starts a read only server, using one or more OpenXML files as the storage. + .NOTES + If a URI in the package is requested, that URI will be returned. + + If a path does not have an extension, it will search for an .index.html. + + If the file was not found, a 404 code will be returned. + + If the OpenXML archive contains a `/404.html`, the content in this file will be returned with the 404 + + If another method than GET or HEAD is used, a 405 code will be returned. + + If the OpenXML archive contains a `/405.html`, then content in this file will be returned with the 405. + + ### Security Implications + + By default, this only serves content locally. + + If this serves content to any other machine, the script will need to be running as administrator, and all of the content within the file will be exposed. 
+ + If the file contained PII, this could be problematic. + + If you see this command running in an administrative process, please contact your network infrastructure and security teams + .EXAMPLE + $openXmlServer = Get-OpenXML ./Examples/Blank.docx | + Set-OpenXML -Uri '/index.html' -ContentType text/html -Content " +

Hello World

+ " | + Start-OpenXML + + Start-Process $openXmlServer.Name + .EXAMPLE + $openXmlServer = Get-OpenXML ./Examples/Blank.docx | + Set-OpenXML -Uri '/index.html' -ContentType text/html -Content " + + + Hello World + + + +

Hello World

+ + + " | + Set-OpenXML -Uri '/css/style.css' -ContentType text/css -Content " + body { background-color: #000000; color: #4488ff } + " | + Set-OpenXML -Uri '/404.html' -ContentType text/html -Content " + + + Hello World + + + +

File Not Found

+ + + " | + Start-OpenXML + + Start-Process $openXmlServer.Name + .EXAMPLE + $openXmlUpdate = Get-OpenXML ./Examples/Blank.docx | + Set-OpenXML -Uri '/index.html' -ContentType text/html -Content "

Hello World

" | + Export-OpenXML ./Examples/Server.docx -Force + + $openXmlServer = Start-OpenXML -FilePath $openXmlUpdate.FilePath + + Start-Process $openXmlServer.Name + #> + param( + # The path to an OpenXML file, or a glob that matches multiple OpenXML files. + [Parameter(Mandatory,ValueFromPipelineByPropertyName)] + [string] + $FilePath, + + # The Root + [Parameter(ValueFromPipelineByPropertyName)] + [string] + $RootUrl = "http://127.0.0.1:$(Get-Random -Minimum 4200 -Maximum 42000)/", + + # The input object. This can be provided to avoid loading a file from disk. + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject + ) + + begin { + if ($PSVersionTable.PSVersion -lt '7.0') { + Write-Error "This feature requires thread jobs, which are part of PowerShell Core" + return + } + } + process { + # Get our OpenXML + $openXml = + if ($inputObject -is [IO.Packaging.Package]) { + $inputObject + } else { + Get-OpenXML -FilePath $FilePath + } + + # and return if we could not + if (-not $openXml) { return } + + # Create a listener + $httpListener = [Net.HttpListener]::new() + $httpListener.Prefixes.Add($RootUrl) + $httpListener.Start() + + + # Create an IO object to populate the background runspace + $IO = [Ordered]@{ + HttpListener = $httpListener + OpenXML = $openXml + } + + # Because this function exposes a server, we want to fire some events. + # First is an approve event: `Approve-Start-OpenXML` + # By using Register-EngineEvent, this can be handled + $beforeEvent = + New-Event -SourceIdentifier "Approve-Start-OpenXML" -MessageData $IO -Sender $MyInvocation.MyCommand -EventArguments $IO + + # We will just wait almost no time, so that the handler can run. 
+ Start-Sleep -Milliseconds 0 + + # To reject the event, the handler can put one of three values in the `$event.MessageData` + # (Rejected, Reject, or No); if any is set, close the already-started listener and do not serve. + if ($beforeEvent.MessageData.Rejected -or + $beforeEvent.MessageData.Reject -or + $beforeEvent.MessageData.No) { + $httpListener.Close(); return + } + # Start a thread job + $startedJob = Start-ThreadJob -ScriptBlock { + param([Collections.IDictionary]$IO) + + # unpack our IO into local variables + foreach ($variableName in $IO.Keys) { + $ExecutionContext.SessionState.PSVariable.Set($variableName, $IO[$variableName]) + } + + # declare a little filter to serve a part + + filter servePart { + $uriPart = $_ + $packagePart = $package.GetPart($uriPart) + $response.ContentType = $packagePart.ContentType + $partStream = $packagePart.GetStream() + $partStream.CopyTo($response.OutputStream) + $partStream.Close() + $response.Close() + } + + # and start listening + :nextRequest while ($httpListener.IsListening) { + $getContextAsync = $httpListener.GetContextAsync() + # wait in short increments to minimize CPU impact and stay snappy + while (-not $getContextAsync.Wait(7)) { + + } + # Get our listener context + $context = $getContextAsync.Result + # and break that into a request and response + $request, $response = $context.Request, $context.Response + + # If they asked for an inappropriate method + if ($request.HttpMethod -notin 'GET', 'HEAD') { + # use the appropriate status code + $response.StatusCode = 405 + foreach ($package in $openXml) { + # and serve any /405.html we find. + if ($package.PartExists("/405.html")) { + "/405.html" | servePart + continue nextRequest + } + } + # close out + $response.close() + # and continue to the next request + continue nextRequest + } + + # Get the local path + $localPath = $request.Url.LocalPath + # if it lacks an extension, look for an index. 
+ $uriPart = if ($localPath -notmatch '\..+?$') { + ($localPath -replace '/$') + '/index.html' + } else { + $localPath + } + + # If we find the part + foreach ($package in $openXml) { + if ($package.PartExists($uriPart)) { + # serve it and continue + $uriPart | servePart + continue nextRequest + } + } + # If we did not find a part, set the appropriate status code + $response.StatusCode = 404 + foreach ($package in $openXml) { + # look for a 404 to serve + if ($package.PartExists("/404.html")) { + "/404.html" | servePart + continue nextRequest + } + } + # and close the response + $response.Close() + } + } -ArgumentList $IO -Name $RootUrl -ThrottleLimit 100 | + Add-Member NoteProperty IO $IO -Force -PassThru | + Add-Member NoteProperty HttpListener $httpListener -Force -PassThru | + Add-Member NoteProperty OpenXML $openXml -Force -PassThru + + $null = New-Event -SourceIdentifier Start-OpenXML -Sender $MyInvocation.MyCommand -EventArguments $startedJob -MessageData $IO + + $startedJob + + } +} \ No newline at end of file diff --git a/Commands/Stop-OpenXML.ps1 b/Commands/Stop-OpenXML.ps1 new file mode 100644 index 0000000..b20c1d6 --- /dev/null +++ b/Commands/Stop-OpenXML.ps1 @@ -0,0 +1,42 @@ +function Stop-OpenXML { + <# + .SYNOPSIS + Stops an OpenXML Server + .DESCRIPTION + Stops a server running from an OpenXML. + .NOTES + This will stop any jobs with HTTP listeners and OpenXML + #> + param( + # The input object. + # Any input without an HttpListener and OpenXML property will be ignored. + [Parameter(ValueFromPipeline)] + [PSObject] + $InputObject, + + # If set, will pass thru the input. 
+ [switch] + $PassThru + ) + + process { + # If the input is lacking a listener or OpenXML, return + if (-not $InputObject.HttpListener -or -not $InputObject.OpenXML) { + # (pass thru the input if we asked) + if ($PassThru) { + return $InputObject + } + return + } + + # Stop the listener + $InputObject.HttpListener.Stop() + # and attempt to stop the job (ideally so that it reports as stopped, not completed) + if ($InputObject.StopJob -is [Management.Automation.PSMethod]) { + $InputObject.StopJob() + } + if ($PassThru) { + return $InputObject + } + } +} \ No newline at end of file diff --git a/Examples/ASlideDeck.pptx b/Examples/ASlideDeck.pptx new file mode 100644 index 0000000..4954f5a Binary files /dev/null and b/Examples/ASlideDeck.pptx differ diff --git a/Examples/BarChart.xlsx b/Examples/BarChart.xlsx new file mode 100644 index 0000000..e0736cb Binary files /dev/null and b/Examples/BarChart.xlsx differ diff --git a/Examples/Blank.docx b/Examples/Blank.docx new file mode 100644 index 0000000..49c9da3 Binary files /dev/null and b/Examples/Blank.docx differ diff --git a/Examples/Blank.pptx b/Examples/Blank.pptx new file mode 100644 index 0000000..fc3417f Binary files /dev/null and b/Examples/Blank.pptx differ diff --git a/Examples/Blank.xlsx b/Examples/Blank.xlsx new file mode 100644 index 0000000..c4a51de Binary files /dev/null and b/Examples/Blank.xlsx differ diff --git a/Examples/Blank404.docx b/Examples/Blank404.docx new file mode 100644 index 0000000..3042de4 Binary files /dev/null and b/Examples/Blank404.docx differ diff --git a/Examples/EmbeddedSVG.docx b/Examples/EmbeddedSVG.docx new file mode 100644 index 0000000..8db1773 Binary files /dev/null and b/Examples/EmbeddedSVG.docx differ diff --git a/Examples/HelloWorld.docx b/Examples/HelloWorld.docx new file mode 100644 index 0000000..5e6f6b6 Binary files /dev/null and b/Examples/HelloWorld.docx differ diff --git a/Examples/HelloWorld.pptx b/Examples/HelloWorld.pptx new file mode 100644 index 
0000000..598ee88 Binary files /dev/null and b/Examples/HelloWorld.pptx differ diff --git a/Examples/HelloWorld.xlsx b/Examples/HelloWorld.xlsx new file mode 100644 index 0000000..5b99805 Binary files /dev/null and b/Examples/HelloWorld.xlsx differ diff --git a/Examples/Numbers.xlsx b/Examples/Numbers.xlsx new file mode 100644 index 0000000..e43a876 Binary files /dev/null and b/Examples/Numbers.xlsx differ diff --git a/Examples/Sample.docx b/Examples/Sample.docx new file mode 100644 index 0000000..d4c9bf4 Binary files /dev/null and b/Examples/Sample.docx differ diff --git a/Examples/Sample.xlsx b/Examples/Sample.xlsx new file mode 100644 index 0000000..de3c66e Binary files /dev/null and b/Examples/Sample.xlsx differ diff --git a/Examples/Sample.xps b/Examples/Sample.xps new file mode 100644 index 0000000..8f0e05f Binary files /dev/null and b/Examples/Sample.xps differ diff --git a/Examples/Sum.xlsx b/Examples/Sum.xlsx new file mode 100644 index 0000000..29c8e75 Binary files /dev/null and b/Examples/Sum.xlsx differ diff --git a/Examples/Table.docx b/Examples/Table.docx new file mode 100644 index 0000000..d7f3312 Binary files /dev/null and b/Examples/Table.docx differ diff --git a/OpenXML.psd1 b/OpenXML.psd1 new file mode 100644 index 0000000..b6cfc54 --- /dev/null +++ b/OpenXML.psd1 @@ -0,0 +1,123 @@ +@{ + +# Script module or binary module file associated with this manifest. +RootModule = 'OpenXML.psm1' + +# Version number of this module. +ModuleVersion = '0.1' + +# Supported PSEditions +# CompatiblePSEditions = @() +Description = 'Automate OpenXML. Excel, Word, and PowerPoint automation in PowerShell.' 
+ +# ID used to uniquely identify this module +GUID = 'ce1bf009-73ae-4293-b57f-a19aaaa793b7' + +# Author of this module +Author = 'James Brundage' + +# Company or vendor of this module +CompanyName = 'Start-Automating' + +# Copyright statement for this module +Copyright = '2025 Start-Automating' + +TypesToProcess = @('OpenXML.types.ps1xml') + +# Functions to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no functions to export. +FunctionsToExport = 'Get-OpenXML', 'Set-OpenXML', 'Import-OpenXML', 'Export-OpenXML', 'Start-OpenXML', 'Stop-OpenXML' + +# Cmdlets to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no cmdlets to export. +CmdletsToExport = '*' + +# Variables to export from this module +VariablesToExport = '*' + +# Aliases to export from this module, for best performance, do not use wildcards and do not delete the entry, use an empty array if there are no aliases to export. +AliasesToExport = 'OpenXML', 'Close-OpenXML', 'Open-OpenXML', 'Restore-OpenXML', 'Save-OpenXML' + +# Private data to pass to the module specified in RootModule/ModuleToProcess. This may also contain a PSData hashtable with additional module metadata used by PowerShell. +PrivateData = @{ + + PSData = @{ + + # Tags applied to this module. These help with module discovery in online galleries. + Tags = @('Word','Excel','PowerPoint','OpenXML','Office Open XML','OOXML') + + # A URL to the license for this module. + LicenseUri = 'https://github.com/PowerShellWeb/OpenXML/blob/main/LICENSE' + + # A URL to the main website for this project. + ProjectUri = 'https://github.com/PowerShellWeb/OpenXML' + + # A URL to an icon representing this module. 
+ # IconUri = '' + + # ReleaseNotes of this module + ReleaseNotes = @' +## OpenXML 0.1 + +* Initial Build of OpenXML Module (#1) +* Commands: + * `Get-OpenXML` (#2) + * `Import-OpenXML` (#14) + * `Export-OpenXML` (#15) + * `Close-OpenXML` (#16) + * `Copy-OpenXML` (#18) + * `Set-OpenXML` (#19) + * `Start-OpenXML` (#28) + * `Stop-OpenXML` (#29) +* Initial Extended Types + * `OpenXML` + * `OpenXML.get_Parts` (#17) + * `OpenXML.get_Created` (#23) + * `OpenXML.get_Modified` (#24) + * `OpenXML.File` + * `OpenXML.File.get_DocumentProperty` (#13) + * `OpenXML.File` default display (#7) + * `OpenXML.Excel.File` + * `OpenXML.Excel.File.get_Worksheets` (#5) + * `OpenXML.Excel.File.get_SharedString` (#25) + * `OpenXML.Excel.Worksheet` + * `OpenXML.Excel.Worksheet.get_Cell` (#6) + * `OpenXML.Excel.Worksheet.get_Formula` (#26) + * `OpenXML.PowerPoint.File` + * `OpenXML.PowerPoint.File.get_Slides` (#8) + * `OpenXML.PowerPoint.File.get_Text` (#9) + * `OpenXML.PowerPoint.Slide` + * `OpenXML.PowerPoint.get_Text` (#10) + * `OpenXML.Word.File` + * `OpenXML.Word.File.get_Text` (#11) +* Sample Documents (#3) +* Initial Tests (#27) +* Build workflow + * Building types with [EZOut](https://github.com/StartAutomating/EZOut) (#4) + * Building GitHub Workflow with [PSDevOps](https://github.com/StartAutomating/PSDevOps) (#12) +* Core Documentation + * README (#1) + * CODE_OF_CONDUCT (#20) + * CONTRIBUTING (#21) + * SECURITY (#22) +'@ + + # Prerelease string of this module + # Prerelease = '' + + # Flag to indicate whether the module requires explicit user acceptance for install/update/save + # RequireLicenseAcceptance = $false + + # External dependent modules of this module + # ExternalModuleDependencies = @() + + } # End of PSData hashtable + +} # End of PrivateData hashtable + +# HelpInfo URI of this module +# HelpInfoURI = '' + +# Default prefix for commands exported from this module. Override the default prefix using Import-Module -Prefix. 
+# DefaultCommandPrefix = '' + +} + diff --git a/OpenXML.psm1 b/OpenXML.psm1 new file mode 100644 index 0000000..87c84b3 --- /dev/null +++ b/OpenXML.psm1 @@ -0,0 +1,15 @@ +$CommandsPath = Join-Path $PSScriptRoot 'Commands' +foreach ($file in Get-ChildItem -Path $CommandsPath -Filter '*-*.ps1') { + if ($file.Name -like '*.*.ps1') { + continue + } + . $file.FullName +} + +if (-not ('IO.Packaging.Package' -as [type])) { + $addedTypes = Add-type -AssemblyName System.IO.Packaging -PassThru + $packageTypeFound = $addedTypes | Where-Object FullName -eq 'System.IO.Packaging.Package' + if (-not $packageTypeFound) { + Write-Warning "Could not find [IO.Packaging.Package]" + } +} \ No newline at end of file diff --git a/OpenXML.tests.ps1 b/OpenXML.tests.ps1 new file mode 100644 index 0000000..99d9bb6 --- /dev/null +++ b/OpenXML.tests.ps1 @@ -0,0 +1,43 @@ +Push-Location $PSScriptRoot +describe OpenXML { + it 'Is a zip in a trenchcoat' { + $blankDocument = Get-OpenXML -FilePath ./Examples/Blank.docx + $blankDocument.Parts.Count | Should -BeGreaterThan 1 + } + + it 'Can access metadata' { + $blankDocument = Get-OpenXML -FilePath ./Examples/Blank.docx + $blankDocument.Created | Should -BeLessThan ([DateTime]::Now) + $blankDocument.Modified | Should -BeLessThan ([DateTime]::Now) + } + + + context Excel { + it 'Can Get Cells' { + $helloExcel = OpenXML ./Examples/HelloWorld.xlsx + $helloExcel.Worksheets.Cell.Values | Should -Be 'Hello World' + } + } + context PowerPoint { + it 'Can Get Text' { + $helloPowerPoint = OpenXML ./Examples/HelloWorld.pptx + $helloPowerPoint.Text -replace '^[\n\r]' | Should -Be 'Hello World' + } + it 'Can Get Slides' { + $aSlideDeck = OpenXML ./Examples/ASlideDeck.pptx + $aSlideDeck.Slides.Count | Should -BeGreaterThan 1 + $aSlideDeck.Slides.SlideNumber | Should -BeGreaterOrEqual 1 + } + } + + context Word { + it 'Can Get Text' { + $helloWorld = Get-OpenXML -FilePath ./Examples/HelloWorld.docx + $helloWorld.Text -replace '^[\n\r]+' | Should -Be 'Hello World' 
+ } + } + + +} + +Pop-Location \ No newline at end of file diff --git a/OpenXML.types.ps1xml b/OpenXML.types.ps1xml new file mode 100644 index 0000000..2e801d0 --- /dev/null +++ b/OpenXML.types.ps1xml @@ -0,0 +1,395 @@ + + + + OpenXML + + + CreatedAt + Created + + + CreationTime + Created + + + DocProps + DocumentProperty + + + DocumentProperties + DocumentProperty + + + LastWriteTime + Modified + + + ModifiedAt + Modified + + + Part + Parts + + + Created + + <# +.SYNOPSIS + Gets OpenXML creation time +.DESCRIPTION + Gets the time an OpenXML file was created, according to core document metadata. +.EXAMPLE + Get-OpenXML ./Examples/Blank.docx | Select-Object -ExpandProperty Created +#> +param() +$this.Parts.'/docProps/core.xml'.content.coreProperties.created.innerText -as [DateTime] + + + + DocumentProperty + + if (-not $this.Parts) { return } +$docProps = $this.Parts[$this.Parts.Keys -match '/docProps/'] +$docProps + + + + Modified + + <# +.SYNOPSIS + Gets OpenXML modified time +.DESCRIPTION + Gets the time an OpenXML file was modified, according to core document metadata. 
+.EXAMPLE + Get-OpenXML ./Examples/Blank.docx | Select-Object -ExpandProperty Modified +#> +$this.Parts.'/docProps/core.xml'.content.coreProperties.modified.innerText -as [DateTime] + + + + Parts + + if ($this.'.Parts') { + return $this.'.Parts' +} + + +filter getPartContent { + $part = $_ + $partStream = $part.GetStream() + if (-not $partStream) { return } + switch ($part.ContentType) { + # If the content type looks like XML, read it as XML + { $part.ContentType -match '[\./\+]xml' } { + $streamReader = [IO.StreamReader]::new($partStream) + $streamReader.ReadToEnd() -as [xml] + $streamReader.Close() + break + } + # If the part looks like JSON, read it as JSON + { $part.Uri -match '\.json$'} { + $streamReader = [IO.StreamReader]::new($partStream) + $jsonContent = $streamReader.ReadToEnd() + $streamReader.Close() + $jsonContent | ConvertFrom-Json + break + } + { $part.ContentType -match 'text/.+?$'} { + $streamReader = [IO.StreamReader]::new($partStream) + $textContent = $streamReader.ReadToEnd() + $streamReader.Close() + $textContent + break + } + # Otherwise, read it as a memory stream and return the byte array + default { + $outputStream = [IO.MemoryStream]::new() + $partStream.CopyTo($outputStream) + $outputStream.Seek(0, 'Begin') + $outputStream.ToArray() + } + } + + $partStream.Close() + $partStream.Dispose() +} + +$packageParts = @($this.GetParts()) +$packageContent = [Ordered]@{} + +# Now we will read each part in the package, and store it in an `[Ordered]` dictionary +# Since this _might_ take a while (if you used a lot of PowerPoint images) we want to show a progress bar. 
+ +# Prepare the progress bar +$partCount = 0 +$partTotal = $packageParts.Length +$partProgress = [Ordered]@{Id=Get-Random;Activity='Reading Parts'} + +# Then read each part +@(foreach ($part in $packageParts) { + $partCount++ + # update the progress bar + Write-Progress @partProgress -Status "Reading part $($part.Uri) ($partCount of $partTotal)" -PercentComplete ( + [math]::Round(($partCount * 100/ $partTotal)) + ) + # and store the part in the dictionary + $packageContent["$($part.Uri)"] = + [PSCustomObject]@{ + PSTypeName = 'OpenXML.Part' + Uri = $part.Uri + ContentType = $part.ContentType + # (we'll use our helper function to get the content) + Content = $part | getPartContent + FilePath = "$resolvedPath" + } +}) +<## Now that we've read all parts, we can close the package +$filePackage.Close() +# and the memory stream, too. +$memoryStream.Close()#> + +# and finally, complete the progress bar. +Write-Progress @partProgress -Status "Completed reading $partCount parts" -Completed +$this | Add-Member NoteProperty '.Parts' -Force $packageContent + +return $this.'.Parts' + + + + + + + OpenXML.Excel.File + + + SharedString + SharedStrings + + + Worksheet + Worksheets + + + SharedStrings + + <# +.SYNOPSIS + Gets an Excel File's Shared Strings +.DESCRIPTION + Gets an Excel File's Shared Strings. + + In Excel, any cell with text in it really contains an index of it's shared string. 
+.EXAMPLE + Get-OpenXML ./Examples/HelloWorld.xlsx | Select -Expand SharedString +#> +,@($this.Parts.'/xl/sharedStrings.xml'.Content.sst.si.t) + + + + Worksheets + + $worksheetNames = @($this.Parts['/docProps/app.xml'].Content.Properties.TitlesOfParts.vector.lpstr) + +$worksheetsInOrder = @($this.Parts[$this.Parts.keys -match '/sheet\d+'] | + Sort-Object { $_.Uri -replace '\D' -as [int]} | + Select-Object) + +$worksheetCounter = 0 +foreach ($worksheet in $worksheetsInOrder) { + $worksheetName = $worksheetNames[$worksheetCounter] + if (-not $worksheetName) { + $worksheetName = "Sheet$($worksheetCounter + 1)" + } + [PSCustomObject][Ordered]@{ + PSTypeName = 'OpenXML.Excel.Worksheet' + FilePath = $this.FilePath + Uri = $worksheet.Uri + WorksheetName = $worksheetName + Content = $worksheet.Content + ContentType = $worksheet.ContentType + OpenXML = $this + } + $worksheetCounter++ +} + + + + + + OpenXML.Excel.Worksheet + + + Cells + Cell + + + Formulae + Formula + + + Formulas + Formula + + + Cell + + <# +.SYNOPSIS + Gets cells from Excel +.DESCRIPTION + Gets individual cells in an Excel worksheet. +.EXAMPLE + Get-OpenXML ./Examples/Sum.xlsx | + Select-Object -ExpandProperty Worksheets | + Select-Object -ExpandProperty Cell +#> +param() +$excelCells = [Ordered]@{} +# Get each row from our sheet data +foreach ($worksheetRow in $this.content.worksheet.sheetdata.row) { + # and get each column from each row + foreach ($worksheetColumn in $worksheetRow.c) { + # The `r` attribute contains the cell coordinate + $excelCells[$worksheetColumn.r] = + # Excel cells are always numbers. + # If the cell contains a string, it is actually stored as an index in "sharedStrings" + if ($worksheetColumn.t -eq 's') { + # which makes indexing awfully easy (and has the side-effect of reducing the total file size for worksheets with similar text) + $this.OpenXML.SharedStrings[$worksheetColumn.v] + } else { + # Otherwise, the value should be `v`. 
+ $worksheetColumn.v + } + } +} + +# Return our cells as dictionary +return $excelCells + + + + Formula + + $formulaCells = [Ordered]@{} +foreach ($worksheetRow in $this.content.worksheet.sheetdata.row) { + foreach ($worksheetColumn in $worksheetRow.c) { + if ($worksheetColumn.f) { + $formulaCells[$worksheetColumn.r] = $worksheetColumn.f + } + } +} +$formulaCells + + + + + + OpenXML.File + + + PSStandardMembers + + + DefaultDisplayPropertySet + + FilePath + Parts + + + + + + DefaultDisplay + FilePath +Parts + + + + + OpenXML.PowerPoint.File + + + Slide + Slides + + + Slides + + $slidesInOrder = @($this.Parts[$this.Parts.keys -match '/slide\d+\.xml$'] | + Sort-Object { $_.Uri -replace '\D' -as [int]} | + Select-Object) + + +foreach ($slide in $slidesInOrder) { + [PSCustomObject][Ordered]@{ + PSTypeName = 'OpenXML.PowerPoint.Slide' + FilePath = $this.FilePath + Uri = $slide.Uri + SlideNumber = $slide.Uri -replace '\D' -as [int] + Content = $slide.Content + ContentType = $slide.ContentType + } +} + + + + + + + Text + + $this.Slides.Content | + Select-Xml -XPath '//a:t' -Namespace @{a='http://schemas.openxmlformats.org/drawingml/2006/main'} | + Foreach-Object { + if ($_.Node.LocalName -eq 't') { + $_.Node.InnerText + } else { + [Environment]::NewLine + } + } + + + + + + OpenXML.PowerPoint.Slide + + + Text + + @($this.Content | + Select-Xml -XPath '//a:t' -Namespace @{a='http://schemas.openxmlformats.org/drawingml/2006/main'} | + Foreach-Object { + if ($_.Node.LocalName -eq 't') { + $_.Node.InnerText + } else { + [Environment]::NewLine + } + }) -join '' + + + + + + OpenXML.Word.File + + + Text + + @($this.Parts['/word/document.xml'].Content | + Select-Xml -XPath '//w:t|//w:p' -Namespace @{w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'} | + Foreach-Object { + if ($_.Node.LocalName -eq 't') { + $_.Node.InnerText + } else { + [Environment]::NewLine + } + + }) -join '' + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 
100644 index 0000000..a3377ca --- /dev/null +++ b/README.md @@ -0,0 +1,109 @@ +# OpenXML +## Automate OpenXML. Excel, Word, and PowerPoint automation in PowerShell. + +OpenXML is the standard used for Office documents. + +You can think of it as a .zip in a trenchcoat. + +Every part of an Excel, PowerPoint, or Word document is saved into an archive. + +You can prove this to yourself by renaming any .docx file to .zip. + +I sometimes call this the "zip epiphany", because it helps you understand how much technology is really a "zip in a trenchcoat". + +(hint: it's _much_ more than just OpenXML files) + +This module is here to help you automate, inspect, and understand OpenXML files. +### Installing and Importing +You can install OpenXML from the PowerShell gallery + +~~~PowerShell +Install-Module OpenXML -Scope CurrentUser -Force +~~~ + +Once installed, you can simply `Import-Module` +~~~PowerShell +Import-Module OpenXML +~~~ + +### Commands +* Export-OpenXML +* Get-OpenXML +* Import-OpenXML +* Set-OpenXML +### Demos +~~~PowerShell + + + # Get text from a word document +Get-OpenXML ./Examples/HelloWorld.docx | + Select-Object -ExpandProperty Text + + +~~~ +~~~PowerShell + + +# Get modification times +Get-OpenXML ./Examples/HelloWorld.docx | + Select-Object -Property Created, Modified + + +~~~ +~~~PowerShell + +# Get PowerPoint slides +Get-OpenXML ./Examples/ASlideDeck.pptx | + Select-Object -ExpandProperty Slides + +~~~ +~~~PowerShell + + +# Get text from PowerPoint +Get-OpenXML ./Examples/ASlideDeck.pptx | + Select-Object -ExpandProperty Text + + +~~~ +~~~PowerShell + + +# Get worksheets from Excel + +Get-OpenXML ./Examples/Sample.xlsx | + Select-Object -ExpandProperty Worksheets + + +~~~ +~~~PowerShell + + +# Get cells from Excel + +Get-OpenXML ./Examples/Sample.xlsx | + Select-Object -ExpandProperty Worksheets | + Select-Object -ExpandProperty Cell + + +~~~ +~~~PowerShell + + +# Get formulas from Excel + +Get-OpenXML ./Examples/Sum.xlsx | + Select-Object 
-ExpandProperty Worksheets | + Select-Object -ExpandProperty Formula + +~~~ +### Roadmap +While OpenXML has been around since 2006, this module is considerably younger. + +It has a large amount of room to grow. + +The primary goal for the foreseeable future is to increase coverage of office features. If you would like to help, please consider [contributing](CONTRIBUTING.md). +### Security +OpenXML presents some unique security challenges. + +This module makes OpenXML documents easier to read and write, which can be useful to both red and blue teams. Please see the [security guide](SECURITY.md) for more information. diff --git a/README.md.ps1 b/README.md.ps1 new file mode 100644 index 0000000..a4bc6cf --- /dev/null +++ b/README.md.ps1 @@ -0,0 +1,128 @@ + + +$ThisModule = Import-Module ./ -PassThru + + +$intro = @' + +OpenXML is the standard used for Office documents. + +You can think of it as a .zip in a trenchcoat. + +Every part of an Excel, PowerPoint, or Word document is saved into an archive. + +You can prove this to yourself by renaming any .docx file to .zip. + +I sometimes call this the "zip epiphany", because it helps you understand how much technology is really a "zip in a trenchcoat". + +(hint: it's _much_ more than just OpenXML files) + +This module is here to help you automate, inspect, and understand OpenXML files. 
+'@ + + + +"# $($ThisModule.Name)" + +"## $($ThisModule.Description)" + +$intro + +"### Installing and Importing" + +@" +You can install $($ThisModule.Name) from the PowerShell gallery + +~~~PowerShell +Install-Module $($ThisModule.Name) -Scope CurrentUser -Force +~~~ + +Once installed, you can simply ``Import-Module`` +~~~PowerShell +Import-Module $($ThisModule.Name) +~~~ + +"@ + + +"### Commands" + +$thisModulesFunctions = $ThisModule.ExportedFunctions.Keys | Sort-Object # keys are plain strings, so sort by value +foreach ($command in $thisModulesFunctions) { + "* $($command)" +} + +"### Demos" + +$quickDemos = { + + # Get text from a word document +Get-OpenXML ./Examples/HelloWorld.docx | + Select-Object -ExpandProperty Text + +}, { + +# Get modification times +Get-OpenXML ./Examples/HelloWorld.docx | + Select-Object -Property Created, Modified + +}, { +# Get PowerPoint slides +Get-OpenXML ./Examples/ASlideDeck.pptx | + Select-Object -ExpandProperty Slides +},{ + +# Get text from PowerPoint +Get-OpenXML ./Examples/ASlideDeck.pptx | + Select-Object -ExpandProperty Text + +}, { + +# Get worksheets from Excel + +Get-OpenXML ./Examples/Sample.xlsx | + Select-Object -ExpandProperty Worksheets + +},{ + +# Get cells from Excel + +Get-OpenXML ./Examples/Sample.xlsx | + Select-Object -ExpandProperty Worksheets | + Select-Object -ExpandProperty Cell + +},{ + +# Get formulas from Excel + +Get-OpenXML ./Examples/Sum.xlsx | + Select-Object -ExpandProperty Worksheets | + Select-Object -ExpandProperty Formula +} + + +foreach ($demo in $quickDemos) { + "~~~PowerShell" + "$demo" + "~~~" +} + +"### Roadmap" + +$roadmap = @' +While OpenXML has been around since 2006, this module is considerably younger. + +It has a large amount of room to grow. + +The primary goal for the foreseeable future is to increase coverage of office features. If you would like to help, please consider [contributing](CONTRIBUTING.md). +'@ + +$roadmap + +"### Security" + +@' +OpenXML presents some unique security challenges. 
+ +This module makes OpenXML documents easier to read and write, which can be useful to both red and blue teams. Please see the [security guide](SECURITY.md) for more information. +'@ \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..3d39130 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,55 @@ +We take security very seriously. + +If you believe you have found a security concern, please [file an issue](https://github.com/PowerShellWeb/OpenXML) describing the issue. + +Each project may have some special security considerations that you may need to be aware of. + +## Special Security Considerations + +OpenXML has some fairly unique security considerations. Any tooling for OpenXML will inherit these concerns. + +### OpenXML Smuggling + +OpenXML files are essentially `.zip` files with a different extension. + +As such, there is a grand history of using OpenXML and other archive files to hide exploits. + +Additionally, unrecognized content within an OpenXML document is often not displayed in any form of the editor. + +It is the opinion of the author that this would be a wonderful thing to flag during load. + +If you work on a program that edits OpenXML documents, please strongly consider this. + +As it stands, OpenXML smuggling is very easy. + +As this module can read and write OpenXML packages, it makes OpenXML smuggling both easier to perform and easier to detect. + +If you see this module deployed in unexpected places, please use this module to search for OpenXML smuggled content. + +### OpenXML Data Scanning + +Word, Excel, and PowerPoint files may all contain personally identifiable information. + +Any tool capable of automatically interacting with office documents can be used to scan for sensitive information. + +This can help the blue team find targets just as much as the red team. + +This is far from the first tool to automate OpenXML, and so this threat is not unique to this tool. 
+ +It is always important to mind your PII, and this tool will help you locate this information. + +To protect a file containing PII from inspection, add password protection or encrypt the file. + +### OpenXML Microservers + +OpenXML files also contain content type information, which allows them to act as efficient servers. + +This can be quite useful for local multiprogram access and for development of small microservers. + +When used to serve an OpenXML document, these microservers may make it easy to exfiltrate information if they are exposed to the broader internet. + +If you see a public facing endpoint serving an OpenXML document, contact your network administrator and cybersecurity teams. + +Additionally, when combined with the OpenXML Smuggling techniques mentioned earlier, it is possible to host a server with _any_ content inside of an OpenXML file. + +Please use this tool to proactively scan for this possibility. \ No newline at end of file diff --git a/Types/OpenXML.Excel.File/Alias.psd1 b/Types/OpenXML.Excel.File/Alias.psd1 new file mode 100644 index 0000000..c51a640 --- /dev/null +++ b/Types/OpenXML.Excel.File/Alias.psd1 @@ -0,0 +1,4 @@ +@{ + SharedString = 'SharedStrings' + Worksheet = 'Worksheets' +} \ No newline at end of file diff --git a/Types/OpenXML.Excel.File/get_SharedStrings.ps1 b/Types/OpenXML.Excel.File/get_SharedStrings.ps1 new file mode 100644 index 0000000..327cccd --- /dev/null +++ b/Types/OpenXML.Excel.File/get_SharedStrings.ps1 @@ -0,0 +1,11 @@ +<# +.SYNOPSIS + Gets an Excel File's Shared Strings +.DESCRIPTION + Gets an Excel File's Shared Strings. + + In Excel, any cell with text in it really contains an index of its shared string. 
+.EXAMPLE
+    Get-OpenXML ./Examples/HelloWorld.xlsx | Select -Expand SharedString
+#>
+,@($this.Parts.'/xl/sharedStrings.xml'.Content.sst.si.t)
\ No newline at end of file
diff --git a/Types/OpenXML.Excel.File/get_Worksheets.ps1 b/Types/OpenXML.Excel.File/get_Worksheets.ps1
new file mode 100644
index 0000000..982989d
--- /dev/null
+++ b/Types/OpenXML.Excel.File/get_Worksheets.ps1
@@ -0,0 +1,38 @@
+<#
+.SYNOPSIS
+    Gets an Excel File's worksheets
+.DESCRIPTION
+    Gets each worksheet part in an Excel file, sorted by the number in its part URI.
+
+    Each worksheet is returned as an `OpenXML.Excel.Worksheet` object that keeps a reference to its file.
+.EXAMPLE
+    Get-OpenXML ./Examples/Sum.xlsx | Select-Object -ExpandProperty Worksheets
+#>
+param()
+# Worksheet titles are read from `/docProps/app.xml`, and are matched to worksheets by position.
+$worksheetNames = @($this.Parts['/docProps/app.xml'].Content.Properties.TitlesOfParts.vector.lpstr)
+
+# Only match the worksheet parts themselves (`/sheetN.xml`).
+# Without the `\.xml$` anchor, relationship parts such as `/xl/worksheets/_rels/sheet1.xml.rels` would also match.
+$worksheetsInOrder = @($this.Parts[$this.Parts.keys -match '/sheet\d+\.xml$'] |
+    Sort-Object { $_.Uri -replace '\D' -as [int]} |
+    Select-Object)
+
+$worksheetCounter = 0
+foreach ($worksheet in $worksheetsInOrder) {
+    $worksheetName = $worksheetNames[$worksheetCounter]
+    # Fall back to a positional name when no title is available.
+    if (-not $worksheetName) {
+        $worksheetName = "Sheet$($worksheetCounter + 1)"
+    }
+    [PSCustomObject][Ordered]@{
+        PSTypeName = 'OpenXML.Excel.Worksheet'
+        FilePath = $this.FilePath
+        Uri = $worksheet.Uri
+        WorksheetName = $worksheetName
+        Content = $worksheet.Content
+        ContentType = $worksheet.ContentType
+        OpenXML = $this
+    }
+    $worksheetCounter++
+}
\ No newline at end of file
diff --git a/Types/OpenXML.Excel.Worksheet/Alias.psd1 b/Types/OpenXML.Excel.Worksheet/Alias.psd1
new file mode 100644
index 0000000..0883a4c
--- /dev/null
+++ b/Types/OpenXML.Excel.Worksheet/Alias.psd1
@@ -0,0 +1,5 @@
+@{
+    Cells = 'Cell'
+    Formulas = 'Formula'
+    Formulae = 'Formula'
+}
\ No newline at end of file
diff --git a/Types/OpenXML.Excel.Worksheet/get_Cell.ps1 b/Types/OpenXML.Excel.Worksheet/get_Cell.ps1
new file mode 100644
index 0000000..838d9e7
--- /dev/null
+++ b/Types/OpenXML.Excel.Worksheet/get_Cell.ps1
@@ -0,0 +1,38 @@
+<#
+.SYNOPSIS
+    Gets cells from Excel
+.DESCRIPTION
+    Gets individual cells in an Excel worksheet.
+
+    Cells are returned as an ordered dictionary, keyed by cell coordinate.
+.EXAMPLE
+    Get-OpenXML ./Examples/Sum.xlsx |
+        Select-Object -ExpandProperty Worksheets |
+        Select-Object -ExpandProperty Cell
+#>
+param()
+$excelCells = [Ordered]@{}
+# The shared strings are read at most once (on the first string cell) and then reused.
+# The getter in get_SharedStrings.ps1 rebuilds its array on every access, so reading it per cell repeats that work.
+$sharedStrings = $null
+# Get each row from our sheet data
+foreach ($worksheetRow in $this.content.worksheet.sheetdata.row) {
+    # and get each column from each row
+    foreach ($worksheetColumn in $worksheetRow.c) {
+        # The `r` attribute contains the cell coordinate
+        $excelCells[$worksheetColumn.r] =
+            # Excel cells are always numbers.
+            # If the cell contains a string, it is actually stored as an index in "sharedStrings"
+            if ($worksheetColumn.t -eq 's') {
+                # which makes indexing awfully easy (and has the side-effect of reducing the total file size for worksheets with similar text)
+                if ($null -eq $sharedStrings) { $sharedStrings = $this.OpenXML.SharedStrings }
+                $sharedStrings[$worksheetColumn.v]
+            } else {
+                # Otherwise, the value should be `v`.
+                $worksheetColumn.v
+            }
+    }
+}
+
+# Return our cells as dictionary
+return $excelCells
\ No newline at end of file
diff --git a/Types/OpenXML.Excel.Worksheet/get_Formula.ps1 b/Types/OpenXML.Excel.Worksheet/get_Formula.ps1
new file mode 100644
index 0000000..9ed256c
--- /dev/null
+++ b/Types/OpenXML.Excel.Worksheet/get_Formula.ps1
@@ -0,0 +1,24 @@
+<#
+.SYNOPSIS
+    Gets formulas from Excel
+.DESCRIPTION
+    Gets the formulas in an Excel worksheet.
+
+    Formulas are returned as an ordered dictionary, keyed by cell coordinate.
+    Cells without a formula are skipped.
+.EXAMPLE
+    Get-OpenXML ./Examples/Sum.xlsx |
+        Select-Object -ExpandProperty Worksheets |
+        Select-Object -ExpandProperty Formula
+#>
+param()
+$formulaCells = [Ordered]@{}
+foreach ($worksheetRow in $this.content.worksheet.sheetdata.row) {
+    foreach ($worksheetColumn in $worksheetRow.c) {
+        # Only cells that have an `f` value are included
+        if ($worksheetColumn.f) {
+            $formulaCells[$worksheetColumn.r] = $worksheetColumn.f
+        }
+    }
+}
+$formulaCells
\ No newline at end of file
diff --git a/Types/OpenXML.File/DefaultDisplay.txt b/Types/OpenXML.File/DefaultDisplay.txt
new file mode 100644
index 0000000..776357c
--- /dev/null
+++ b/Types/OpenXML.File/DefaultDisplay.txt
@@ -0,0 +1,2 @@
+FilePath
+Parts
\ No newline at end of file
diff --git a/Types/OpenXML.PowerPoint.File/Alias.psd1 b/Types/OpenXML.PowerPoint.File/Alias.psd1
new file mode 100644
index 0000000..fe03445
--- /dev/null
+++ b/Types/OpenXML.PowerPoint.File/Alias.psd1
@@ -0,0 +1,3 @@
+@{
+    Slide = 'Slides'
+}
\ No newline at end of file
diff --git a/Types/OpenXML.PowerPoint.File/get_Slides.ps1 b/Types/OpenXML.PowerPoint.File/get_Slides.ps1
new file mode 100644
index 0000000..4805817
--- /dev/null
+++ b/Types/OpenXML.PowerPoint.File/get_Slides.ps1
@@ -0,0 +1,25 @@
+<#
+.SYNOPSIS
+    Gets a PowerPoint File's slides
+.DESCRIPTION
+    Gets each slide part in a PowerPoint file, sorted by the number in its part URI.
+#>
+param()
+$slidesInOrder = @($this.Parts[$this.Parts.keys -match '/slide\d+\.xml$'] |
+    Sort-Object { $_.Uri -replace '\D' -as [int]} |
+    Select-Object)
+
+
+foreach ($slide in $slidesInOrder) {
+    [PSCustomObject][Ordered]@{
+        PSTypeName = 'OpenXML.PowerPoint.Slide'
+        FilePath = $this.FilePath
+        Uri = $slide.Uri
+        # The slide number is whatever digits appear in the part URI
+        SlideNumber = $slide.Uri -replace '\D' -as [int]
+        Content = $slide.Content
+        ContentType = $slide.ContentType
+    }
+}
+
+
diff --git a/Types/OpenXML.PowerPoint.File/get_Text.ps1 b/Types/OpenXML.PowerPoint.File/get_Text.ps1
new file mode 100644
index 0000000..b6f077e
--- /dev/null
+++ b/Types/OpenXML.PowerPoint.File/get_Text.ps1
@@ -0,0 +1,19 @@
+<#
+.SYNOPSIS
+    Gets a PowerPoint File's text
+.DESCRIPTION
+    Gets the text from every slide in a PowerPoint file.
+
+    Each text run is output as a string, and a newline is output for each paragraph.
+#>
+param()
+# Select paragraphs (`a:p`) as well as text runs (`a:t`); with only `a:t` selected, the newline branch could never run.
+$this.Slides.Content |
+    Select-Xml -XPath '//a:t|//a:p' -Namespace @{a='http://schemas.openxmlformats.org/drawingml/2006/main'} |
+    Foreach-Object {
+        if ($_.Node.LocalName -eq 't') {
+            $_.Node.InnerText
+        } else {
+            [Environment]::NewLine
+        }
+    }
\ No newline at end of file
diff --git a/Types/OpenXML.PowerPoint.Slide/get_Text.ps1 b/Types/OpenXML.PowerPoint.Slide/get_Text.ps1
new file mode 100644
index 0000000..44f009b
--- /dev/null
+++ b/Types/OpenXML.PowerPoint.Slide/get_Text.ps1
@@ -0,0 +1,17 @@
+<#
+.SYNOPSIS
+    Gets a PowerPoint Slide's text
+.DESCRIPTION
+    Gets the text of a slide as a single string, with a newline for each paragraph.
+#>
+param()
+# Select paragraphs (`a:p`) as well as text runs (`a:t`); with only `a:t` selected, the newline branch could never run.
+@($this.Content |
+    Select-Xml -XPath '//a:t|//a:p' -Namespace @{a='http://schemas.openxmlformats.org/drawingml/2006/main'} |
+    Foreach-Object {
+        if ($_.Node.LocalName -eq 't') {
+            $_.Node.InnerText
+        } else {
+            [Environment]::NewLine
+        }
+    }) -join ''
\ No newline at end of file
diff --git a/Types/OpenXML.Word.File/get_Text.ps1 b/Types/OpenXML.Word.File/get_Text.ps1
new file mode 100644
index 0000000..d8833db
--- /dev/null
+++ b/Types/OpenXML.Word.File/get_Text.ps1
@@ -0,0 +1,18 @@
+<#
+.SYNOPSIS
+    Gets a Word File's text
+.DESCRIPTION
+    Gets the text of `/word/document.xml` as a single string, with a newline for each paragraph.
+#>
+param()
+@($this.Parts['/word/document.xml'].Content |
+    Select-Xml -XPath '//w:t|//w:p' -Namespace @{w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'} |
+    Foreach-Object {
+        # Text nodes (`w:t`) output their text; paragraphs (`w:p`) output a newline
+        if ($_.Node.LocalName -eq 't') {
+            $_.Node.InnerText
+        } else {
+            [Environment]::NewLine
+        }
+
+    }) -join ''
\ No newline at end of file
diff --git a/Types/OpenXML/Alias.psd1 b/Types/OpenXML/Alias.psd1
new file mode 100644
index 0000000..1bfd370
--- /dev/null
+++ b/Types/OpenXML/Alias.psd1
@@ -0,0 +1,13 @@
+@{
+    Part = 'Parts'
+
+    CreatedAt = 'Created'
+    ModifiedAt = 'Modified'
+
+    # FileInfo compatibility
+    CreationTime = 'Created'
+    LastWriteTime = 'Modified'
+
+    DocumentProperties = 'DocumentProperty'
+    DocProps = 'DocumentProperty'
+}
\ No newline at end of file
diff --git a/Types/OpenXML/get_Created.ps1 b/Types/OpenXML/get_Created.ps1
new file mode 100644
index 0000000..b2b4e2a
--- /dev/null
+++ b/Types/OpenXML/get_Created.ps1
@@ -0,0 +1,10 @@
+<#
+.SYNOPSIS
+    Gets OpenXML creation time
+.DESCRIPTION
+    Gets the time an OpenXML file was created, according to core document metadata.
+.EXAMPLE
+    Get-OpenXML ./Examples/Blank.docx | Select-Object -ExpandProperty Created
+#>
+param()
+$this.Parts.'/docProps/core.xml'.content.coreProperties.created.innerText -as [DateTime]
\ No newline at end of file
diff --git a/Types/OpenXML/get_DocumentProperty.ps1 b/Types/OpenXML/get_DocumentProperty.ps1
new file mode 100644
index 0000000..a9daee0
--- /dev/null
+++ b/Types/OpenXML/get_DocumentProperty.ps1
@@ -0,0 +1,10 @@
+<#
+.SYNOPSIS
+    Gets OpenXML document properties
+.DESCRIPTION
+    Gets the parts of an OpenXML file whose URI contains `/docProps/`.
+#>
+param()
+if (-not $this.Parts) { return }
+$docProps = $this.Parts[$this.Parts.Keys -match '/docProps/']
+$docProps
\ No newline at end of file
diff --git a/Types/OpenXML/get_Modified.ps1 b/Types/OpenXML/get_Modified.ps1
new file mode 100644
index 0000000..3f81032
--- /dev/null
+++ b/Types/OpenXML/get_Modified.ps1
@@ -0,0 +1,9 @@
+<#
+.SYNOPSIS
+    Gets OpenXML modified time
+.DESCRIPTION
+    Gets the time an OpenXML file was modified, according to core document metadata.
+.EXAMPLE
+    Get-OpenXML ./Examples/Blank.docx | Select-Object -ExpandProperty Modified
+#>
+$this.Parts.'/docProps/core.xml'.content.coreProperties.modified.innerText -as [DateTime]
\ No newline at end of file
diff --git a/Types/OpenXML/get_Parts.ps1 b/Types/OpenXML/get_Parts.ps1
new file mode 100644
index 0000000..a3a0ef7
--- /dev/null
+++ b/Types/OpenXML/get_Parts.ps1
@@ -0,0 +1,106 @@
+<#
+.SYNOPSIS
+    Gets OpenXML parts
+.DESCRIPTION
+    Gets the parts of an OpenXML package as an ordered dictionary, keyed by part URI.
+
+    Each part's content is read as XML, JSON, text, or bytes, depending on its content type and URI.
+
+    Parts are read once, then cached on the object in a `.Parts` note property.
+.EXAMPLE
+    Get-OpenXML ./Examples/Blank.docx | Select-Object -ExpandProperty Parts
+#>
+param()
+# If the parts have already been read, return the cached dictionary.
+if ($this.'.Parts') {
+    return $this.'.Parts'
+}
+
+# Reads the content of one package part from the pipeline.
+# Outputs [xml], parsed JSON, a string, or a [byte[]], depending on the part.
+filter getPartContent {
+    $part = $_
+    $partStream = $part.GetStream()
+    if (-not $partStream) { return }
+    try {
+        # Decide how to read the part.  The order matters: XML wins over JSON, which wins over text.
+        $isXml  = $part.ContentType -match '[\./\+]xml'
+        $isJson = $part.Uri -match '\.json$'
+        $isText = $part.ContentType -match 'text/.+?$'
+        if ($isXml -or $isJson -or $isText) {
+            # All three textual formats start by reading the whole stream as a string
+            $streamReader = [IO.StreamReader]::new($partStream)
+            try {
+                $partText = $streamReader.ReadToEnd()
+            } finally {
+                $streamReader.Dispose()
+            }
+            if ($isXml) {
+                # If the content type looks like XML, read it as XML
+                $partText -as [xml]
+            } elseif ($isJson) {
+                # If the part looks like JSON, read it as JSON
+                $partText | ConvertFrom-Json
+            } else {
+                # If the content type is text, return the string
+                $partText
+            }
+        } else {
+            # Otherwise, copy it to a memory stream and return the byte array
+            $outputStream = [IO.MemoryStream]::new()
+            try {
+                $partStream.CopyTo($outputStream)
+                # ToArray() returns the whole buffer whatever the position, so there is no need to Seek
+                # (and an unassigned Seek() would put its return value, `0`, into the part content).
+                # The leading comma keeps the bytes together as a single [byte[]].
+                ,$outputStream.ToArray()
+            } finally {
+                $outputStream.Dispose()
+            }
+        }
+    } finally {
+        # Always release the part stream, even if parsing failed
+        $partStream.Dispose()
+    }
+}
+
+$packageParts = @($this.GetParts())
+$packageContent = [Ordered]@{}
+# Prefer the file path recorded on the package.
+# `$resolvedPath` is not defined in this script, so it is only a fallback for a caller that happens to define it.
+$partFilePath = if ($this.FilePath) { "$($this.FilePath)" } else { "$resolvedPath" }
+
+# Now we will read each part in the package, and store it in an `[Ordered]` dictionary
+# Since this _might_ take a while (if you used a lot of PowerPoint images) we want to show a progress bar.
+
+# Prepare the progress bar
+$partCount = 0
+$partTotal = $packageParts.Length
+$partProgress = [Ordered]@{Id=Get-Random;Activity='Reading Parts'}
+
+# Then read each part
+foreach ($part in $packageParts) {
+    $partCount++
+    # update the progress bar
+    Write-Progress @partProgress -Status "Reading part $($part.Uri) ($partCount of $partTotal)" -PercentComplete (
+        [math]::Round(($partCount * 100/ $partTotal))
+    )
+    # and store the part in the dictionary
+    $packageContent["$($part.Uri)"] =
+        [PSCustomObject]@{
+            PSTypeName = 'OpenXML.Part'
+            Uri = $part.Uri
+            ContentType = $part.ContentType
+            # (we'll use our helper function to get the content)
+            Content = $part | getPartContent
+            FilePath = $partFilePath
+        }
+}
+# NOTE: the package itself (`$this`) is not closed here.
+
+# and finally, complete the progress bar.
+Write-Progress @partProgress -Status "Completed reading $partCount parts" -Completed
+# Cache the dictionary on the object, so the package is only read once.
+$this | Add-Member NoteProperty '.Parts' -Force $packageContent
+
+return $this.'.Parts'