diff --git a/Commands/Get-Web.ps1 b/Commands/Get-Web.ps1
index b54a6d5..3ce2890 100644
--- a/Commands/Get-Web.ps1
+++ b/Commands/Get-Web.ps1
@@ -3,7 +3,7 @@
.Synopsis
Gets content from the web, or parses web content.
.Description
- Gets content from the web.
+ Gets content from the web.
If -Tag is passed, extracts out tags from within the document.
@@ -14,7 +14,7 @@
.Example
# Extract the rows from ConvertTo-HTML
$text = Get-ChildItem | Select Name, LastWriteTime | ConvertTo-HTML | Out-String
- Get-Web "tr" $text
+ Get-Web -Tag "tr" -Html $text
.Example
# Extract all PHP elements from a directory of .php scripts
Get-ChildItem -Recurse -Filter *.php |
@@ -39,13 +39,7 @@
ForEach-Object {
Get-Web -Html $_.Xml.InnerText -AsMicrodata -ItemType $schema
}
- .Example
- # List the top 1000 sites on the web:
- Get-Web "http://www.google.com/adplanner/static/top1000/" -Tag 'a' |
- where-Object {$_.Tag -like "*_blank*" } |
- ForEach-Object {
- ([xml]$_.StartTag.Replace('"t', '" t')).a.href
- }
+
.Link
http://schema.org
#>
@@ -53,58 +47,46 @@
[CmdletBinding(DefaultParameterSetName='HTML')]
[OutputType([PSObject],[string])]
param(
+ # The Url
+ [Parameter(Mandatory=$true,Position=0,ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Alias('Uri')]
+ [string]$Url,
+
# The tags to extract.
- [Parameter(
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(ValueFromPipelineByPropertyName=$true)]
[string[]]$Tag,
# If used with -Tag, -RequireAttribute will only match tags with a given keyword in the tag
+ [Parameter(ValueFromPipelineByPropertyName=$true)]
[string[]]$TextInTag,
# The source HTML.
- [Parameter(Mandatory=$true,
- ParameterSetName='HTML',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(Mandatory=$true,ParameterSetName='HTML',ValueFromPipelineByPropertyName=$true)]
[string]$Html,
- # The Url
- [Parameter(Mandatory=$true,
- Position=0,
- ParameterSetName='Url',
- ValueFromPipelineByPropertyName=$true)]
- [Alias('Uri')]
- [string]$Url,
-
# The root of the website.
# All images, css, javascript, related links, and pages beneath this root will be downloaded into a hashtable
- [Parameter(Mandatory=$true,
- ParameterSetName='WGet',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(Mandatory=$true,ParameterSetName='WGet',ValueFromPipelineByPropertyName=$true)]
[string]$Root,
# Any parameters to the URL
- [Parameter(ParameterSetName='Url',
- Position=1,
- ValueFromPipelineByPropertyName=$true)]
- [Hashtable]$Parameter,
+ [Parameter(ParameterSetName='Url',Position=1,ValueFromPipelineByPropertyName=$true)]
+ [Collections.IDictionary]$Parameter,
# Filename
- [Parameter(Mandatory=$true,
- ParameterSetName='FileName',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(Mandatory=$true,ParameterSetName='FileName',ValueFromPipelineByPropertyName=$true)]
[Alias('Fullname')]
[ValidateScript({$ExecutionContext.SessionState.Path.GetResolvedPSPathFromPSPath($_)})]
[string]$FileName,
# The User Agent
- [Parameter(ParameterSetName='Url',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
[string]$UserAgent = "PowerShellPipeworks/Get-Web (1.0 powershellpipeworks.com)",
# If set, will not show progress for long-running operations
[Switch]$HideProgress,
- # If set, returns resutls as bytes
+ # If set, returns results as bytes
[Alias('Byte', 'Bytes')]
[Switch]$AsByte,
@@ -114,10 +96,25 @@
# If set, returns results as json
[Switch]$AsJson,
+
+ # If set, will output the results of a web request to a file.
+ # This is the best option for large content, as it avoids excessive memory consumption.
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Parameter(ParameterSetName='WGet',ValueFromPipelineByPropertyName=$true)]
+ [string]
+ $OutputPath,
+ # An output stream.
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [IO.Stream]
+ $OutputStream,
+
# If set, extracts Microdata out of a page
[Alias('Microdata')]
[Switch]$AsMicrodata,
+
+ # If set, extracts data attributes.
+ [switch]$DataAttribute,
# If set, will get back microdata from the page that matches an itemtype
[string[]]$ItemType,
@@ -129,375 +126,217 @@
[Switch]$MetaData,
# The MIME content type you're requesting from the web site
+ [Alias('CT')]
[string]$ContentType,
+ # A list of acceptable content types. These are used for the Accept header, and to compare the final content type to determine if it was unexpected
+ [string[]]$Accept,
+
# The credential used to connect to the web site
- [Parameter(ParameterSetName='Url',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Alias('Credential','C')]
[Management.Automation.PSCredential]
$WebCredential,
# If set, will use the default user credential to connect to the web site
- [Parameter(ParameterSetName='Url',
- ValueFromPipelineByPropertyName=$true)]
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
[switch]
- $UseDefaultCredential,
-
+ $UseDefaultCredential,
- # The HTTP method to use
- [Parameter(ParameterSetName='Url',
- ValueFromPipelineByPropertyName=$true)]
+ # The HTTP method.
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
[ValidateSet('GET','POST', 'PUT', 'DELETE', 'OPTIONS', 'HEAD', 'TRACE', 'CONNECT', 'MERGE')]
+ [Alias('M')]
[string]$Method = "GET",
# a hashtable of headers to send with the request.
- [Hashtable]$Header,
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Alias('Headers','H')]
+ [Collections.IDictionary]$Header,
# The Request Body. This can be either a string, or bytes
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [PSObject]
$RequestBody,
+ # If set, will request the web site asynchronously, and return the results
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Switch]
+ $Async,
+
+ # The Request String Encoding
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [ValidateScript({[Text.Encoding]::$_ -ne $null})]
+ [string]
+ $RequestStringEncoding = "UTF8",
+
+ # The signature message. This parameter is used with -SignatureKey, -SignaturePrefix, and -SignatureAlgorithmn to create an Authorization header.
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [string]
+ $Signature,
+
+ # The signature prefix. This will be prepended to the computed authorization header.
+ # This parameter is used with -Signature, -SignatureKey, and -SignatureAlgorithmn to create an Authorization header
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [string]
+ $SignaturePrefix,
+
+ # The signature key. This is used to compute the signature hash. This can be either a byte array or a Base64 encoded string
+ # This parameter is used with -Signature, -SignaturePrefix, and -SignatureAlgorithmn to create an Authorization header
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [string]
+ $SignatureKey,
+
+ # The signature algorithm is the hashing algorithm that is used to compute a signature hash. The default is HMACSHA256
+ # This parameter is used with -Signature, -SignatureKey, and -SignaturePrefix to create an Authorization header
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [ValidateSet('MD5', 'SHA1', 'SHA256', 'SHA384', 'SHA512', 'HMAC', 'HMACSHA1','HMACSHA256')]
+ [string]
+ $SignatureAlgorithmn = 'HMACSHA256',
+
+ # If set, the signature will be URL encoded
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Switch]
+ $EncodeSignature,
+
+ # One or more thumbprints for certificates
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [string[]]
+ $ThumbPrint,
+
# Any request ascii data. Data will be joined together with &, and will be sent in the request body.
+ [Parameter(ParameterSetName='Url',ValueFromPipelineByPropertyName=$true)]
+ [Alias('d')]
[string[]]
$Data,
- # If set, will use a the Net.WebRequest class to download. Otherwise, will use the xmlhttprequest.
- # Xmlhttprequest adds some extra headers and caches GET requests, so, if you wish to avoid this, -UseWebRequest.
- [Switch]
- $UseWebRequest,
+ [Parameter(Mandatory=$true,ParameterSetName='AsyncResponse',ValueFromPipelineByPropertyName=$true)]
+ [Alias('AsyncResult')]
+ [IAsyncResult]
+ $IASyncResult,
+
+ [Parameter(Mandatory=$true,ParameterSetName='AsyncResponse',ValueFromPipelineByPropertyName=$true)]
+ [PSObject]
+ $WebRequest,
# A Progress Identifier. This is used to show progress inside of an existing layer of progress bars.
- [int]
- $ProgressIdentifier,
+ [int]$ProgressIdentifier,
# If set, the server error will be turned into a result.
# This is useful for servers that provide complex error information inside of XML or JSON.
+ [Parameter(ValueFromPipelineByPropertyName=$true)]
[Switch]
$UseErrorAsResult,
# If set, then a note property will be added to the result containing the response headers
+ [Parameter(ValueFromPipelineByPropertyName=$true)]
[Switch]
$OutputResponseHeader,
# The amount of time before a web request times out.
+ [Parameter(ValueFromPipelineByPropertyName=$true)]
[Timespan]
$Timeout,
- # If set, will request the web site asynchronously, and return the results
- [Switch]
- $Async
- )
-
- begin {
- #region Escape Special Characters
- $replacements = @{
- "
" = "
"
- "