swec

#!/usr/bin/perl
# swec - Simple Web Error Checker
# Copyright (C) Eskild Hustvedt 2008, 2009, 2010
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use strict;
use warnings;
# Used to fetch data
use LWP::UserAgent;
# Used for cookie support
use HTTP::Cookies;
# Used for printing the error listing to the user
use Data::Dumper;
# Used for commandline parsing
use Getopt::Long;
# Used for session support
use Storable;
# OS detection
use POSIX qw(uname);
# Need these to locate the default.sdf
use Cwd qw(realpath);
use File::Basename qw(dirname);
# Makes code easier to read
use constant {
	true => 1,
	false => 0,
};

# Signal handling: SIGINT and SIGTERM are both routed to ErrorReport(), which
# exits immediately when $hasRunErrorRep is true and otherwise prints the
# report before exiting.
$SIG{INT} = \&ErrorReport;
$SIG{TERM} = \&ErrorReport;

# The version number
my $VERSION = '0.5';
# The base URI
my $baseURI;
# The same, compiled into a regex (see prepareBaseURIRegex())
my $baseURIRegex;
# True if we have run the error report. We don't set this to false until
# after we've started processing pages, no need to display an empty
# report.
my $hasRunErrorRep = true;
# The base user agent string. %OS% is substituted by setupUAString().
my $BaseUA = 'SWEC/'.$VERSION.' (%OS%; Simple Web Error Checker/LWP; spider/crawler) giveme ';
# Appended to the user agent, can be changed with commandline options
my $GiveMeUA = 'Mozilla/5.0 Firefox Gecko';
# The logfile
my $logfile;
# Debug logging level (incremented once per --debuglog on the commandline)
my $debugLog = 0;
# The regex of filename extensions to ignore. ExtractLinks() anchors it to
# the end of the link.
my $ignoreregex = '\.(png|exe|run|gif|pdf|phps|deb|rpm|bin|jpe?g|css|bz2|gz|tar|tgz|zip|rar|odt|odp|doc|psp|swf|js|patch|vim)';
# The session hashref, if we have a session
my $session;
# The session filename, if we have a session
my $session_file;

# -- Global arrays --

# This is an array that we use in order to not have to sort the %Checks hash
# each and every time it is run. This should never be put into a session,
# but be resorted on each load.
# It's simply an array of names.
my @DefaultSortedChecks;
# The same, but for URL tests
my @URLSortedChecks;
# List of loaded SDF files
my @LoadedSDF;
# List of page seeds
my @PageSeeds;
# List of page seeds to POST to. Filled by --postseed as a flat list of
# (url, data) pairs and rewritten by initPageSeedsPost() into
# (url, hashref) pairs.
my @PageSeedsPost;
# Logged messages, pushed to by toLog()
my @loggedMessages;


# -- Core settings, and their defaults --
my %settings = (
	SDFFile => undef,		# String, path to an SDF file (XXX: not in use)
	HardErrorMax => 5,		# Int, the hard number of max errors that can occur on a page type before we
							#  assume all pages of that type have errors.
	skipHEAD => false,		# Boolean, true if we should skip the HEAD requests
	Debug => false,			# Boolean, debugging is off by default
	OnlyNewErrors => false,	# Boolean, true if we should only print information about new errors
	keepGoing => false,		# Boolean, true if we should just keep on parsing links from pages with errors
	htmlInfo => false,		# Boolean, true if we don't have HTML::LinkExtractor but have HTML::Entities
	uriEncode => false,		# Boolean, true if we have URI::Encode
	checkSubDomains => false,# Boolean, true if we should allow subdomains in the URIRegex
	noIgnores => false,		# Boolean, true if default --matchmax ignores should not be added
	disabledLibs => {},     # Hashref of libs that SWEC should not use (lib name => true)
);

# -- Global hashes --

# Filter checks, loaded from an SDF file.
# See the manpage for a definition of the SDF file format.
# The syntax of this hash is:
# NAME => {
#	sdf_setting => sdf_value,
# }
# Checks will be present in this hash, even if they are supposed to be ignored
# (ie. it exists in the %IgnoreChecks hash), but will be skipped at runtime.
my %Checks;

my (
	%MatchMax,		# Hash of expression => setting pairs. See addMatchMax()
	%HasRun,		# Hash of url => referrer pairs of URLs that have been processed
	%HasPrinted,	# Hash of url path => true pairs of URLs that have been printed
	%CurrLinks,		# Hash of url => referrer pairs of URLs that are known not to have been processed
	%ErrorPages,	# Hash of url => error pairs
	%ErrorCount,	# Hash of base url => number of errors
	%PageCount,		# Hash of base url => number of pages fetched
	%IgnoreChecks,	# Hash of SWEC/SDF checks to ignore
	%wasPageSeed,	# Hash of url => true pairs of URLs that were seeds
);
my %MatchedMax = (
	seen => {},			# If we have seen a URL previously, its status is saved here
	checkCount => {},	# Contains how many times an expression has matched
);

# -- Global objects --

# The LWP::UserAgent object
my($UA);
# HTML::LinkExtractor object (left undefined when the builtin parser is used)
my $EX;

# == Begin main app ==

# Display the help screen if no args were supplied.
help() if not @ARGV;

# Commandline parsing. Handlers given as code refs receive the option name
# as their first argument (discarded with a bare shift) and the option value
# as their second. GetOptions returns false on a parse error, in which case
# we exit with status 10.
GetOptions
(
	# -- User agent selection: each of these replaces the "giveme" suffix --
	'ie' => sub {
		$GiveMeUA = 'Mozilla/5.0 MSIE Trident';
	},
	'opera' => sub {
		$GiveMeUA = 'Opera Presto';
	},
	'chrome' => sub
	{
		$GiveMeUA = 'Mozilla/5.0 AppleWebKit (KHTML, like Gecko) Chrome Safari';
	},
	'android' => sub
	{
		$GiveMeUA = 'Mozilla/5.0 Android AppleWebKit (KHTML, like Gecko) Mobile Safari';
	},
	# A manual UA replaces the whole base string; the suffix then only
	# identifies us as SWEC.
	'ua=s' => sub {
		shift;
		$BaseUA = shift;
		$GiveMeUA = ' (SWEC - Simple Web Error Checker-perl/LWP; spider/crawler)';
	},
	# -- Session and log files --
	'session=s' => \$session_file,
	# Shorthand: NAME.ses for the session and NAME.log for the log
	'i|sessnam=s' => sub {
		shift;
		my $nam = shift;
		$session_file = $nam.'.ses';
		$logfile = $nam.'.log';
	},
	# -- Seeds --
	's|urlseed=s' => sub {
		shift;
		my $u = shift;
		push(@PageSeeds,$u);
	},
	# Takes exactly two values per occurrence: the URL and the POST data
	# string. They are stored flat and paired up by initPageSeedsPost().
	'postseed=s{2}' => \@PageSeedsPost,
	# --matchmax [!]REGEXP=COUNT
	'm|matchmax=s' => sub {
		shift;
		my $match = shift;
		my $no = $match;
		my $reversed = 0;
		# Strip the trailing =COUNT off the expression ...
		if(not $match =~ s/=\d+$//)
		{
			swecDie(10,"Failed to parse commandline: --matchmax $match\n");
		}
		# ... and reduce the copy to just COUNT
		if(not $no =~ s/^.*=(\d+)$/$1/)
		{
			swecDie(10,"Failed to parse commandline: --matchmax $no\n");
		}
		# A leading ! reverses the match
		if ($match =~ s/^!//)
		{
			$reversed = 1;
		}
		# Try the expression against an empty string inside an eval so that
		# a syntax error is reported here instead of killing us later.
		$_ = '';
		eval
		{
			/$match/
		};
		if ($@)
		{
			swecDie(10,"Regular expression error in --matchmax: $@");
		}
		addMatchMax($match,$no,$reversed);
	},
	'b|baseurl|baseuri=s' => \$baseURI,
	'l|log=s' => \$logfile,
    'debuglog+' => \$debugLog,
	'e|harderrorlimit=i' => \$settings{HardErrorMax},
	# -- SDF debugging helpers: both exit once they are done --
	'parsedump=s' => sub {
		shift;
		my $file = shift;
		LoadSDFv2($file,\%Checks);
		$Data::Dumper::Sortkeys = true;
		$Data::Dumper::Varname = 'SDF_PARSE_DUMP';
		$Data::Dumper::Terse = true;
		$Data::Dumper::Indent = 1;
		print Dumper(\%Checks);
		exit(0);
	},
	'validate=s' => sub {
		shift;
		my $file = shift;
		local $| = true;
		LoadSDFv2($file,\%Checks);
		print "$file syntax OK\n";
		exit(0);
	},
	# debugInfo() exits by itself
	'debuginfo' => sub {
		debugInfo();
	},
	# Exclude a single test ID
	'x|exclude=s' => sub {
		shift;
		my $t = shift;
		$IgnoreChecks{$t} = true;
	},
	# -- Plain boolean settings --
	'checksub|checksubdomains' => \$settings{checkSubDomains},
	'keepgoing' => \$settings{keepGoing},
	'nohead' => \$settings{skipHEAD},
	'noignores' => \$settings{noIgnores},
	# lwp-request emulation; the handler tells get from head by looking at
	# the option name it receives, and exits when done.
	'lwphead' => \&LWP_CommandlineEmu,
	'lwpget' => \&LWP_CommandlineEmu,
	'onlynew' => sub {
		$settings{OnlyNewErrors} = true,
	},
	'disable-lib|disablelib=s' => sub {
		shift;
		my $lib = shift;
		$settings{disabledLibs}->{$lib} = true;
	},
	# help() exits by itself
	'h|help' => sub { help(); },
	'help-all' => sub { help(1) },
	'version' => sub { print "SWEC version $VERSION\n"; exit 0; },
) or exit(10);

main();

# Purpose: Main program
# Usage: main();
#
# Sets up the session, base URI, seeds, SDF data, LWP and the optional
# libraries, registers the default --matchmax ignores, and then crawls:
# POST seeds first, then normal seeds, then links imported from the session,
# and finally everything discovered along the way. Ends by calling
# ErrorReport(), which exits.
sub main
{
	# Initialize session
	if(defined $session_file and not -e $session_file)
	{
		print "$session_file: does not exist. Assuming new session.\n";
		$session = {};
	}
	else
	{
		loadSession();
	}
	# Prepare the baseURI
	prepareBaseURI();
	setTermTitle('swec ['.$baseURI.']');
	# Prepare URL seeds
	prepareSeeds();
	# Load the SDF data
	AutoLoadSDF();
	# Initialize LWP
	InitLWP();
	# Initialize our optional libs
	initOptionalLibs();

	if(not $settings{noIgnores})
	{
		foreach my $def (qw(logout delete))
		{
			# addMatchMax() keys %MatchMax by the stringified compiled
			# regex, not by the raw expression, so we have to look it up the
			# same way. Looking up the raw expression never matches and
			# would overwrite a limit the user supplied for the same
			# expression.
			my $key = qr/$def/;
			if(not defined $MatchMax{"$key"})
			{
				addMatchMax($def,0,false);
			}
		}
	}

	# Purge STDOUT at once
	$| = true;

	# We've begun processing, so now errors should be shown
	$hasRunErrorRep = false;

	# First fetch the post seeds
	if (@PageSeedsPost)
	{
		initPageSeedsPost();
		FetchLinks(\@PageSeedsPost,true);
	}

	# Then fetch the normal seeds
	FetchLinks(\@PageSeeds);

	# Load data from the session if we have one
	if ($session)
	{
		my %ErrorLinks;
		# Import errors from session into ErrorLinks and run through those pages.
		# (links found inside those pages will be appended to CurrLinks).
		SessionImportLinks(\%ErrorLinks,true);
		if(keys %ErrorLinks)
		{
			FetchLinks(\%ErrorLinks);
		}
		# Now import all other links saved in the session into CurrLinks
		SessionImportLinks(\%CurrLinks,false);
	}
	# Finally, loop until there are no links left
	while(keys %CurrLinks)
	{
		FetchLinks(\%CurrLinks);
	}
	# And output the error report
	ErrorReport();
}

# Purpose: Turn every entry in @PageSeeds into a full URL
# Usage: prepareSeeds();
#
# Each seed is passed through getFullSeedURL(). A seed that can not be
# resolved is reported through swecDie().
sub prepareSeeds
{
	my @resolved;
	for my $idx (0 .. $#PageSeeds)
	{
		my $u = $PageSeeds[$idx];
		my $full = getFullSeedURL($u);
		swecDie(10,"--urlseed '$u' does not contain http:// and no --baseurl was found..\n") if not $full;
		$resolved[$idx] = $full;
	}
	@PageSeeds = @resolved;
}

# Purpose: Resolve a single seed into a full URL
# Usage: $u = getFullSeedURL(SEED);
#
# Seeds beginning with "http" are returned untouched. Anything else is
# appended to $baseURI. Returns undef when the seed is relative and there is
# no $baseURI to resolve it against.
sub getFullSeedURL
{
	my $url = shift;

	# Already absolute, nothing to do
	return $url if $url =~ /^http/;

	# Relative, and nothing to resolve it against
	return undef if not $baseURI;

	my $joined = $baseURI.'/'.$url;
	# Collapse every run of slashes, then restore the pair after the scheme
	$joined =~ s#/+#/#g;
	$joined =~ s#^(https?):/#$1://#g;
	return $joined;
}

# Purpose: Initialize PageSeedsPost
# Usage: initPageSeedsPost();
#
# On entry @PageSeedsPost is a flat list of (url, "key=value&key=value")
# pairs, as collected from --postseed. On exit it is a flat list of
# (full url, { key => value }) pairs. Unresolvable URLs and malformed data
# pairs are reported through swecDie().
sub initPageSeedsPost
{
	my @result;
	for(my $i = 0; $i < @PageSeedsPost; $i += 2)
	{
		my $u = $PageSeedsPost[$i];
		my $data = $PageSeedsPost[$i+1];
		my $url = getFullSeedURL($u);
		if(not $url)
		{
			swecDie(10,"--postseed '$u' does not contain http:// and no --baseurl was found..\n");
		}
		if(not defined $data)
		{
			swecDie(10,"--postseed '$u' is missing its data string\n");
		}
		my $resultingData = {};

		foreach my $l (split(/\&/,$data))
		{
			# The key is everything up to the first =, the value (which may
			# be empty and may itself contain =) is everything after it.
			if ($l =~ /^([^=]+)=(.*)\z/s)
			{
				$resultingData->{$1} = $2;
			}
			else
			{
				swecDie(10,"Failed to parse data pair for --postseed $url: \"$l\"");
			}
		}
		push(@result,$url,$resultingData);
	}
	@PageSeedsPost = @result;
}

# Purpose: Initialize the base URI
# Usage: prepareBaseURI();
#
# Works out $baseURI from (in order) --baseurl, the session, or the first
# remaining commandline argument, makes sure it has a scheme, builds
# $baseURIRegex and ensures @PageSeeds has at least one entry. Also restores
# %MatchMax from the session when none were supplied on the commandline.
sub prepareBaseURI
{
	# If we make an assumption about something, this should be set to true
	# and we will output a message about it at the end of this block.
	my $madeAssumption = false;
	my $useURLSeed;
	# If we don't have a $baseURI then we need to find one
	if(not $baseURI)
	{
		# Fetch from session if present
		if ($session and $session->{baseuri})
		{
			$baseURI = $session->{baseuri};
		}
		else
		{
			# Try to fetch from parameters
			if (@ARGV)
			{
				$baseURI = shift(@ARGV);
				$madeAssumption = true;
			}
			# Give up
			if (not $baseURI)
			{
				swecDie(10,"You must supply a --baseurl\n");
			}
			if(not @PageSeeds)
			{
				# The argument may have been a full URL. Keep the original
				# around as a possible seed and cut the base down to
				# scheme + host.
				$useURLSeed = $baseURI;
				$baseURI =~ s{^((\w+\:+/+)?[^/]+).*}{$1};
				# If our baseURI is the same as the URL seed here, then
				# we don't have additional URI bits, so don't set useURLSeed.
				if($baseURI eq $useURLSeed)
				{
					$useURLSeed = undef;
				}
			}
		}
	}

	# Make sure that it starts with something://, if not, assume http://
	if(not $baseURI =~ m#^\w+://#)
	{
		$baseURI = 'http://'.$baseURI;
		$madeAssumption = true;
	}
	prepareBaseURIRegex();
	# Ensure we have at least one page seed and load from the session if needed
	if(not @PageSeeds)
	{
		# pageSeeds is an array reference (it is dereferenced as one just
		# below), so it must be tested as one. Dereferencing it as a hash
		# is a fatal error under strict refs.
		my $sessionSeeds = $session ? $session->{pageSeeds} : undef;
		if (ref($sessionSeeds) eq 'ARRAY' and @{$sessionSeeds})
		{
			@PageSeeds = @{$sessionSeeds};
		}
		else
		{
			# Output a message that says we assumed something
			if ($madeAssumption)
			{
				print "Assuming --baseurl $baseURI\n";
			}
			if ($useURLSeed)
			{
				# Reduce the seed to its path part
				$useURLSeed =~ s{^\w+://[^/]+}{};
				$useURLSeed =~ s{/+}{/};
				if(not $useURLSeed =~ m{^/})
				{
					$useURLSeed = '/'.$useURLSeed;
				}
				print "Assuming --urlseed $useURLSeed\n";
				push(@PageSeeds,$useURLSeed);
			}
			else
			{
				print "Assuming --urlseed /\n";
				push(@PageSeeds,'/');
			}
		}
	}
	if($session and not keys %MatchMax and $session->{MatchMax})
	{
		%MatchMax = %{$session->{MatchMax}};
	}
}

# Purpose: Initialize the base URI regex
# Usage: prepareBaseURIRegex();
#
# Builds $baseURIRegex from $baseURI. The scheme is optional in the
# resulting expression. With checkSubDomains any subdomain prefix is allowed,
# otherwise only an optional "www." (unless the domain already starts with
# www). The scheme and domain are matched literally.
sub prepareBaseURIRegex
{
	my $scheme = $baseURI;
	my $domain = $baseURI;
	$domain =~ s{^\w+://}{};
	$scheme =~ s{^(\w+://).*}{$1};
	if (not $scheme)
	{
		$scheme = 'http://';
	}
	# Escape regex metacharacters. Without this the dots in a hostname match
	# any character, so example.com would also accept exampleXcom.
	my $schemeRe = quotemeta($scheme);
	my $domainRe = quotemeta($domain);
	if ($settings{checkSubDomains})
	{
		$baseURIRegex = qr{($schemeRe)?([^/]+\.)?$domainRe};
	}
	else
	{
		if ($domain =~ /^www/)
		{
			$baseURIRegex = qr/($schemeRe)?$domainRe/;
		}
		else
		{
			$baseURIRegex = qr/($schemeRe)?(www\.)?$domainRe/;
		}
	}
}

# Purpose: Set the title of the terminal window (xterm or screen)
# Usage: setTermTitle(TITLE);
#
# Does nothing when $ENV{TERM} is not defined. Uses the screen escape
# sequence when TERM contains "screen" and the xterm one otherwise.
sub setTermTitle
{
	my $term = $ENV{TERM};
	return if not defined $term;
	my $title = shift;
	my $sequence = ($term =~ /screen/)
		? "\033k$title\033\\"
		: "\033]0;$title\007";
	print $sequence;
}

# Purpose: Output information useful for debugging
# Usage: debugInfo();
#
# Prints the SWEC version, an MD5 digest of the running script, the user
# agent, the ignore regex, any user-disabled libs and which optional
# libraries are available, then exits with status 0. A missing Digest::MD5
# or an unreadable script file is reported in place of the digest.
sub debugInfo
{
	print "SWEC version $VERSION\n";
	my $fmt = '%-20s: %s'."\n";
	my $digest;
	# Digest::MD5 is only needed here, so it is loaded on demand
	if (eval { require Digest::MD5; 1 })
	{
		my $loc = realpath($0);
		if (defined $loc and open(my $f,'<',$loc))
		{
			# Digest the raw bytes, without any newline translation
			binmode($f);
			my $md5 = Digest::MD5->new();
			$md5->addfile($f);
			$digest = $md5->hexdigest;
			close($f);
		}
		else
		{
			$digest = '(unable to read '.$0.': '.$!.')';
		}
	}
	else
	{
		$digest = '(Digest::MD5 not available)';
	}
	printf($fmt,'MD5',$digest);
	setupUAString();
	printf($fmt,'User agent',$BaseUA.$GiveMeUA);
	printf($fmt,'Ignore regex',$ignoreregex);
	if(keys %{$settings{disabledLibs}})
	{
		printf($fmt,'Libs disabled by user',join(' ',keys %{$settings{disabledLibs}}));
	}
	my $parser;
	my $uriEncode;
	if(tryUse('HTML::LinkExtractor'))
	{
		$parser = 'HTML::LinkExtractor';
		printf($fmt,'HTML::LinkExtractor','available ('.$HTML::LinkExtractor::VERSION.')');
	}
	else
	{
		printf($fmt,'HTML::LinkExtractor','not available');
		if(tryUse('HTML::Entities'))
		{
			$parser = 'builtin with HTML::Entities';
			printf($fmt,'HTML::Entities','available ('.$HTML::Entities::VERSION.')');
		}
		else
		{
			$parser = 'builtin without HTML::Entities';
			printf($fmt,'HTML::Entities','not available');
		}
	}
	printf($fmt,'Parser',$parser);
	if(tryUse('URI::Encode'))
	{
		printf($fmt,'URI::Encode','available ('.$URI::Encode::VERSION.')');
		$uriEncode = 'URI::Encode';
	}
	else
	{
		printf($fmt,'URI::Encode','not available');
		$uriEncode = '(none)';
	}
	printf($fmt,'URI encoder',$uriEncode);
	exit(0);
}

# Purpose: Output the help screen and exit
# Usage: help(ALL?);
#
# When ALL is true the extended help screen, including the advanced options
# and a pointer to the full documentation, is printed. Every option line is
# written through PrintHelp(SHORT, LONG, DESCRIPTION). Always exits with
# status 0.
sub help
{
	my $all = shift;
	print "swec - Simple Web Error Checker version $VERSION\n\n";
	print "USAGE: swec [OPTIONS]\n";
	print " OR  : swec ([OPTIONS]) [HOST]\n\n";

	# The help options themselves differ between the two screens
	my @basic = $all
		? (
			['','--help','Print the normal help screen'],
			['','--help-all','Print this help screen'],
			['','--version','Print version information and exit'],
		)
		: (
			['','--help','Print this help screen'],
			['','--help-all','Print an extended help screen with advanced options'],
		);
	push(@basic,
		['','--ie','Pretend to be IE (default: Firefox)'],
		['','--opera','Pretend to be Opera (default: Firefox)'],
		['','--chrome','Pretend to be Google Chrome (default: Firefox)'],
		['','--android','Pretend to be an Android phone (default: Firefox)'],
		['','--ua','Manually specify user agent string'],
		['-b','--baseurl','The base URL. Will only check URLs matching it and will use it as the domain/url for pages that have relative paths'],
		['-s','--urlseed','The URL seed, the first URL to check. Can be supplied multiple times, order does not matter. Supply -b before -s in order to be able to supply relative paths'],
		['-l','--log','Write a log to this file'],
		['','--session','Save and load session from the file supplied. See the manpage for more information on sessions.'],
		['-i','--sessnam','Takes a FILE argument. Short for --session FILE.ses --log FILE.log'],
		['-x','--exclude','Exclude the test ID supplied'],
		['','--nohead','SWEC sends a HEAD request before each GET request to detect binary files. This makes SWEC skip the HEAD request and rely on file name filtering to detect binary files.'],
		['','--lwpget, --lwphead','SWEC equivalents of the lwp-request GET and HEAD commands. See the manpage for more information'],
		['','--keepgoing','Parse a page for links even if it contained errors'],
	);
	foreach my $row (@basic)
	{
		PrintHelp(@{$row});
	}

	if ($all)
	{
		my @advanced = (
			['','--postseed','Works like --urlseed but takes a second parameter; a string of key-value pairs that will be POSTed. See the manpage for more information.'],
			['','--checksub','Allow SWEC to follow and check links to subdomains of the --baseurl'],
			['-e','--harderrorlimit','Change the hard error page limit, the max amount of pages of any link type to request before deciding that all pages of that type will return errors (and thus not request any more) (default: 5)'],
			['-m','--matchmax','Takes a parameter in the form: [REGEXP]=COUNT. [REGEXP] will be matched against all URLs, and SWEC will never test more than COUNT URLs that matches [REGEXP]'],
			['','--validate','Takes a FILE argument. Validate the syntax of the SDF file supplied'],
			['','--parsedump','Takes a FILE argument. Load the SDF file supplied and dump the parsed data structure to STDOUT. Useful for debugging problems with regular expressions, or simply to see how SWEC sees your SDF file'],
			['','--disable-lib','Takes a single argument, the library to skip using. See the manpage for details.'],
		);
		foreach my $row (@advanced)
		{
			PrintHelp(@{$row});
		}
		# Point at whichever documentation viewer is installed
		if(InPath('perldoc'))
		{
			print "\nFor more complete documentation, run: perldoc $0\n";
		}
		elsif(InPath('man'))
		{
			print "\nFor more complete documentation, run: man swec\n";
		}
	}
	exit(0);
}

# Purpose: Get the real (absolute) URL for a link found on a page
# Usage: my $URL = GetRealUrl(CHILD, PARENT);
#
# CHILD is the link as found in the page, PARENT is the URL of the page it
# was found on. Links that already are absolute (http:// or https://) are
# returned untouched. Host-relative links (starting with /) are placed on
# the parent's host. Other relative links are resolved against the
# directory of the parent, while query-only links (starting with ?) are
# resolved against the parent page itself.
#
# The result may contain a doubled slash between host and path.
sub GetRealUrl
{
	my $URL = shift;
	my $parent = shift;
	my $return;
	if( $URL =~ m#^https?://# )
	{
		return $URL;
	}
	elsif($URL =~ m{^/})
	{
		$return = $URL;
	}
	else
	{
		# The parent's path, without host, query string and fragment
		my $parentPath = getBaseURL($parent);
		$parentPath =~ s/^https?:\/\/[^\/]+//;
		# A relative link replaces the last segment of the parent's path.
		# Previously the last segment was only dropped if it looked like
		# word.word, so a link on /dir/page was resolved to /dir/pagelink.
		# Query-only links keep the whole path.
		if ($URL !~ /^\?/ and $parentPath !~ /\/$/)
		{
			$parentPath =~ s{[^/]*$}{};
		}
		$return = $parentPath.$URL;
	}
	if ($parent =~ m{^http})
	{
		$parent =~ s{^(https?://[^/]+).*}{$1};
		$return = $parent.'/'.$return;
	}
	else
	{
		$return = $baseURI.'/'.$return;
	}
	return $return;
}

# Purpose: Emulate the commandline GET and HEAD lwp commands
# Usage: \&LWP_CommandlineEmu in a parameter
#
# Called by GetOptions with the option name as its first argument; a name
# containing "get" selects GET, anything else HEAD. Every remaining entry in
# @ARGV that does not look like a --long option is treated as a URL
# (http:// is assumed when no scheme is given). The response headers, and
# for GET the body, are printed for each. Exits with status 0.
sub LWP_CommandlineEmu
{
	# We need to set hasRunErrorRep to true, so if the user presses ctrl+c
	# we won't run a useless error report.
	$hasRunErrorRep = 1;

	my $arg = shift;
	InitLWP();
	my $type = 'head';
	if ($arg =~ /get/)
	{
		$type = 'get';
	}
	foreach my $url (@ARGV)
	{
		next if $url =~ /^\-\-\w+$/;
		$url = ($url =~ m{^\w\w+://}) ? $url : 'http://'.$url;
		print "\n--\n$url:\n--\n";
		my $r = LWP_Request($type,$url);
		# LWP_Request returns nothing when every attempt ended in a
		# connection error. Move on to the next URL rather than dying on a
		# method call on undef.
		if(not defined $r)
		{
			print "giving up\n";
			next;
		}
		print $r->headers_as_string;
		if ($type eq 'get')
		{
			print "\n";
			print $r->content;
			# Make sure the output ends with a newline
			if(not $r->content =~ /\n$/)
			{
				print "\n";
			}
		}
	}
	exit(0);
}

# Purpose: Fill the operating system name into the user agent string
# Usage: setupUAString();
#
# Replaces every %OS% in $BaseUA with the system name reported by uname(),
# where "Linux" is written as "GNU/Linux".
sub setupUAString
{
	# Only the first field returned by uname() is of interest
	my ($name) = uname();
	$name = 'GNU/Linux' if $name eq 'Linux';
	$BaseUA =~ s/%OS%/$name/g;
}

# Purpose: Check for a file in path
# Usage: InPath(FILE)
#
# Returns 1 if FILE exists as an executable non-directory in one of the
# colon separated directories in $ENV{PATH}, 0 otherwise. Also returns 0
# when FILE is empty or PATH is not set.
sub InPath
{
	my $file = shift;
	return 0 if not defined $file or not length $file;
	return 0 if not defined $ENV{PATH};
	foreach my $dir (split /:/, $ENV{PATH})
	{
		my $candidate = "$dir/$file";
		# Directories usually have the x bit set too, so rule them out
		return 1 if -x $candidate and not -d $candidate;
	}
	return 0;
}

# Purpose: Create the global LWP::UserAgent object
# Usage: InitLWP();
#
# Finalizes the user agent string, creates $UA with it and gives it a cookie
# jar so that cookies are kept between requests.
sub InitLWP
{
	setupUAString();
	my $agentString = $BaseUA.$GiveMeUA;
	# The object used for every HTTP request we make
	$UA = LWP::UserAgent->new('agent' => $agentString);
	my $jar = HTTP::Cookies->new();
	$UA->cookie_jar($jar);
}

# Purpose: Persistent wrapper around LWP. Makes up to three attempts at a
#   request, sleeping three seconds before each retry.
# Usage: my $response = LWP_Request('type',URL,POSTDATA?);
# type is one of head, get or post. POSTDATA is only used for post.
#
# A request is retried only when the response is an error whose status line
# looks like a connection problem. Returns the response object of the first
# attempt that was not such a connection error, or nothing if all three
# attempts were.
sub LWP_Request
{
	my ($type,$URL,$postData) = @_;
	foreach my $attempt (0 .. 2)
	{
		if ($attempt > 0)
		{
			# Only mention the error once, in front of the first retry
			print "connection error - " if $attempt == 1;
			print "retrying... ";
			sleep(3);
		}

		my $r;
		if ($type eq 'get')
		{
			$r = $UA->get($URL);
		}
		elsif ($type eq 'head')
		{
			$r = $UA->head($URL);
		}
		elsif ($type eq 'post')
		{
			printd(2,'Posting '.Dumper($postData).' to '.$URL) if $debugLog;
			$r = $UA->post($URL,$postData);
		}
		else
		{
			swecDie(9,"LWP_Request(): unknown type: $type");
		}

		my $connectionFailed = $r->is_error
			&& $r->status_line =~ /(reset by peer|Connection refused|Can't connect)/i;
		return $r if not $connectionFailed;
	}
	# Every attempt ended in a connection error
	return;
}

# Purpose: Format an arrayref or hashref as text in a pretty format
# Usage: print swecDumper(someRef);
#
# Returns a string that starts with a newline. A hashref becomes one
# "key => value" line per key, sorted by key so that the output is the same
# from run to run. An arrayref becomes one line per element. Anything else
# yields only the leading newline.
sub swecDumper
{
	my $ref = shift;
	my $l = "\n";
	if (ref($ref) eq 'HASH')
	{
		foreach my $h (sort keys %{$ref})
		{
			# Undefined values are shown as empty instead of warning
			my $value = defined $ref->{$h} ? $ref->{$h} : '';
			$l .= sprintf('%-30s => %s'."\n",$h,$value);
		}
	}
	elsif(ref($ref) eq 'ARRAY')
	{
		foreach my $line (@{$ref})
		{
			next if not defined $line;
			$l .= $line;
			# Elements used to be glued together without any separator.
			# End each on its own line, unless it already brought a newline.
			$l .= "\n" if not $line =~ /\n\z/;
		}
	}
	return $l;
}

# Purpose: Output the final error report and logfile, also call session writing functions
# Usage: ErrorReport();
#
# Also installed as the INT and TERM signal handler. Never returns: exits
# with status 0 if a report has already been run (or processing has not
# started yet), otherwise with 1 if there were errors to report and 0 if not.
sub ErrorReport
{
	# Work on a copy, SesRemoveOldErrors() may be asked to filter it below
	my %Errors = %ErrorPages;
	my $rep;
	# Nothing to report, or already reported
	exit(0) if $hasRunErrorRep;

	my $returnVal = 0;

	$hasRunErrorRep = true;
	print "\n---\n\n";
	$rep .= $baseURI.":\n";
	$rep .= 'Out of '.scalar(keys(%HasRun)).' pages fetched, '.scalar(keys(%ErrorPages)).' had errors'."\n";
	# Only include 'base page' info if it differs from the above
	if(scalar(keys(%PageCount)) !=  scalar(keys(%HasRun)))
	{
		$rep .= 'Out of '.scalar(keys(%PageCount)).' base pages checked '.scalar(keys(%ErrorCount)).' had errors'."\n";
	}
	# With --onlynew, errors already known to the session are filtered out
	# of the copy (the raw %ErrorPages is left alone)
	if ($settings{OnlyNewErrors} && $session)
	{
		SesRemoveOldErrors(\%Errors);
	}
	if(keys %Errors)
	{
		appendToSession('ErrorPages',\%Errors);
		$rep .= swecDumper(\%Errors);
		$returnVal = 1;
	}
	else
	{
		$rep .= "\nNo errors to report.\n";
	}
	print $rep;
	# Save everything needed to resume in the session, then write it out
	if ($session)
	{
		if(keys %ErrorPages)
		{
			appendToSession('LastRawErrorPages',\%ErrorPages,true);
		}
		appendToSession('lasthumanlog',$rep);
		appendToSession('baseuri',$baseURI);
		appendToSession('logfile',$logfile);
		appendToSession('pageSeeds',\@PageSeeds);
		appendToSession('HasRun',\%HasRun);
		appendToSession('HardErrorMax',$settings{HardErrorMax});
		appendToSession('BaseUA',$BaseUA);
		appendToSession('GiveMeUA',$GiveMeUA);
		appendToSession('LoadedSDF',\@LoadedSDF);
		appendToSession('MatchMax',\%MatchMax);
		if ($settings{Debug})
		{
			appendToSession('Debug_SDF',\%Checks,true);
		}
		writeSession();
	}
	# The logfile gets the report, any logged messages and dumps of our state
	if ($logfile)
	{
		open (my $rf,'>',$logfile) or swecDie(10,'Failed to open '.$logfile.' for writing: '.$!);
		print {$rf} $rep;
        if (@loggedMessages)
        {
            print {$rf} "Log messages:\n";
            print {$rf} join("\n",@loggedMessages);
        }
		print {$rf} "\n\nDumps:\n";
		print {$rf} 'Used UA: '. $BaseUA.$GiveMeUA ."\n";
		print {$rf} "HasRun:\n".swecDumper(\%HasRun);
		print {$rf} "\nErrorCount:\n".swecDumper(\%ErrorCount);
		print {$rf} "\nPageCount:\n".swecDumper(\%PageCount);
		print {$rf} "\nSeeds:\n".swecDumper(\@PageSeeds);
		close($rf);
		print "Log written to $logfile\n";
	}
	exit($returnVal);
}

# Purpose: Decode HTML entities, regardless of which parser is available
# Usage: my $parsed = htmlParseEntities($str);
#
# Uses decode_entities() when $settings{htmlInfo} is set. Without it only
# &amp; is decoded.
sub htmlParseEntities
{
	my $str = shift;
	if (not $settings{htmlInfo})
	{
		# Fallback: the only entity we handle ourselves
		$str =~ s/&amp;/&/gi;
		return $str;
	}
	return decode_entities($str);
}

# Purpose: Get links from a string, regardless of which link extractor is in use
# Usage: my $links = getLinksFrom($string);
# Returns an arrayref of hashrefs in the form { href => link }
#
# Uses the $EX extractor object when we have one. Otherwise falls back to a
# builtin parser that only looks at <a ...> tags with a quoted href.
sub getLinksFrom
{
	my $data = shift;
	if ($EX)
	{
		$EX->parse(\$data);
		return $EX->links;
	}

	# Builtin parser: put everything on one line, chop the document up at
	# each < and > and keep the pieces that are the inside of an <a> tag.
	$data =~ s/(\n|\r)/ /g;
	my @anchors = grep { /^a\s/i } split(/(<|>)/,$data);

	my @links;
	foreach my $l (@anchors)
	{
		# Reduce the tag to the value of its href, double quoted first
		my $found = ($l =~ s/.*href="([^"]+)".*/$1/i)
			|| ($l =~ s/.*href='([^']+)'.*/$1/i);
		next if not $found;
		push(@links,{ href => htmlParseEntities($l) });
	}
	return \@links;
}

# Purpose: Try using the lib supplied, returning 1 on success 0 on failure
# Usage: tryUse('Some::lib qw(something)');
#
# The argument is whatever would follow "use", so it may include an import
# list. Returns 0 without attempting to load anything if the library has
# been disabled by the user.
sub tryUse
{
	my $lib = shift;
	# The bare module name, without any import list. This is what ends up
	# in disabledLibs, so this is what has to be looked up there. Looking up
	# the full string meant a disabled lib was loaded anyway whenever it was
	# requested with an import list.
	my $check = $lib;
	$check =~ s/\s+.+//;
	return 0 if $settings{disabledLibs}->{$check};
	# Has to be a string eval, use is a compile time statement. The string
	# only ever comes from literals within swec itself.
	return 1 if eval('use '.$lib.';1;');
	return 0;
}

# Purpose: Initialize the link extractor and URI encoder
# Usage: initOptionalLibs();
#
# Prefers HTML::LinkExtractor for link extraction. Without it the builtin
# parser is used, with HTML::Entities for entity decoding if available
# ($settings{htmlInfo}). URI::Encode is used for URI encoding if available
# ($settings{uriEncode}). Warns about anything that is missing or disabled.
sub initOptionalLibs
{
	if(tryUse('HTML::LinkExtractor'))
	{
		$EX = HTML::LinkExtractor->new();
	}
	else
	{
		if ($settings{disabledLibs}->{'HTML::LinkExtractor'})
		{
			warn("Note: Use of HTML::LinkExtractor disabled\n");
		}
		if(tryUse('HTML::Entities qw(decode_entities)'))
		{
			$settings{htmlInfo} = true;
		}
		else
		{
			# This has to test HTML::Entities, not HTML::LinkExtractor,
			# or the wrong reason is reported.
			if ($settings{disabledLibs}->{'HTML::Entities'})
			{
				warn("Warning: Use of HTML::Entities disabled, will use (less reliable) alternate entity parsing\n");
			}
			else
			{
				warn("Warning: HTML::Entities missing, falling back to (less reliable) alternate entity parsing\n");
			}
		}
	}
	if(tryUse('URI::Encode qw(uri_encode)'))
	{
		$settings{uriEncode} = true;
	}
	else
	{
		if ($settings{disabledLibs}->{'URI::Encode'})
		{
			warn("Warning: Use of URI::Encode disabled, URIs will be submitted unencoded\n");
		}
		else
		{
			warn("Warning: URI::Encode missing, URIs will be submitted unencoded\n");
		}
	}
}

# Purpose: Encode a URI if we are able to
# Usage: uri = encodeURI(uri);
#
# Wraps uri_encode(). When $settings{uriEncode} is not set the URI is
# returned as it was.
sub encodeURI
{
	my $uri = shift;
	return $settings{uriEncode} ? uri_encode($uri) : $uri;
}

# Purpose: Extract links from a page and insert them into %CurrLinks
# Usage: ExtractLinks(CONTENTS_TO_EXTRACT_FROM, URL_OF_CONTENT);
#
# Every usable link is normalized into a full URL and, if it passes
# shouldCheckURL() and none of the URL checks match it, is stored in
# %CurrLinks with URL_OF_CONTENT as its value. Links matching a URL check
# are registered as errors through AddError() instead.
sub ExtractLinks
{
	my $contents = shift;
	my $url = shift;
	# Used to ensure we only process unique URLs
	my %seen;

	# This enables support for the // shortcut (which is short for the protocol
	# 	used, so either http:// or https://, we extract it from the url supplied)
	my $baseProto = $url;
	$baseProto =~ s#^(\w+://).+#$1#;
	$contents =~ s{href=(["'])//}{href=$1$baseProto}g;

	my $links = getLinksFrom($contents);
	foreach my $l (@{$links})
	{
		my $href = $l->{href};
		next if not $href;
		# Keep the link as it was written (though encoded), the URL checks
		# are run on both that and the normalized version
		my $origHref = $href = encodeURI($href);
		# Skip schemes we can't fetch, bare fragments and ignored file types
		next if $href =~ /^(mailto|javascript|ftp|irc|feed|tel):/i;
		next if $href eq '#';
		next if $href =~ /$ignoreregex$/;
		# Remove any fragment
		$href =~ s/\#.*$//;
		if(not $wasPageSeed{$href})
		{
			$href = GetRealUrl($href,$url);
		}
		if(not $href =~ m#^\w+://#)
		{
			$href = $baseURI.'/'.$href;
		}
		# Collapse every run of slashes, then restore the pair after the
		# scheme and make sure the host is followed by a slash
		$href =~ s#/+#/#g;
		$href =~ s#^(https?):/([^/]+)/?#$1://$2/#g;
		my $base = getBaseURL($href);
		# Don't fetch a page that has had errors more than $settings{HardErrorMax} times
		if (defined $ErrorCount{$base} && $ErrorCount{$base} > $settings{HardErrorMax})
		{
			next;
		}
		next if $seen{$href};
		next if not shouldCheckURL($href);
		$seen{$href} = 1;

		# Run checks
		my $err;
        foreach my $testURL ($origHref,$href)
        {
            foreach my $en (@URLSortedChecks)
            {
                printd(2,'Running check '.$en.' on URL '.$testURL) if $debugLog;
                # Get a ref to the hash
                my $ec = $Checks{$en};
                # The first regex that matches decides the error
                foreach my $regex (@{$ec->{regexes}})
                {
                    if ($testURL =~ $regex)
                    {
                        $err = $ec->{error};
                        last;
                    }
                }
                if ($err)
                {
                    # %PARENT% in the error message is the page the link was on
                    $err =~ s/%PARENT%/$url/g;
                    AddError($testURL,$err,$en,true);
                    last;
                }
            }
            # One error per link is enough
            last if $err;
        }
		# Only links without errors are queued for fetching
		if(not $err)
		{
			$CurrLinks{$href} = $url;
		}
	}
}

# Purpose: Get the base URL of a page, that is, the URL without its
#   fragment and query string
# Usage: $url = getBaseURL($url);
sub getBaseURL
{
	my ($URL) = @_;
	for ($URL)
	{
		# First the fragment, then the query string
		s/\#.*$//;
		s/\?.*$//;
	}
	return $URL;
}

# Purpose: Register an error for a URL
# Usage: AddError(URL, ERROR, TEST, SILENT?);
# If SILENT is true nothing is printed.
#
# The error is stored in %ErrorPages as "ERROR [TEST]" and the error count
# of the URL's base URL is increased. Errors from tests listed in
# %IgnoreChecks are not stored at all.
sub AddError
{
	my ($URL,$error,$test,$silent) = @_;
    printd(2,'Adding error: '.$error.' on test '.$test.' for URL '.$URL) if $debugLog;

	# The user asked for this test to be ignored
	if ($IgnoreChecks{$test})
	{
		print "error - ignored\n" unless $silent;
		return;
	}

	$ErrorPages{$URL} = $error.' ['.$test.']';
	$ErrorCount{ getBaseURL($URL) }++;
	print 'error ['.$test."]\n" unless $silent;
}

# Purpose: Check if we should run checks on the URL supplied
# Usage: bool = shouldCheckURL(URL);
#
# A URL is rejected if it has already been run, if it is outside the base
# URI, or if a --matchmax rule that applies to it has been used up. Verdicts
# reached through the matchmax rules are remembered in $MatchedMax{seen} and
# reused the next time the same URL is asked about.
sub shouldCheckURL
{
	my $URL = shift;

	# Base checks
	return false if defined $HasRun{$URL};
	return false if not $URL =~ /^$baseURIRegex/;

	# Reuse an earlier verdict if there is one
	my $seen = $MatchedMax{seen};
	return $seen->{$URL} if defined $seen->{$URL};

	# Matchmax check
	foreach my $check (keys %MatchMax)
	{
		my $rule = $MatchMax{$check};
		my $matches = ($URL =~ /$check/) ? 1 : 0;
		# A reversed rule applies to the URLs that do not match
		my $applies = $rule->{reversed} ? !$matches : $matches;
		next if not $applies;

		my $count = ++$MatchedMax{checkCount}->{$check};
		if ($rule->{max} < $count)
		{
			$seen->{$URL} = false;
			return false;
		}
		$seen->{$URL} = true;
	}
	return true;
}

# Purpose: Add a 'MatchMax' test
# Usage: addMatchMax('expression',max,reversed?);
#
# Compiles the expression and registers it in %MatchMax with its limit and
# reversed flag, and resets its match counter. Note that the key used in
# %MatchMax is the stringified compiled regex, not the expression as it was
# supplied. An expression that fails to compile is reported through
# swecDie().
sub addMatchMax
{
	my $expression = shift;
	my $max = shift;
	my $reversed = shift;
	my $compiled = eval { qr/$expression/ };
	if (not defined $compiled)
	{
		# Every other swecDie() call passes an exit code in front of the
		# message and this one did not. 10 is what the other commandline
		# errors use.
		swecDie(10,'Failed to compile regular expression /'.$expression.'/: '.$@);
	}

	$MatchMax{$compiled} = {
		max => $max,
		reversed => $reversed,
	};
	$MatchedMax{checkCount}->{$compiled} = 0;
}

# Purpose: Fetch every link in a hash or array in turn, running checks on the content received
# Usage: FetchLinks(\%hashOfLinks OR \@arrayOfLinks, POST_ENABLED?);
#
# An array is treated as a list of seeds: each URL is marked in %wasPageSeed
# and fetched with 'seed' as its referrer. With POST_ENABLED the array holds
# (url, postdata) pairs instead of plain URLs. A hash is fetched in sorted
# key order and can not be combined with POST_ENABLED.
sub FetchLinks
{
	my ($source,$postEnabled) = @_;
	my $kind = ref($source);

	if ($kind eq 'HASH')
	{
		swecDie(9,'Fatal: FetchLinks got hashref in postEnabled mode') if $postEnabled;
		foreach my $URL (sort keys %{$source})
		{
			FetchLink($URL,$source);
		}
	}
	elsif ($kind eq 'ARRAY')
	{
		# We need a hash for FetchLink
		my $fakeSource = {};
		# In post mode every other entry is the data for the URL before it
		my $step = $postEnabled ? 2 : 1;
		for(my $i = 0; $i < scalar(@{$source}); $i += $step)
		{
			my $URL = $source->[$i];
			my $postData = $postEnabled ? $source->[$i+1] : undef;
			$fakeSource->{$URL} = 'seed';
			$wasPageSeed{$URL} = true;
			FetchLink($URL,$fakeSource,$postData);
		}
	}
	else
	{
		swecDie(9,'Fatal: FetchLinks got unknown reference: '.ref($source));
	}
}

# Purpose: Fetch a single URL and run all body checks on the content received
# Usage: FetchLink(URL, \%hashOfLinks, postData?);
#  \%hashOfLinks maps URLs to the page that referenced them. The entry for
#  URL is deleted from it if shouldCheckURL() rejects the URL. If postData is
#  supplied the page is requested with POST instead of GET.
#  Errors are recorded through AddError(), and links are extracted from pages
#  without errors (or from all fetched pages in keepGoing mode).
sub FetchLink
{
	my $URL = shift;
	my $source = shift;
	my $postData = shift;
	# XXX: Check if shouldCheckURL needs special handling for postData
	if (not shouldCheckURL($URL))
	{
		delete($source->{$URL});
		return;
	}
	my $base = getBaseURL($URL);
	$PageCount{$base}++;

	# Generate a pretty URL for printing
	my $prettyURL = $URL;
	$prettyURL =~ s/^https?:\/\/[^\/]+//;
	# If we are in subdomain mode, or if we have already printed an identical
	# URL, only remove the http bit but keep the domain.
	if ($settings{checkSubDomains} or $HasPrinted{$prettyURL})
	{
		$prettyURL = $URL;
		$prettyURL =~ s{^https?://}{};
	}
	# Mark as 'has been printed'
	$HasPrinted{$prettyURL} = true;

	printf('- %-30s : ',$prettyURL);
	# Remember that this URL has been processed, and who referenced it
	$HasRun{$URL} = $source->{$URL};

	# Arguments for a pending AddError() call describing a HTTP-level failure.
	# It is kept back so that a more specific content check can take priority.
	my @realError;
	my $genericError = 'Failed to fetch page. LWP said: "%s" - page was referenced by: %s';
	my $c;

	# Send a HEAD request first so that non-HTML content can be skipped
	# without downloading it
	if(not $settings{skipHEAD})
	{
		$c = LWP_Request('head',$URL);
		if(not defined $c)
		{
			AddError($URL,'Failed to connect to server','SWEC_INT_CONNECTIONERROR');
			return;
		}
		elsif(not $c->is_success)
		{
			if(not ref($c) eq 'HTTP::Response')
			{
				@realError = ($URL,'Failed to fetch page and LWP returned a strange object of type '.ref($c).'. This is possibly a bug, either in swec or LWP. Dumping object:'."\n".Dumper($c), 'SWEC_INT_STRANGEOBJ');
			}
			else
			{
				@realError = ($URL,sprintf($genericError,$c->status_line,$source->{$URL}),'SWEC_INT_HTTP_'.$c->code);
			}
		}
		if(not $c->content_type =~ /(text|x?html|xml)/i)
		{
			if (@realError)
			{
				AddError(@realError);
			}
			else
			{
				print "skipped, non-HTML content-type\n";
			}
			return;
		}
	}
	# Fetch the document
	if ($postData)
	{
		$c = LWP_Request('post',$URL,$postData);
	}
	else
	{
		$c = LWP_Request('get',$URL);
	}

	if(not $c)
	{
		print "giving up\n";
		AddError($URL,'Connection error','SWEC_INT_CONNERR',true);
		return;
	}

	# An error from the HEAD request takes precedence over one from the GET/POST
	if ($c->is_error and not @realError)
	{
		@realError = ($URL,sprintf($genericError,$c->status_line,$source->{$URL}),'SWEC_INT_HTTP_'.$c->code);
	}

	# Set to the error text when a check fails, then to true once it has
	# been reported
	my $err;
	# Fetch content into a string
	my $content = $c->content;
	# Get the length
	my $len = length($content);
	# Prepare it for a lot of regular expressions
	study($content);

	printd(3,'Content of page at URL '.$URL.': '.$content) if $debugLog;

	if(not defined $content or $len == 0)
	{
		if (@realError)
		{
			AddError(@realError);
		}
		else
		{
			AddError($URL,'Empty page','SWEC_INT_EMPTYCONTENT');
		}
		return;
	}

	# Run the body checks in sortindex order, stopping at the first failure
	foreach my $en (@DefaultSortedChecks)
	{
		printd(2,'Running check '.$en.' on URL '.$URL) if $debugLog;
		# Get a ref to the hash
		my $ec = $Checks{$en};
		# Regex check. The modifier part is optional ("regex" on its own is a
		# valid SDFv2 type) and trailing whitespace is tolerated, matching
		# what the SDF parser accepts.
		# NOTE: the patterns are always applied with /is here, whatever
		# modifiers the type carries.
		if ($ec->{type} =~ m{^regex(?:/\w+)?\s*$})
		{
			foreach my $regex (@{$ec->{check}})
			{
				if ($content =~ /$regex/is)
				{
					$err = $ec->{error};
					last;
				}
				printd(2,$URL.' did not match '.$regex) if $debugLog;
			}
		}
		# Minlength check
		elsif ($ec->{type} eq 'minlength')
		{
			foreach my $length (@{$ec->{check}})
			{
				if($len < $length)
				{
					$err = $ec->{error};
				}
			}
		}
		else
		{
			swecDie(9,"Unknown typecheck: $ec->{type} from $en\n");
		}
		# If $err is defined then there's an error.
		if (defined $err)
		{
			# %PARENT% in the error text is replaced by the referring page
			$err =~ s/%PARENT%/$source->{$URL}/g;
			AddError($URL,$err,$en);
			$err = true;
			last;
		}
	}
	if(not $err)
	{
		printd(2,'No errors for '.$URL) if $debugLog;
		if (@realError)
		{
			AddError(@realError);
		}
		else
		{
			print "ok\n";
			ExtractLinks($content,$URL);
		}
	}
	# In keepGoing mode links are followed even from pages that had errors
	if($settings{keepGoing} and ($err or @realError))
	{
		ExtractLinks($content,$URL);
	}
}

# Purpose: Restore a stored session from $session_file into $session
# Usage: loadSession();
# Does nothing if $session_file is not set. Exits with value 10 if the
# session was written for another base URI or has an unsupported version.
sub loadSession
{
	return unless $session_file;

	$session = retrieve($session_file);
	swecDie('Failed to retrieve session: '.$!) if not $session;

	# Only compare the base URI when we actually know ours
	my $foreignSession = (defined($baseURI) and $session->{baseuri} ne $baseURI);
	if ($foreignSession)
	{
		swecDie(10,"Session appears to not be for $baseURI\n");
	}

	if ($session->{sesver} != 1)
	{
		swecDie(10,"Session version unsupported\n");
	}
}

# Purpose: Store the current session in $session_file
# Usage: writeSession();
# Does nothing if there is no session. The session is stamped with
# version 1, which is the version loadSession() accepts.
sub writeSession
{
	return unless $session;

	$session->{sesver} = 1;
	# Get rid of the old file before writing the new one
	if (-e $session_file)
	{
		unlink($session_file);
	}
	my $stored = store($session,$session_file);
	swecDie('Failed to store session: '.$!) if not $stored;
	return $stored;
}

# Purpose: Remove old errors that have been reported in earlier sessions from the list of errors
# Usage: SesRemoveOldErrors(\%newErrors);
#  Every page in \%newErrors whose value is identical to the value stored for
#  the same page in the session's ErrorPages is deleted from \%newErrors.
#  NOTE(review): the comparison is a string comparison, so this assumes the
#  values are plain strings - confirm against the code that fills ErrorPages.
# Simply returns if there is no session
sub SesRemoveOldErrors
{
	my $new = shift;
	# Without this guard the keys() below would autovivify $session into an
	# empty hash, making the rest of swec believe a session is in use.
	return if not $session or not $new;
	return if not $session->{ErrorPages};
	foreach my $p (keys %{$session->{ErrorPages}})
	{
		# Compare against the entry for this page, not the whole hash
		my $old = $session->{ErrorPages}->{$p};
		next if not defined $old;
		if ($new->{$p} && $new->{$p} eq $old)
		{
			delete($new->{$p});
		}
	}
}

# Purpose: Import links from the previous session into a hash of links
# Usage: SessionImportLinks(\%HashToPutURLSInto, only_put_pages_with_known_errors?);
#  URLs that are already in the hash, or that have been fetched during this
#  run, are left alone. Does nothing if there is no session.
sub SessionImportLinks
{
	return unless $session;
	my ($source, $errorsOnly) = @_;

	if (!$errorsOnly && $session->{HasRun})
	{
		# Import everything that was fetched in the previous session
		my $previous = $session->{HasRun};
		foreach my $URL (keys %{$previous})
		{
			next if $source->{$URL} or $HasRun{$URL};
			$source->{$URL} = $previous->{$URL};
		}
	}
	elsif ($session->{LastRawErrorPages})
	{
		# Ensure that the error pages are there
		foreach my $URL (keys %{$session->{LastRawErrorPages}})
		{
			next if $source->{$URL} or $HasRun{$URL};
			# Use the referrer recorded in HasRun if there is one
			$source->{$URL} = $session->{HasRun}->{$URL} || 'Error page from previous session';
		}
	}
}

# Purpose: Store a value in, or merge a hash into, the session
# Usage: appendToSession(NAME, SCALAR/ARRAY/HASHREF, replace?);
#  A hashref is merged key by key into the session entry NAME, after the
#  existing entry has been removed if replace is true. Anything else simply
#  replaces the session entry. Does nothing if there is no session.
sub appendToSession
{
	return unless $session;
	my ($name, $href, $overwrite) = @_;

	if (ref($href) ne 'HASH')
	{
		$session->{$name} = $href;
	}
	else
	{
		# Start from scratch when asked to replace
		delete($session->{$name}) if $overwrite;
		foreach my $key (keys %{$href})
		{
			$session->{$name}->{$key} = $href->{$key};
		}
	}
}

# Purpose: Print formatted --help output
# Usage: PrintHelp("-shortoption", "--longoption", "description");
#  Description will be reformatted to fit within a normal terminal
# Returns: true. Exits with value 9 (through swecDie) if the description is
#  empty.
sub PrintHelp {
	# The short option
	my $short = shift;
	# The long option
	my $long = shift;
	# The description
	my $desc = shift;
	# The generated description that will be printed in the end
	my $GeneratedDesc;
	# The current line of the description
	my $currdesc = '';
	# The maximum length any line can be
	my $maxlen = 80;
	# The length the options take up
	my $optionlen = 20;
	# Check if the short/long are LONGER than optionlen, if so, we need
	# to do some additional magic to take up only $maxlen.
	# The +1 here is because we always add a space between them, no matter what
	if ((length($short) + length($long) + 1) > $optionlen)
	{
		$optionlen = length($short) + length($long) + 1;
	}
	# Split the description into lines
	foreach my $part (split(/ /,$desc))
	{
		if(defined $GeneratedDesc)
		{
			# Wrap when this word would push the line (options included)
			# past $maxlen. $optionlen is used rather than a fixed 20 so
			# that unusually long options are accounted for.
			if ((length($currdesc) + length($part) + 1 + $optionlen) > $maxlen)
			{
				$GeneratedDesc .= "\n";
				$currdesc = '';
			}
			else
			{
				$currdesc .= ' ';
				$GeneratedDesc .= ' ';
			}
		}
		$currdesc .= $part;
		$GeneratedDesc .= $part;
	}
	# Something went wrong
	swecDie(9,'Option mismatch') if not $GeneratedDesc;
	# Print it all
	foreach my $description (split(/\n/,$GeneratedDesc))
	{
		printf "%-4s %-15s %s\n", $short,$long,$description;
		# Set short and long to '' to ensure we don't print the options twice
		$short = '';$long = '';
	}
	# Succeed
	return true;
}

# SDF parsing
# Purpose: Report a problem in an SDF file in a usable way and exit
# Usage: perr(error_text,line_no,file, boolean near_line?,extrainfo);
#  The message says "near line" if near_line is true and "at line" otherwise.
#  extrainfo, if supplied, is appended to the message as a sentence of its own.
#  Exits with value 2 through swecDie.
sub perr
{
	my ($error, $lineno, $file, $isNear, $extra) = @_;
	my $near = $isNear ? 'near' : 'at';
	my $suffix = $extra ? '. '.$extra : '';
	swecDie(2,"SDF parser error: $error $near line $lineno in $file$suffix\n");
}

# Purpose: Emit a usable warning about a possible problem in an SDF file
# Usage: pwarn(warning_text,line_no,file, boolean near_line?);
#  Takes its arguments in the same order as perr, but any extrainfo
#  argument is ignored.
sub pwarn
{
	my ($warning, $lineno, $file, $isNear) = @_;
	my $near = $isNear ? 'near' : 'at';
	warn("SDF parser warning: $warning $near line $lineno in $file\n");
}

# Purpose: Die, optionally with a preset exit value
# Usage: swecDie(value,error); OR swecDie(error);
#  With a defined value the error is printed with warn() and the program
#  exits with that value. Without one the error is thrown with die().
sub swecDie
{
	my @args = @_;
	# The exit value is only present in the two-argument form
	my $returnval = (@args > 1) ? shift(@args) : undef;
	my $error = shift(@args);

	die($error) if not defined $returnval;

	warn($error);
	exit($returnval);
}

# Purpose: Verify that a parsed test is complete and prepare it for use
# Usage: SDFFinalizeTest(\%Data,$name,$file,$lineno);
#  Reports a parser error (see perr) if the test lacks one of the required
#  settings. For regex tests the compiled expressions are stored in the
#  "regexes" entry of the test.
sub SDFFinalizeTest
{
	my ($Data, $name, $file, $lineno) = @_;
	# Errors are reported as being "near" this line.
	# NOTE(review): presumably the parser is already past the definition
	# when this runs, hence the adjustment - confirm.
	$lineno = $lineno - 2;

	# Needs at least one of each of these
	foreach my $required (qw(type check error sortindex))
	{
		my $value = $Data->{$name}{$required};
		next if defined $value and length $value;
		perr('Required setting "'.$required.'" missing for '.$name,$lineno,$file,1);
	}

	# Compile regexes if needed
	my $isRegexTest = ($Data->{$name}->{type} =~ /regex/);
	if ($isRegexTest)
	{
		$Data->{$name}->{regexes} = GetSDFRegexObject($Data,$name,$file,$lineno);
	}

}

# Purpose: Get a list of regex objects for a check
# Usage: GetSDFRegexObject(\%Data,$name,$file,$lineno);
#  The modifiers are read from the part of the test's type that follows the
#  slash (regex/MODIFIERS): "i" makes the expressions case-insensitive, "s"
#  enables smart whitespace matching. Unknown modifiers cause a warning.
#  NOTE: with the "s" modifier the strings in the test's "check" list are
#  rewritten in place, so other users of that list see the expanded form.
# Returns: Arrayref of regex objects
sub GetSDFRegexObject
{
	my $Data = shift;
	my $name = shift;
	my $file = shift;
	my $lineno = shift;
	my $checksHash = $Data->{$name};
	my @finalRegexes;
	my $modifiers = '';
	# True if the expressions should be compiled case-insensitively
	my $caseInsensitive = false;
	if ($checksHash->{'type'} =~ m{/})
	{
		$modifiers = $checksHash->{'type'};
		$modifiers =~ s{^.*/+}{};
		# The parser tolerates trailing whitespace after the type
		$modifiers =~ s{\s+$}{};

		foreach my $str (split(//,$modifiers))
		{
			next if $str eq 's';
			if ($str eq 'i')
			{
				$caseInsensitive = true;
			}
			else
			{
				pwarn('Unknown modifier "'.$str.'" for '.$name,$lineno,$file,true);
			}
		}
	}

	# $check is an alias into the check list, so the substitutions below
	# modify the list itself
	foreach my $check (@{$checksHash->{check}})
	{
		if ($modifiers =~ /s/)
		{
			# Any run of whitespace matches whitespace, &nbsp; or HTML tags
			$check =~ s/\s+/ /g;
			$check =~ s/ /(\\s+|&nbsp;|<[^>]+>)+/g;
		}
		# A block eval traps compilation errors in the expression
		my $regex = eval
		{
			$caseInsensitive ? qr/$check/si : qr/$check/s;
		};
		if (not defined $regex)
		{
			perr('Failed to compile regular expression for '.$name.': '.$@,$lineno,$file,true);
		}
		push(@finalRegexes,$regex);
	}
	return \@finalRegexes;
}

# Purpose: Load an SDF (version 1) file
# Usage: LoadSDF(FILE,\%Conf);
#  The tests defined in FILE are added to \%Conf (a fresh hash is used if
#  none is supplied). Sections written as [-NAME] add NAME to the list of
#  ignored checks instead. Syntax errors are reported through perr, which
#  ends the program.
# Returns: true
sub LoadSDF
{
	my $file = shift;
	my $Data = shift;
	printd(1,"Loading SDF: $file") if $debugLog;
	if(not $Data)
	{
		$Data = {};
	}
	open (my $f, '<',$file) or swecDie(10,"Failed to open $file for reading: $!\n");
	# Name of the section being parsed, undef when we are not inside one
	my $currnam;
	# The type of the current section, undef until its type= has been seen
	my $currtype;
	my $lineno = 0;
	# Read into a lexical so that the caller's $_ is never clobbered
	while (my $line = <$f>)
	{
		$lineno++;
		# Skip comments and empty lines
		next if $line =~ /^#/;
		next if not $line =~ /\S/;
		chomp($line);

		if ($line =~ /^\[/)
		{
			my $prevnam = $currnam;
			$currtype = undef;
			# This is a negative statement, add it to the ignore list.
			if ($line =~ /^\[\-/)
			{
				($currnam = $line) =~ s/(\[|\]|\-)*//g;
				$IgnoreChecks{$currnam} = true;
				printd(1,"Added $currnam to the ignore list") if $debugLog;
				# We don't currently have a valid section.
				$currnam = undef;
			}
			else
			{
				($currnam = $line) =~ s/(\[|\])*//g;
				if ($Data->{$currnam})
				{
					perr('Duplicate definition of '.$currnam,$lineno,$file,false,'Maybe this file contains mixed SDFv1 and SDFv2 definitions?');
				}
				$Data->{$currnam} = {};
			}
			# A new header ends the previous section, so verify that one now
			if ($prevnam)
			{
				SDFFinalizeTest($Data,$prevnam,$file,$lineno);
			}
		}
		else
		{
			if(not $currnam)
			{
				perr('Data found before section name definition',$lineno,$file);
			}
			elsif(not $line =~ /\=/)
			{
				perr('Unable to parse line, unknown data',$lineno,$file);
			}
			# Split "name = content" into its two parts
			my $name = $line;
			my $content = $line;
			$name =~ s/^(\w+)\W.*$/$1/;
			$content =~ s/^[^=]+=\s?//;
			if(not $content =~ /\S/)
			{
				perr('Failed to parse setting content',$lineno,$file);
			}
			elsif(not $name =~ /\S/)
			{
				perr('Failed to parse setting name',$lineno,$file);
			}
			elsif($name eq $line or $content eq $line)
			{
				perr('General line parsing error',$lineno,$file);
			}
			elsif ($name eq 'type')
			{
				# Trailing whitespace is accepted by the validation below, so
				# remove it to let the exact comparisons that follow succeed
				$content =~ s/\s+$//;
				if (not $content =~ /^(urlrex|regex(s)?|minlength)\s*$/)
				{
					perr('Unknown type specified, must be one of regex, minlength, urlrex (maybe you meant to use SDFv2?)',$lineno,$file);
				}
				# Translate the SDFv1 type into the SDFv2 source and type
				if ($content eq 'urlrex')
				{
					$Data->{$currnam}{'source'} = 'url';
					$Data->{$currnam}{'type'} = 'regex/i';
				}
				elsif($content =~ /regex/)
				{
					$Data->{$currnam}{'source'} = 'body';
					if ($content =~ /regexs/)
					{
						$Data->{$currnam}{'type'} = 'regex/si';
					}
					else
					{
						$Data->{$currnam}{'type'} = 'regex/i';
					}
				}
				elsif($content eq 'minlength')
				{
					$Data->{$currnam}{'source'} = 'body';
					$Data->{$currnam}{'type'} = 'minlength';
				}
				$currtype = $content;
			}
			elsif (not $currtype)
			{
				perr('Data definitions located before type definition',$lineno,$file,false,'type must be the first setting set for any definition because it changes how the rest is parsed (maybe you have mixed SDFv1 and SDFv2?)');
			}
			elsif ($name eq 'check')
			{
				# A test can have any number of check= lines
				if(not defined $Data->{$currnam}{'check'})
				{
					$Data->{$currnam}{'check'} = [];
				}
				push(@{$Data->{$currnam}{'check'}},$content);
			}
			else
			{
				if(not $name =~ /^(error|sortindex)$/)
				{
					# "regexes" is where the compiled expressions are stored,
					# so it can not be used as a setting
					if ($name =~ /^(regexes)$/)
					{
						perr('Illegal setting "'.$name.'" found in SDF file',$lineno,$file);
					}
					else
					{
						pwarn('Unknown setting "'.$name.'" found in SDF file',$lineno,$file);
					}
				}
				$Data->{$currnam}{$name} = $content;
			}
		}
	}
	close($f);
	# The last section in the file has no header following it
	if ($currnam)
	{
		SDFFinalizeTest($Data,$currnam,$file,$lineno);
	}
	push(@LoadedSDF,$file);
	return true;
}

# Purpose: Load an SDFv2 file
# Usage: LoadSDFv2(FILE,\%Conf);
#  The tests defined in FILE are added to \%Conf (a fresh hash is used if
#  none is supplied). Sections written as [-NAME] add NAME to the list of
#  ignored checks instead. If a type= is found before the source= of a
#  section, the file is assumed to be SDFv1 and is handed over to LoadSDF.
#  Syntax errors are reported through perr, which ends the program.
# Returns: true (or the return value of LoadSDF in compatibility mode)
sub LoadSDFv2
{
	my $file = shift;
	my $Data = shift;
	printd(1,"Loading SDF: $file") if $debugLog;
	if(not $Data)
	{
		$Data = {};
	}
	open (my $f, '<',$file) or swecDie(10,"Failed to open $file for reading: $!\n");
	# Name of the section being parsed, undef when we are not inside one
	my $currnam;
	# The source of the current section, undef until its source= has been seen
	my $currsource;
	my $lineno = 0;
	# Read into a lexical so that the caller's $_ is never clobbered
	while (my $line = <$f>)
	{
		$lineno++;
		# Skip comments and empty lines
		next if $line =~ /^#/;
		next if not $line =~ /\S/;
		chomp($line);

		if ($line =~ /^\[/)
		{
			my $prevnam = $currnam;
			$currsource = undef;
			# This is a negative statement, add it to the ignore list.
			if ($line =~ /^\[\-/)
			{
				($currnam = $line) =~ s/(\[|\]|\-)*//g;
				$IgnoreChecks{$currnam} = true;
				printd(1,"Added $currnam to the ignore list") if $debugLog;
				# We don't currently have a valid section.
				$currnam = undef;
			}
			else
			{
				($currnam = $line) =~ s/(\[|\])*//g;
				if ($Data->{$currnam})
				{
					perr('Duplicate definition of '.$currnam,$lineno,$file);
				}
				$Data->{$currnam} = {};
			}
			# A new header ends the previous section, so verify that one now
			if ($prevnam)
			{
				SDFFinalizeTest($Data,$prevnam,$file,$lineno);
			}
		}
		else
		{
			if(not $currnam)
			{
				perr('Data found before section name definition',$lineno,$file);
			}
			elsif(not $line =~ /\=/)
			{
				perr('Unable to parse line, unknown data',$lineno,$file);
			}
			# Split "name = content" into its two parts
			my $name = $line;
			my $content = $line;
			$name =~ s/^(\w+)\W.*$/$1/;
			$content =~ s/^[^=]+=\s?//;
			if(not $content =~ /\S/)
			{
				perr('Failed to parse setting content',$lineno,$file);
			}
			elsif(not $name =~ /\S/)
			{
				perr('Failed to parse setting name',$lineno,$file);
			}
			elsif($name eq $line or $content eq $line)
			{
				perr('General line parsing error',$lineno,$file);
			}
			elsif($name eq 'source')
			{
				if(not $content =~ /^(body|url)$/i)
				{
					perr('Unknown source specified, must be one of body or url',$lineno,$file);
				}
				# The source is accepted in any case but compared exactly
				# later on, so store it in lower case
				$content = lc($content);
				$currsource = $content;
				$Data->{$currnam}{'source'} = $content;
			}
			elsif ($name eq 'type')
			{
				# Trailing whitespace is accepted by the validation below, so
				# remove it to let exact comparisons of the type succeed
				$content =~ s/\s+$//;
				if(not $currsource)
				{
					pwarn('type= specified before source=. Enabling SDF version 1 compatibility mode for this file (will be removed in 0.6)',$lineno,$file);
					# Drop the half-parsed section, LoadSDF reads the file
					# from the start
					delete($Data->{$currnam});
					close($f);
					return LoadSDF($file,$Data);
				}
				elsif (not $content =~ m{^(regex(/[si]+)?|minlength)\s*$})
				{
					if ($content =~ m{/})
					{
						perr('Unknown type or modifier specified, must be one of regex, minlength with one of the modifiers specified in the manpage.',$lineno,$file);
					}
					else
					{
						perr('Unknown type specified, must be one of regex, minlength',$lineno,$file);
					}
				}
				elsif ($content eq 'minlength' and $currsource eq 'url')
				{
					perr('The "minlength" type can not be applied to "url" sources',$lineno,$file);
				}
				$Data->{$currnam}{'type'} = $content;
			}
			elsif (not $currsource)
			{
				perr('Data definitions located before source definition',$lineno,$file,false,'source must be the first setting set for any definition because it changes how the rest is parsed');
			}
			elsif ($name eq 'check')
			{
				# A test can have any number of check= lines
				if(not defined $Data->{$currnam}{'check'})
				{
					$Data->{$currnam}{'check'} = [];
				}
				push(@{$Data->{$currnam}{'check'}},$content);
			}
			else
			{
				if(not $name =~ /^(error|sortindex)$/)
				{
					# "regexes" is where the compiled expressions are stored,
					# so it can not be used as a setting
					if ($name =~ /^(regexes)$/)
					{
						perr('Illegal setting "'.$name.'" found in SDF file',$lineno,$file);
					}
					else
					{
						pwarn('Unknown setting "'.$name.'" found in SDF file',$lineno,$file);
					}
				}
				$Data->{$currnam}{$name} = $content;
			}
		}
	}
	close($f);
	# The last section in the file has no header following it
	if ($currnam)
	{
		SDFFinalizeTest($Data,$currnam,$file,$lineno);
	}
	push(@LoadedSDF,$file);
	return true;
}

# Purpose: Load all SDF files that apply, detecting their paths and dying if it fails
# Usage: AutoLoadSDF();
#  Loads, in this order: default.sdf from the directory containing swec,
#  ~/.swecrc if it exists, and the SDF file supplied by the user (if any).
#  All of them are loaded into %Checks, after which the SDF cache is built.
sub AutoLoadSDF
{
	# Autodetect the default SDF
	my $name = 'default.sdf';
	my $loc = dirname(realpath($0));
	if(not -e $loc)
	{
		swecDie(10,"Unable to locate directory containing swec\n");
	}
	elsif(not -e $loc.'/'.$name)
	{
		swecDie(10,"Unable to find $name\n");
	}
	LoadSDFv2($loc.'/'.$name,\%Checks);
	# Check if the user has an ~/.swecrc. HOME is not guaranteed to be set,
	# and without it we would end up looking for /.swecrc.
	my $home = $ENV{HOME};
	if (defined $home and length $home)
	{
		my $rcfile = $home.'/.swecrc';
		if (-e $rcfile)
		{
			if(not -r $rcfile)
			{
				swecDie(10,"~/.swecrc exists but is not readable\n");
			}
			LoadSDFv2($rcfile,\%Checks);
		}
	}
	# Load the user supplied SDF if needed
	if ($settings{SDFFile})
	{
		if(not -e $settings{SDFFile})
		{
			swecDie(10,"$settings{SDFFile}: does not exist\n");
		}
		elsif(not -r $settings{SDFFile})
		{
			swecDie(10,"$settings{SDFFile}: is not readable\n");
		}
		LoadSDFv2($settings{SDFFile},\%Checks);
	}
	GenerateSDFCache();
}

# Purpose: Generate the SDF cache in @DefaultSortedChecks and @URLSortedChecks
# Usage: GenerateSDFCache();
#  The names of all checks in %Checks that are not ignored are appended to
#  @DefaultSortedChecks (source "body") or @URLSortedChecks (source "url"),
#  ordered by their sortindex. Dies if a check has any other source.
sub GenerateSDFCache
{
	# Checks with the same sortindex are ordered by name. Without that their
	# order would follow the hash order, which can differ between runs.
	my @ordered = sort {
		$Checks{$a}->{sortindex} <=> $Checks{$b}->{sortindex}
			or $a cmp $b
	} keys(%Checks);

	foreach my $en (@ordered)
	{
		next if $IgnoreChecks{$en};

		if ($Checks{$en}->{source} eq 'body')
		{
			push(@DefaultSortedChecks,$en);
		}
		elsif($Checks{$en}->{source} eq 'url')
		{
			push(@URLSortedChecks,$en);
		}
		else
		{
			die('Unknown check source "'.$Checks{$en}->{source}.'" for test '.$en."\n");
		}

	}
}

# Purpose: Add a message to the list of logged messages
# Usage: toLog(MESSAGE);
sub toLog
{
    my ($message) = @_;
    push(@loggedMessages, $message);
}

# Purpose: Log debugging info
# Usage: printd(LEVEL, INFO);
#  INFO is handed to toLog unless LEVEL is higher than the current
#  debug logging level ($debugLog).
sub printd
{
    my ($level, $msg) = @_;
    return if $level > $debugLog;
    toLog($msg);
}

# An END block is used so that ErrorReport still gets run if swec stops
# because of a bug, as long as the report has not been run already.
END
{
	unless ($hasRunErrorRep)
	{
		print "Crash detected. SWEC is unable to continue, dumping error report:\n";
		ErrorReport();
	}
};
__END__

=head1 NAME

swec - Simple Web Error Checker

=head1 SYNOPSIS

B<swec> [I<OPTIONS>]

B<swec> [I<HOST>]

=head1 DESCRIPTION

SWEC is a program that automates testing of dynamic websites.
You provide it with a base URL, it then parses each HTML file it finds
on that site for links, and if those are within the site specified (ie. local
links, not external), it will check those as well (in this respect it
is a lot like a crawler, more on that later). It will then parse the pages
looking for known errors and report those (such as Mason or PHP errors), and
will report if a page can not be read (by either returning a 404, 500 or
similar).

Since you may often want SWEC to be logged in on your site, you have to be
careful. When logged in, SWEC will still click on all links it finds, including
things like 'join group' or 'delete account' (though by default SWEC attempts to
avoid the latter, see --noignore). Therefore it is highly recommended that when
you run SWEC as a logged-in user on a site, either use a test server or specify
strict --matchmax settings that makes SWEC avoid following such links.

Running SWEC on a live site without being logged in as a user is perfectly fine,
it won't do anything a normal crawler wouldn't do (except that SWEC will ignore
robots.txt).

It also has various helpers to assist with some other basic debugging tasks,
such as a cookie-supporting version of lwp-request's HEAD and GET commands.

=head1 OPTIONS

=over

=item B<--help>

Print a quick help screen with the most common options

=item B<--help-all>

Print a help screen with all options

=item B<--version>

Print version information and exit

=item B<--ie, --opera, --chrome, --android>

Pretend (somewhat) to be IE, Opera, Google Chrome or the Android browser. The
default is to pretend (somewhat) to be Firefox. To see the entire user-agent
string, run swec with the --debuginfo parameter at the end (ie. after any
user-agent altering parameters). Chrome pretends to be Safari, so it doubles as
that (and should do the same for any other webkit/KHTML-based browser).

=item B<--ua> USERAGENT

Use USERAGENT as the user agent, instead of the default one. The user agent
you supply here will have a string that identifies it as SWEC appended to it,
this can not be overridden without editing the source.

=item B<-b, --baseurl>

The base URL. Will only check URLs matching it and will use it as the
domain/url for pages that have relative paths. Optionally you may omit
--baseurl and simply supply the URL on the command-line without
the parameter (ie. I<swec example.org> and I<swec --baseurl example.org> are
essentially the same).

=item B<--checksub>

Allow SWEC to descend into subdomains of --baseurl. It will then follow
any links that lead to subdomains of the supplied --baseurl and check
those as well. Useful for running a quick check of a site and all of
its linked children.

=item B<-s, --urlseed>

The URL seed, the first URL to check. Can be supplied multiple times,
the URLs are checked in the order supplied on the commandline. These
can be absolute paths, with the domain name, or relative paths (ie. like
/index.html).

=item B<--postseed> I<URL> I<DATA PAIR>

This works like --urlseed (and the URL parameter is identical to that of
--urlseed), but it also takes a second DATA PAIR parameter. This data pair
is a simple string of key-values in the form:

	user=123&password=321&_form=login

The string should not be URI-encoded, SWEC will handle that.
You can use this to allow SWEC to log into sites that only accept POSTed
login data, or to test specific pages.

Note that all --postseed URLs are checked before any --urlseed URLs.

=item B<-e, --harderrorlimit>

Change the hard error page limit, the max amount of pages of any link type
to request before deciding that all pages of that type will return errors
(and thus not request any more) (default: 5)

A link type is a complete path without its arguments. So /files/list
is a link type, and /files/list?file_id=1 would be one of the pages
of that link type. With a soft page limit of 20, SWEC will attempt
to not request more than 20 instances of /files/list with varying
parameters.

=item B<-m, --matchmax>

This parameter lets you add advanced limits to the amount of pages that
SWEC will check. It takes a parameter in the form:

	[REGEX]=[COUNT]

[REGEX] is a perl regular expression (which can be just a basic string as well),
optionally preceded by a !. If REGEX starts with ! then the ! will be
removed from the expression and it will be labeled as a reverse check.

SWEC will only run on COUNT number of URLs that match REGEX (or, if it is
labeled as reverse, then only COUNT number of URLs that do NOT match REGEX).
All others will be skipped after the URL tests are run (meaning that SWEC will
not download those pages and will not run any tests on the pages itself).

You can add as many --matchmax parameters as you want. You can use "=" in the REGEX,
SWEC knows to only count the last "=" as the separator for the count-expression.

See also --noignore.

Examples:

=over

=item --matchmax edit=300

Check at most 300 URLs that contain the word 'edit'

=item --matchmax '!admin=300'

Check at most 300 URLs that do not contain the word 'admin'

=item --matchmax '/files/get=0'

Don't check any URL that matches /files/get

=item --matchmax .=1000

Don't check more than 1000 URLs

=back

If a URL matches multiple patterns, then each pattern's counter will be increased.
This means that if you have the setup '--matchmax files=30' and '--matchmax edit=200'
and the url '/files/edit' then that will increase the counter for both files=30 and
edit=200, because it matches both.

=item B<--noignore>

By default SWEC will pretend I<--matchmax delete=0> and I<--matchmax logout=0> was 
supplied to avoid deleting stuff and logging out when logged in. If you supply this
parameter SWEC will not add these matchmax entries.

=item B<-l, --log>

Write a log of the session to this file.

=item B<--session>

Save and load session from the file supplied.

=item B<-i, --sessnam>

Takes a FILE argument. Short for --session FILE.ses --log FILE.log

=item B<-x, --exclude>

Exclude the test name supplied. The test name is the name inside of brackets ([NAME]) in the
SDF files, and the name at the end of any error in the error report generated by swec.
All predefined tests start with SWEC_.

=item B<--nohead>

By default SWEC sends a HEAD request to the server before each GET request. This is
so that it can detect binary files and skip GETting them. This can save some bandwidth
when simply running a basic SWEC on a site. However, if you either have only basic
binary files that SWEC detects (see the --debuginfo output to see the regex used
on a URL to check), or you add a --matchmax [something]=0 statement that makes
SWEC skip all binary files anyway, you may want to disable the HEAD request to
save some time during a test run.

It is always safe to use this, the worst case scenario is that SWEC downloads
some binary files and wastes some bandwidth.

=item B<--keepgoing>

By default SWEC will stop processing a page if it hits an error. If you supply
this parameter, SWEC will parse a document for links even if it contained an
error (as long as that error was not an SWEC_INT_* error).

=item B<--validate>

Takes a FILE argument. Validates the syntax of the SDF file supplied.

=item B<--lwpget>, B<--lwphead> url1, url2, ..

This is an SWEC implementation of the lwp-request HEAD and GET commands that come
with LWP.

The difference between this and LWP (other than that this one doesn't support
any extra parameters) is that it is run using SWEC's LWP wrappers, so you get
the added benefit of cookies, which means that you can supply a login URL first
to examine how a request would be responded to after login. This is particularly
useful to examine JSON or XML replies, or to check if the headers being replied
are set properly.

=item B<--disable-lib> LIB

Using this parameter you can disable the various optional libs that SWEC uses.
During normal use you won't have to do this, however it can be used if you hit
problems related to these libraries. The following libraries can be toggled
off using --disable-lib: HTML::LinkExtractor, HTML::Entities, URI::Encode

You can supply --disable-lib as many times as you want to disable use of
multiple libraries. Note that when using --disable-lib along with --debuginfo
you must supply all --disable-lib parameters before --debuginfo.

=item B<--debuginfo>

Outputs various information that is useful in bug reports. This also
lists the regular expression of files SWEC ignores, as well as the
user-agent string.

=back

=head1 THE ERROR REPORT

When SWEC has finished a run, you will get an error report. There it will
list the number of pages checked, the number of pages that had errors
and which pages had what errors.

If your website uses GET parameters (ie /path/?somekey=somevalue), then
SWEC will output two lines of page information. The first one is the standard
line that shows the total number of pages fetched, and the total number of pages
that had errors. The second line refers to 'base pages'. A base page is any
page with its ?key=value parameters removed. So for instance for the request
/index.php?file=something, /index.php is the base page. This information is
provided so that you can easily see if the issue is simply with a few specific
files/controllers.

=head1 SDF FILE FORMAT

The file format that you can use to write SWEC tests is SDF (SWEC definition file).
Its syntax is similar to INI-files, with a few minor differences.

It consists of definitions of various checks, each check definition starts with a
[NAME] header, where NAME is the name of the test. The next line is required to
be the source statement, and all checks must have at least one "type", "check",
"error" and "sortindex" statement. The general syntax is:

	[NAME]
	statement = value

If you just want a quick overview of the format, check out the I<EXAMPLES>
section.

=head2 VERSION

The version described here is SDFv2 (which is an incremental improvement over SDFv1
which was used in SWEC 0.1.x-0.3.x), used in SWEC 0.4+. New tests should be written
using this format, rather than SDFv1 (however, SWEC will be able to load v1 test
files until SWEC 0.6).

=head2 STATEMENTS

=over

=item I<source>

This is the source for the check. See the SOURCES section for a list of the various
sources.

=item I<type>

This is the type of check. See the TYPES section for a list of the various types.

=item I<check>

This is a check of the type "type" (defined earlier). A test can have as many
check= statements as it wants, the page will have failed the test if I<one> of the
check= tests fails.

=item I<error>

This is the error message you want reported to the user when the check finds something.
You can use the following substitutions in the text:

	%PARENT% = the file that referenced the file that contained the error being reported.

=item I<sortindex>

This is used for sorting which checks are to be run first. SWEC-bundled checks will
never have a negative sortindex value (but may have any positive value), so if you need
your checks to run before the bundled ones, simply give them a sortindex of 0 or less.
Lower values get run first.

=back

=head2 SOURCES

=over

=item I<body>

Runs the check(s) on the body of a page. This is the primary source, as it allows
you to check for errors printed in the page body itself.

=item I<url>

Runs the check(s) on the URL leading to a page.  As an example of its use, SWEC
uses the url source type to check for URLs with odd characters, newlines or
that reference file:/ instead of http(s):// and friends.

=back

=head2 TYPES

=over

=item I<regex>

This is a full perl regex which is supplied the entire (raw, unparsed) HTML of the
page being tested. You can supply multiple check= statements, where any of them
will trigger the error if they match.

This check can take various modifiers (you can write the check= entry as
I<regex/B<modifiers>>).

Modifiers:

=over

=item B<i>

Same as the usual regex /i modifier. Makes the expression case-insensitive.

=item B<s>

Enables SWEC 'smart regex' mode. It will convert any whitespace found in the
regex to something that matches any whitespace, some &nbsp;, ignoring HTML tags
and so on. This allows for more flexibility (and is probably the best choice
for most searches), but has the downside of modifying your regular expression
during runtime, adding more points of failure and taking some control away from
you.

=back

=item I<minlength>

This is the minimum length of any file. check= is set to the minimum number of characters
you want any page to have.

=back

=head2 NEGATIVE DEFINITIONS

You can specify that a check is never to be run in an SDF file (useful for
overriding a bundled check for instance). The syntax is simply [-NAME].

=head2 EXAMPLES

Here are a few examples of SWEC tests, along with an explanation of them.
See the bundled I<default.sdf> file for more checks.

=over

=item I<SWEC_404>

This will look for the string 404: file not found, case insensitive, with an
optional ":" and using the 'smart regex' mode (so it will ignore all html tags,
and allow &nbsp; instead of a real space). The error will also output
which URL referred to it, in case the page actually isn't valid.

	[SWEC_404]
	source = body
	type = regex/is
	check = 404(:)? file not found
	check = file not found
	error = This file/resource appears not to exist (linked to by %PARENT%)
	sortindex = 999


=item I<SWEC_CATALYST_ERROR>

This will look for either of two known Catalyst namespaces that
appear in catalyst errors. This is a generic 'catch-all' for catalyst
errors, when more specific checks do not match.

	[SWEC_CATALYST_ERROR]
	source = body
	type = regex/is
	check = Catalyst::Request
	check = Catalyst::Response
	error = Catalyst error
	sortindex = 35

=item I<SWEC_PERL_CARP>

This looks for a file path that looks like it might be referring
somehow to the perl carp functions. If it is found, then some sort
of perl error has occurred.

	[SWEC_PERL_CARP]
	source = body
	type = regex/is
	check = /usr/(\w+/)+perl\d*?(/[^/]+)?/Carp/Clan.pm
	check = /usr/(\w+/)+perl\d*?(/[^/]+)?/(CGI/)?Carp.pm
	error = Unknown perl error (Carp)
	sortindex = 999

=item I<SWEC_URI_SCALAROBJ>

This applies a regular expression to URLs, checking if the
URL appears to have a printed perl reference in it.

	[SWEC_URI_SCALAROBJ]
	source = url
	type = regex
	check = (ARRAY|HASH|SCALAR|GLOB)\(0x
	error = Strange URL, appears to have tried to use perl objects as scalars. Was referenced by %PARENT%.
	sortindex = 9

=back

=head2 SUBMITTING TESTS UPSTREAM

If you write an SWEC test that you think would be useful for other people as well,
please submit a bug report with the test at L<http://random.zerodogg.org/swec/bugs>
and we will look into getting it into the next SWEC release. The only catch is
that you will need to make the test available under the GNU GPLv3 (or later).

=head1 HELP/SUPPORT

If you need help or support please visit the website at L<http://random.zerodogg.org/swec>

=head1 BUGS AND LIMITATIONS

=over

=item - Does not respect robots.txt.

SWEC assumes that the owner of the site is running it,
and thus does not wish for swec to respect robots.txt.

=back

If you find a bug, please report it at L<http://random.zerodogg.org/swec/bugs>

=head1 INCOMPATIBILITIES

=over

=item SWEC 0.2 and older returned 0 even if some tests failed.

=item SWEC 0.3 and older can not load SDFv2 test files.

=item SWEC 0.6 and later will no longer be able to load SDFv1 test files.

=back

=head1 FILES

=over

=item I<~/.swecrc>

The SWEC rc file. This is an SDF-file that you can use to add local tests,
or for disabling some bundled tests that you don't find useful.

=item I<default.sdf>

The default SDF file containing the bundled definitions. This resides
in the data dir containing swec. You can in most cases locate this directory
by issuing:

	 dirname $(readlink $(which swec))

at a bash prompt. If that does not give any results then you need
to search for the file yourself, or consult your distribution's documentation.

=back

=head1 EXIT STATUS

=over

=item Returns 0 on success, if no errors occurred.

=item Returns 1 on a failure during testing.

=item Returns 2 on SDF parsing error

=item Returns 9 on internal error

=item Returns 10 on any other error

=back

=head1 AUTHOR

SWEC was written by Eskild Hustvedt I<< <eskild at the domain zerodogg dot org> >>

=head1 LICENSE AND COPYRIGHT

Copyright (C) Eskild Hustvedt 2008, 2009, 2010

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.