Permalink
Browse files

initialise repo with v0.30

  • Loading branch information...
0 parents commit a01c72ab2e26cd16d4bfea6828d0a91d5dc42133 barbie committed Oct 1, 2008
111 CHANGES
@@ -0,0 +1,111 @@
+Revision history for Perl module CPAN::WWW::Testers::Generator.
+
+0.30 01/10/2008
+ - ensure upload pattern matching is for the end of the string.
+ - add more option validation in bin/cpanstats-verify (v0.20).
+
+0.29 19/09/2008
+ - ignore any uploads that are not in a supported archive format used
+ for testing.
+
+0.28 18/09/2008
+ - added SQLite pragma 'auto_vacuum' to database creation.
+ - created an _init function to generate db/nntp objects in one place
+ - add --localonly option to help screen in bin/cpanstats-verify (v0.19)
+ - fixed test script to correctly do cmp_ok :)
+
+0.27 11/09/2008
+ - added 'grade' option to search in bin/cpanstats-select (v0.08)
+ - fixed help text in bin/cpanstats-update (v0.06)
+ - fixed test bug in 12generate.t (I can't count!)
+
+0.26 02/09/2008
+ - abstracted all database functionality out of Generator.pm into
+ Database.pm, including database creation. All scripts updated too.
+ - database links changed to reference CPAN Testers development site.
+ - corrections to the database schema in POD
+ - several POD updates across all files, including
+ bin/cpanstats (v0.04)
+ bin/cpanstats-delete (v0.11)
+ bin/cpanstats-reparse (v0.07)
+ bin/cpanstats-select (v0.07)
+ bin/cpanstats-update (v0.05)
+ bin/cpanstats-verify (v0.18)
+ - fixed dependency in cpanstats
+ - consistently provided help|h and version|V options to all scripts
+ - fix to support old form of PAUSE uploads
+ - abstracted reparse code into Generator.pm from cpanstats-reparse
+ - more tests
+
+0.25 28/08/2008
+ - added POD about bugs and see also sections.
+ - fixed POD in scripts.
+ - added author to upload metadata, to save lookups elsewhere.
+ - renamed several supporting scripts:
+ delstats => bin/cpanstats-delete (v0.10)
+ selectstats => bin/cpanstats-select (v0.06)
+ upstats => bin/cpanstats-update (v0.04)
+ readstats => bin/cpanstats-verify (v0.17)
+ - added further functionality to cpanstats-select.
+ - added bin/cpanstats-reparse (v0.06) to distribution
+ - added nostore option to not retain old articles in the articles
+ database.
+
+0.24 19/08/2008
+ - added bin/readstats (v0.16), bin/delstats (v0.09),
+ bin/selectstats (v0.05) and bin/upstats (v0.03) from the non-CPAN
+ cpanstats distribution.
+ - updated all scripts to use an options hash rather than individual
+ variables.
+ - added new Database.pm module to handle all local DB interaction.
+ - fixed test bug in t/12generate.t (thanks to Slaven Rezic).
+
+0.23 18/08/2008
+ - new maintainer: Barbie.
+ - major overhaul to draw in CPAN Testers Statistics code.
+ - added version to Article.pm.
+ - added ability to decode QuotedPrintable and Base64 (Article.pm)
+ - added functionality to parse the upload articles as well as the
+ report articles (Article.pm).
+ - added more reliable subject parsing.
+ - added more reliable Perl -V parsing.
+ - testers.db now renamed to cpanstats.db.
+ - news.db now renamed to articles.db.
+ - field names changed:
+ status => state,
+ distribution => dist,
+ archname => platform.
+ - new fields added: postdate,tester.
+ - fixed prerequisites in modules, Makefile.PL and META.yml.
+ - bin/cpan_www_testers_generate replaced with bin/cpanstats.pl
+ - added more test samples
+ - added more tests
+
+0.22 Thu Feb 3 15:27:41 GMT 2005
+ - now includes perl + OS version in the testers.db
+ - use SQLite 3.0
+ - bin/cpan_www_testers_generate expanded to give more
+ flexibility for execution
+ - added Makefile.PL (as per TODO)
+ - added tests
+ - added more POD
+ - (thanks to Barbie)
+
+0.21 Sun Mar 7 18:31:35 GMT 2004
+ - the NNTP cache now uses SQLite instead of DB_File
+ - rename Changes to CHANGES
+
+0.20 Wed Aug 13 21:52:06 BST 2003
+ - make sure to HTML escape everything
+ - design update, now with CSS
+ - Fix bug where reports are use A/AM/AMS/Crypt-TEA-1.22.tar.gz
+ instead of Crypt-TEA-1.22.tar.gz
+ - added a TODO
+ - sort using version, not Sort::Version
+ - renamed to CPAN::WWW::Testers::Generator
+ - included HISTORY section by Elaine Ashton
+ - moved examples/generate.pl to an installed script,
+ bin/cpan_www_testers_generate
+
+0.10 Thu Jul 31 23:09:25 BST 2003
+ - initial release
27 MANIFEST
@@ -0,0 +1,27 @@
+CHANGES
+MANIFEST This list of files
+Makefile.PL
+META.yml
+README
+bin/cpanstats
+bin/cpanstats-delete
+bin/cpanstats-reparse
+bin/cpanstats-select
+bin/cpanstats-update
+bin/cpanstats-verify
+examples/generate.sh
+lib/CPAN/WWW/Testers/Generator.pm
+lib/CPAN/WWW/Testers/Generator/Article.pm
+lib/CPAN/WWW/Testers/Generator/Database.pm
+t/01base.t
+t/10functions.t
+t/11article.t
+t/12generate.t
+t/13database.t
+t/90podtest.t
+t/91podcover.t
+t/94metatest.t
+t/nntp/126015.txt
+t/nntp/125106.txt
+t/nntp/1804993.txt
+t/nntp/1805500.txt
59 META.yml
@@ -0,0 +1,59 @@
+--- #YAML:1.0
+name: CPAN-WWW-Testers-Generator
+version: 0.30
+abstract: Download and summarize CPAN Testers data
+author:
+ - Barbie <barbie@cpan.org>
+ - Leon Brocard <leon@astray.com>
+
+license: perl
+distribution_type: module
+installdirs: site
+
+requires:
+ Cwd: 0
+ CPAN::DistnameInfo: 0
+ Class::Accessor::Fast: 0
+ DBD::SQLite: 1.07
+ DBI: 0
+ Email::Simple: 0
+ File::Basename: 0
+ File::Path: 0
+ Getopt::Long: 0
+ MIME::Base64: 0
+ MIME::QuotedPrint: 0
+ Net::NNTP: 0
+ version: 0
+recommends:
+ Test::More: 0.70
+ Test::MockObject: 0
+ Test::Pod: 1.00
+ Test::Pod::Coverage: 0.08
+ Test::CPAN::Meta: 0.12
+build_requires:
+ Test::More: 0.01
+
+provides:
+ CPAN::WWW::Testers::Generator:
+ file: lib/CPAN/WWW/Testers/Generator.pm
+ version: 0.30
+ CPAN::WWW::Testers::Generator::Article:
+ file: lib/CPAN/WWW/Testers/Generator/Article.pm
+ version: 0.30
+ CPAN::WWW::Testers::Generator::Database:
+ file: lib/CPAN/WWW/Testers/Generator/Database.pm
+ version: 0.30
+
+no_index:
+ directory:
+ - t
+ - examples
+
+resources:
+ license: http://dev.perl.org/licenses/
+ bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=-CPAN-WWW-Testers-Generator
+
+meta-spec:
+ version: 1.4
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+generated_by: Hand 1.0
31 Makefile.PL
@@ -0,0 +1,31 @@
+use ExtUtils::MakeMaker;
+WriteMakefile(
+ 'NAME' => 'CPAN::WWW::Testers::Generator',
+ 'VERSION_FROM' => 'lib/CPAN/WWW/Testers/Generator.pm',
+ 'PREREQ_PM' => {
+ 'Cwd' => 0, # only for cpanstats.pl
+ 'Getopt::Long' => 0, # only for cpanstats.pl
+
+ 'Class::Accessor::Fast' => 0,
+ 'CPAN::DistnameInfo' => 0,
+ 'DBD::SQLite' => '1.07',
+ 'DBI' => 0,
+ 'Email::Simple' => 0,
+ 'File::Basename' => 0,
+ 'File::Path' => 0,
+ 'MIME::Base64' => 0,
+ 'MIME::QuotedPrint' => 0,
+ 'Net::NNTP' => 0,
+ 'version' => 0,
+
+ 'Test::More' => 0, # only for testing
+ },
+ 'PL_FILES' => {},
+ 'INSTALLDIRS' => 'site',
+ 'EXE_FILES' => [ 'bin/cpanstats' ],
+ NO_META => 1,
+
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT => 'CPAN Testers Database Generator',
+ AUTHOR => 'Barbie <barbie@cpan.org>') : ()),
+);
115 README
@@ -0,0 +1,115 @@
+NAME
+ CPAN::WWW::Testers::Generator - Download and summarize CPAN Testers data
+
+SYNOPSIS
+ % cpanstats
+ # ... wait patiently
+ # ... then use cpanstats.db, an SQLite database
+
+DESCRIPTION
+ This distribution was originally written by Leon Brocard to download and
+ summarize CPAN Testers data. However, much of the original code has been
+ rewritten to use the CPAN Testers Statistics database generation code. This
+ now means that all the CPAN Testers sites including the Reports site, the
+ Statistics site and the CPAN Dependencies site, can use the same database.
+
+ This module downloads articles from the cpan-testers newsgroup, generating or
+ updating an SQLite database containing all the most important information. You
+ can then query this database, or use CPAN::WWW::Testers to present it over the
+ web.
+
+ A good example query for Acme-Colour would be:
+
+ SELECT version, status, count(*) FROM reports WHERE
+ distribution = "Acme-Colour" group by version, status;
+
+ To create a database from scratch can take several hours, as there are now over
+ 1.5 million articles in the newgroup. As such updating from a known copy of the
+ database is much more advisable. If you don't want to generate the database
+ yourself, you can obtain the latest official copy (compressed with gzip) at
+ http://devel.cpantesters.org/cpanstats.db.gz
+
+INTERFACE
+ The Constructor
+ * new
+ Instatiates the object CPAN::WWW::Testers::Generator.
+
+ Methods
+ * logfile
+
+ Accessor to set/get where the logging information is to be kept. Note
+ that if this not set, no logging occurs.
+
+ * database
+
+ Accessor to set/get the database full path.
+
+ * directory
+
+ Accessor to set/get the directory where the database is to be created.
+
+ * generate
+
+ Starting from the last recorded article, retrieves all the more recent
+ articles from the NNTP server, parsing each and recording the articles
+ that either upload announcements or reports.
+
+ * insert_article
+
+ Inserts the components of a parsed article into the database.
+
+DATABASE SCHEMA
+
+ The database schema is very straightforward, one main table with several
+ index tables to speed up searches. The main table is as below:
+
+ +--------------------------------+
+ | cpanstats |
+ +----------+---------------------+
+ | id | INTEGER PRIMARY KEY |
+ | state | TEXT |
+ | postdate | TEXT |
+ | tester | TEXT |
+ | dist | TEXT |
+ | version | TEXT |
+ | platform | TEXT |
+ | perl | TEXT |
+ | osname | TEXT |
+ | osvers | TEXT |
+ | archname | TEXT |
+ +----------+---------------------+
+
+HISTORY
+ The CPAN testers was conceived back in May 1998 by Graham Barr and Chris
+ Nandor as a way to provide multi-platform testing for modules. Today there
+ are over 1.5 million tester reports and more than 100 testers each month
+ giving valuable feedback for users and authors alike.
+
+BECOME A TESTER
+ The objective of the CPAN Testers is to test as many of the distributions
+ on CPAN as possible, on as many platforms as possible. The ultimate goal is
+ to improve the portability of the distributions on CPAN, and provide good
+ feedback to the authors.
+
+ Whether you have a common platform or a very unusual one, you can help by
+ testing modules you install and submitting reports. There are plenty of
+ module authors who could use test reports and helpful feedback on their
+ modules and distributions.
+
+ If you'd like to get involved, please take a look at the CPAN Testers Wiki,
+ where you can learn how to install and configure one of the recommended
+ smoke tools.
+
+ For further help and advice, please subscribe to the the CPAN Testers
+ discussion mailing list.
+
+ CPAN Testers Wiki - http://wiki.cpantesters.org
+ CPAN Testers Discuss mailing list
+ - http://lists.cpan.org/showlist.cgi?name=cpan-testers-discuss
+
+AUTHOR
+ Original author: Leon Brocard <acme@astray.com> (C) 2002-2008
+ Current maintainer: Barbie <barbie@cpan.org> (C) 2008
+
+LICENSE
+ This code is distributed under the same license as Perl.
139 bin/cpanstats
@@ -0,0 +1,139 @@
+#!/usr/bin/perl -w
+use strict;
+
+my $VERSION = '0.04';
+
+#----------------------------------------------------------------------------
+# Library Modules
+
+use lib qw(../lib lib);
+
+use Cwd;
+use Getopt::ArgvFile default=>1;
+use Getopt::Long;
+
+use CPAN::WWW::Testers::Generator '0.26';
+
+#----------------------------------------------------------------------------
+# Variables
+
+my %options;
+
+#----------------------------------------------------------------------------
+# Progam
+
+init_options();
+
+my $directory = $options{directory} || cwd();
+my $t = CPAN::WWW::Testers::Generator->new(
+ ignore => $options{ignore},
+ nostore => $options{nostore},
+ directory => $directory,
+ logfile => $directory . '/logs/cpanstats.log'
+);
+
+if($options{rebuild}) { $t->rebuild($options{start},$options{end}); }
+else { $t->generate; }
+
+
+# -------------------------------------
+# Subroutines
+
+sub init_options {
+ GetOptions( \%options,
+ 'directory|d=s',
+ 'ignore|i',
+ 'nostore|n',
+ 'rebuild|r',
+ 'start=i',
+ 'end=i',
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{version});
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0 [-d directory] [-r [--start=n] [--end=n]] [-i] [-n] [-h] [-V]
+
+ -d directory use named directory
+ -r rebuild cpanstats from articles
+ --start start id to rebuild
+ --end end id to rebuild
+ -i ignore no article errors
+ -n do not store old articles
+ -h this help screen
+ -V program version
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+__END__
+
+=head1 NAME
+
+cpanstats - script to access the NNTP server and update the database.
+
+=head1 SYNOPSIS
+
+ # use defaults
+ cpanstats
+
+ # rebuild default database
+ cpanstats --rebuild --start=100 --end=200
+
+ # update database in named directory, without storing articles,
+ # and ignoring any missing article responses from the NNTP server
+ cpanstats -d=../db/ --nostore --ignore
+
+=head1 DESCRIPTION
+
+Acts as a wrapper script to the underlying CPAN::WWW::Testers::Generator code
+that downloads articles from the NNTP server, extracts all components of each,
+then stores the data into a local SQLite database.
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send an email to barbie@cpan.org. However, it would help
+greatly if you are able to pinpoint problems or even supply a patch.
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Barbie, <barbie@cpan.org>
+ for Miss Barbell Productions <http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
+
197 bin/cpanstats-delete
@@ -0,0 +1,197 @@
+#!/usr/bin/perl
+use strict;
+$|++;
+
+my $VERSION = '0.11';
+
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+cpanstats-delete - script to delete entries from the cpanstats database.
+
+=head1 SYNOPSIS
+
+ perl cpanstats-delete [-a=0] [-d=0] [--database=$db] [--file=$file]
+
+=head1 DESCRIPTION
+
+Using the cpanstats database, which should in the local directory, extracts
+all the data into the components of each page. Creates the graphs, then
+creates each HTML page of the site.
+
+=head1 OPTIONS
+
+=over 4
+
+=item -a | --all
+
+Delete all the NNTP ids greater than the specified number.
+
+=item -d | --delete
+
+Delete a single NNTP id.
+
+=item --database
+
+Specify the exact path to the cpanstats database if not ./cpanstats.db.
+
+=item --file
+
+Named file used when deleting a list of NNTP ids.
+
+=back
+
+=cut
+
+# -------------------------------------
+# Library Modules
+
+use lib qw(./lib ../lib);
+
+use DBI;
+use Getopt::Long;
+use IO::File;
+
+use CPAN::WWW::Testers::Generator::Database;
+
+# -------------------------------------
+# Variables
+
+use constant DATABASE => 'cpanstats.db';
+
+my (%options);
+
+# -------------------------------------
+# Program
+
+##### INITIALISE #####
+
+init_options();
+
+$options{database} ||= DATABASE;
+
+my $dbi = CPAN::WWW::Testers::Generator::Database->new(database => $options{database}, AutoCommit => 1);
+print STDERR "Cannot connect to database [$options{database}]\n" unless($dbi);
+
+
+##### MAIN #####
+
+my @list = get_list();
+for my $id (@list) {
+ print "Deleting ... $id\n";
+ $dbi->do_query("DELETE from cpanstats WHERE id=$id");
+}
+
+do_query("DELETE from cpanstats WHERE id>$options{all}") if($options{all});
+do_query("DELETE from cpanstats WHERE id=$options{delete}") if($options{delete});
+
+# -------------------------------------
+# Subroutines
+
+=item get_list
+
+Returns the list of NNTP ids from the named file.
+
+=cut
+
+sub get_list {
+ my @list;
+ my $file = $options{file} || return ();
+ die "file [$file] not found" unless(-f $file);
+
+ my $fh = IO::File->new($file) or die "Cannot open file [$file]: $!";
+ while(<$fh>) {
+ chomp;
+ my ($num) = (m/^(\d+)/);
+ push @list, $num if($num);
+ }
+ $fh->close;
+ return @list;
+}
+
+=item init_options
+
+Determine command line options and initialise any defaults.
+
+=cut
+
+sub init_options {
+ GetOptions( \%options,
+ 'all|a=i',
+ 'delete|d=i',
+ 'database=s',
+ 'file=s',
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{version});
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0 \\
+ [--database=<db>] \\
+ ( --all=<num> | --delete=<num> | --file=<file> ) \\
+ [-h] [-V]
+
+ --database=<db> use named database
+ --all=<num> delete all entried greater than given id
+ --delete=<num> delete given id
+ --file=<file> delete multiple ids (1 per line)
+ -h this help screen
+ -V program version
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+__END__
+
+=back
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Barbie, <barbie@cpan.org>
+ for Miss Barbell Productions <http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
+
198 bin/cpanstats-reparse
@@ -0,0 +1,198 @@
+#!/usr/bin/perl -w
+use strict;
+$|++;
+
+my $VERSION = '0.07';
+
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+cpanstats-reparse - script to reparse an NNTP article.
+
+=head1 SYNOPSIS
+
+ perl cpanstats-reparse \
+ [--directory=<dir>] \
+ [--check|c] [--localonly|l] \
+ ( [--id=<nntpid>] | [--file=<filename>] ) \
+ [--exclude|x=<fields>] \
+ [--help|h]
+
+=head1 DESCRIPTION
+
+This script is used to reparse an NNTP article, which may have been
+incorrectly parsed by the cpanstats, and should feature in the stats
+for the CPAN Testers Statistics database.
+
+Note that the "check" option will only go through the motions and will not
+update the local database, while the "localonly" option will ensure only the
+local articles.db database is used to reparse, no NNTP lookup is used.
+
+The ability to ignore field checking for specific fields is enabled via the
+use of the exclude option. Using a comma separated list you may enter one
+or more of the fields 'dist', 'version', 'from', 'perl' and 'platform'.
+This is useful for parsing a faulty report and then using upstats.pl to
+amend the appropriate field to the correct value.
+
+=cut
+
+# -------------------------------------
+# Library Modules
+
+use lib qw(./lib ../lib);
+
+use Cwd;
+use DBI;
+use Getopt::ArgvFile default=>1;
+use Getopt::Long;
+use IO::File;
+
+use CPAN::WWW::Testers::Generator;
+
+# -------------------------------------
+# Variables
+
+my (%options,@exclude);
+
+# -------------------------------------
+# Program
+
+##### INITIALISE #####
+
+init_options();
+
+my $directory = $options{directory} || cwd();
+my $t = CPAN::WWW::Testers::Generator->new(
+ directory => $directory,
+ logfile => $directory . '/logs/cpanstats.log'
+);
+
+# GetOptions allows several different ways of passing multiple values, this
+# line is to ensure we have a list as we want it :)
+my %exclude = map {$_ => 1} split(/,/,join(',',@exclude));
+$options{exclude} = \%exclude;
+
+##### MAIN #####
+
+my @list = get_list();
+$t->reparse(\%options,@list);
+
+# -------------------------------------
+# Subroutines
+
+=item get_list
+
+Returns the list of NNTP ids from the named file.
+
+=cut
+
+sub get_list {
+ my @list;
+
+ # we're only parsing one id
+ return ($options{id}) if(defined $options{id});
+
+ # we're parsing a list of ids
+ my $file = $options{file} || die "--file not specified";
+ die "file [$file] not found" unless(-f $file);
+
+ my $fh = IO::File->new($file,'r') or die "Cannot read file [$file]: $!";
+ while(<$fh>) {
+ chomp;
+ my ($num) = (m/^(\d+)/);
+ push @list, $num;
+ }
+ $fh->close;
+ return @list;
+}
+
+=item init_options
+
+Determine command line options and initialise any defaults.
+
+=cut
+
+sub init_options {
+ GetOptions( \%options,
+ 'directory|d=s',
+ 'check|c',
+ 'localonly|l',
+ 'id|i=i',
+ 'file=s',
+ 'exclude|x=s' => \@exclude,
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{version});
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0 \\
+ [-d=<directory>] [--check|c] [--localonly|l] \\
+ ( --id|i=<num> | --file=<file> ) \\
+ [--exclude=<list>] [-h] [-V]
+
+ -d=<directory> use named directory
+ -c check only do not update
+ -l local only lookup
+ -i=<num> named id to reparse
+ --file=<file> file containing ids to reparse
+ --exclude=<list> exclude fields from parsing
+ -h this help screen
+ -V program version
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+__END__
+
+=back
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Barbie, <barbie@cpan.org>
+ for Miss Barbell Productions <http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
+
214 bin/cpanstats-select
@@ -0,0 +1,214 @@
+#!/usr/bin/perl
+use strict;
+$|++;
+
+my $VERSION = '0.09';
+
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+cpanstats-select - select stats from the CPAN Testers Statistics database.
+
+=head1 SYNOPSIS
+
+ perl cpanstats-select \
+ [--database=<db>] \
+ [--nntp|-n=<nntpid>] \
+ [--grade|-g=<grade>] \
+ [--disto|-m=<distname>] \
+ [--dist|-d=<distname>] \
+ [--version|-v=<distversion>] \
+ [--date|-y=<YYYYMM>] \
+ [--tester|-t=<email>] \
+ [--platform|-o=<platform>] \
+ [--perl|-p=<perlversion>] \
+ [--help|-h]
+
+=head1 DESCRIPTION
+
+Using the cpanstats database, which should be in the local directory, extracts
+all the required data.
+
+=head1 OPTIONS
+
+=over 4
+
+=item --database
+
+Specify the exact path to the cpanstats database if not ./cpanstats.db.
+
+=back
+
+=cut
+
+# -------------------------------------
+# Library Modules
+
+use lib qw(./lib ../lib);
+
+use DBI;
+use Getopt::ArgvFile default=>1;
+use Getopt::Long;
+
+use CPAN::WWW::Testers::Generator::Database;
+
+# -------------------------------------
+# Variables
+
+use constant DATABASE => 'cpanstats.db';
+
+my (%options);
+
+# -------------------------------------
+# Program
+
+##### INITIALISE #####
+
+init_options();
+
+$options{database} ||= DATABASE;
+
+my $dbi = CPAN::WWW::Testers::Generator::Database->new(database => $options{database}, AutoCommit => 1);
+print STDERR "Cannot connect to database [$options{database}]\n" unless($dbi);
+
+
+##### MAIN #####
+
+# '(id,state,postdate,tester,dist,version,platform,perl) '.
+
+my $sql = "SELECT * FROM cpanstats WHERE ";
+my @where;
+
+# only one of the following at a time
+@where = ("dist like '%$options{distro}%'") if(defined $options{distro});
+@where = ("dist='$options{dist}'") if(defined $options{dist});
+@where = ("id=$options{nntp}") if(defined $options{nntp});
+
+push @where, "state='$options{grade}'" if(defined $options{grade});
+push @where, "version='$options{version}'" if(defined $options{version});
+push @where, "version LIKE '%$options{distversion}%'" if(defined $options{distversion});
+push @where, "postdate='$options{date}'" if(defined $options{date});
+push @where, "tester LIKE '%$options{tester}%'" if(defined $options{tester} && $options{tester} ne '-');
+push @where, "tester=''" if(defined $options{tester} && $options{tester} eq '-');
+push @where, "platform like '$options{platform}%'" if(defined $options{platform} && $options{platform} ne '-');
+push @where, "platform=''" if(defined $options{platform} && $options{platform} eq '-');
+push @where, "perl='$options{perl}'" if(defined $options{perl} && $options{perl} ne '-');
+push @where, "perl=''" if(defined $options{perl} && $options{perl} eq '-');
+
+if(@where) {
+ my @rows = $dbi->get_query($sql . join(' AND ',@where));
+ if(@rows) {
+ for my $row (@rows) {
+ print join(",",@$row) . "\n";
+ }
+ } else {
+ print "Sorry, no results returned\n";
+ }
+} else {
+ print "No SQL arguments given\n";
+}
+
+
+# -------------------------------------
+# Subroutines
+
+sub init_options {
+ GetOptions( \%options,
+ 'database=s',
+ 'nntp|n=s',
+ 'grade|g=s',
+ 'distro|m=s',
+ 'dist|d=s',
+ 'distversion|x=s',
+ 'platform|o=s',
+ 'perl|p=s',
+ 'tester|t=s',
+ 'version|v=s',
+ 'date|y=s',
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{Version});
+
+ $options{grade} = lc $options{grade} if($options{grade});
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0
+ [--database=<db>] - path to cpanstats database
+
+ [--nntp|-n=<nntpid>] - NNTP article id
+ [--grade|-g=<grade>] - report grade
+ [--dist|-d=<distname>] - distribution name
+ [--disto|-m=<distname>] - distribution name (partial match)
+ [--version|-v=<distversion>] - distribution version
+ [--date|-y=<YYYYMM>] - year/month
+ [--tester|-t=<email>] - tester email
+ [--platform|-o=<platform>] - platform (partial match)
+ [--perl|-p=<perlversion>] - perl version
+
+ [--help|-h] - this screen
+ [--Version|-V] - program version
+
+Notes:
+ - combine options (except help) to refine your search
+ - all entries (except distro and paltform) require an exact match
+ - only one of nntp, dist or distro (in order of preference) is accepted
+ - use '-' for tester, perl and platform to find blank entries
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+
+__END__
+
+=back
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+Barbie, E<lt>barbie@cpan.orgE<gt>
+for Miss Barbell Productions L<http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions
+ All Rights Reserved.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
215 bin/cpanstats-update
@@ -0,0 +1,215 @@
+#!/usr/bin/perl
+use strict;
+$|++;
+
+my $VERSION = '0.06';
+
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+cpanstats-update - script to update entries in the cpanstats database.
+
+=head1 SYNOPSIS
+
+ perl cpanstats-update [-i=0] [--database=$db] [--file=$file]
+
+=head1 DESCRIPTION
+
+Using the cpanstats database, which should in the local directory, will search
+for records matching the given id on the command line, or the list provided in
+a named file. If the named file is correctly formatted, the nominated columns
+will also be updated.
+
+=head1 OPTIONS
+
+=over 4
+
+=item -i | --id
+
+Display the record matching the given NNTP id.
+
+=item --database
+
+Specify the exact path to the cpanstats database if not ./cpanstats.db.
+
+=item --file
+
+The named file will be used to retrieve a list of NNTP ids, and if correctly
+formatted will update columns for the nominated id.
+
+To display records for a list of ids, the file should look like:
+
+ 1
+ 2
+ 3
+ 4
+
+To update columns for given ids, the file should look like:
+
+ 1,state='pass'
+ 2,state='pass',dist='MyDist'
+
+Note that the second entry will update multiple columns
+
+=back
+
+=cut
+
+# -------------------------------------
+# Library Modules
+
+use lib qw(./lib ../lib);
+
+use DBI;
+use Getopt::Long;
+use IO::File;
+
+use CPAN::WWW::Testers::Generator::Database;
+
+# -------------------------------------
+# Variables
+
+use constant DATABASE => 'cpanstats.db';
+
+my (%options,@rows);
+
+# -------------------------------------
+# Program
+
+##### INITIALISE #####
+
+init_options();
+
+$options{database} ||= DATABASE;
+
+my $dbi = CPAN::WWW::Testers::Generator::Database->new(database => $options{database}, AutoCommit => 1);
+print STDERR "Cannot connect to database [$options{database}]\n" unless($dbi);
+
+
+##### MAIN #####
+
+print "#id,state,postdate,tester,dist,version,platform,perl\n";
+my @list = get_list();
+push @list, {id=>$options{id}} if($options{id});
+
+for my $item (@list) {
+ if($item->{set}) {
+ $dbi->do_query("UPDATE cpanstats SET $item->{set} WHERE id=$item->{id}");
+ }
+
+ @rows = $dbi->get_query("SELECT * FROM cpanstats WHERE id=$item->{id}");
+ print join(",",@$_)."\n" for(@rows);
+}
+
+@rows = $dbi->get_query("SELECT max(id) FROM cpanstats");
+print "\n#MAX ID=$rows[0][0]\n" if(@rows);
+
+# -------------------------------------
+# Subroutines
+
+=item get_list
+
+Returns the list of NNTP ids from the named file.
+
+=cut
+
+sub get_list {
+ my @list;
+ my $file = $options{file} || return ();
+ die "file [$file] not found" unless(-f $file);
+
+ my $fh = IO::File->new($file) or die "Cannot open file [$file]: $!";
+ while(<$fh>) {
+ next if(/^\s*$/); # ignore empty lines
+ chomp;
+ my ($id,$str) = (m/^(\d+)(?:,(.*))?/);
+ push @list, {id=>$id,set=>$str} if($id);
+ }
+ $fh->close;
+
+ return @list;
+}
+
+=item init_options
+
+Determine command line options and initialise any defaults.
+
+=cut
+
+sub init_options {
+ GetOptions( \%options,
+ 'database=s',
+ 'id|i=i',
+ 'file=s',
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{version});
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0 \\
+ [--database=<db>] [--id=<num>] [--file=<file>] \\
+ [-h] [-V]
+
+ --database=<db> use named database
+ --id=<num> refresh this id
+ --file=<file> refresh these ids (1 per line)
+ -h this help screen
+ -V program version
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+__END__
+
+=back
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Barbie, <barbie@cpan.org>
+ for Miss Barbell Productions <http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
+
495 bin/cpanstats-verify
@@ -0,0 +1,495 @@
+#!/usr/bin/perl
+use strict;
+$|++;
+
+my $VERSION = '0.20';
+
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+cpanstats-verify - script to verify the contents of the cpanstats database.
+
+=head1 SYNOPSIS
+
+ perl cpanstats-verify [-c|-m] [-v|-s --file=$file] [--start=0] [--end=0]
+
+=head1 DESCRIPTION
+
+Reads the cpanstats database and verifies the contents. Three kinds of
+verification can be applied; Check (-c), Missing (-m) and Verify (-v).
+The start and end counts for the NNTP server can be specified, otherwise
+values within the database and the script will be assumed.
+
+=head1 OPTIONS
+
+=over 4
+
+=item -c | --check
+
+Looks up the entries stored in the database between the START and END NNTP ids,
+checking the NNTP server for the subjects of any that are missing. Also
+highlights any subjects that have been marked as bad during processing.
+
+=item -m | --missing
+
+Looks up the entries stored in the database between the START and END NNTP ids,
+checking each to ensure all the fields are complete for each type.
+
+=item -v | --verify
+
+Looks up the entries stored in the database as stored in the named file, and
+either prints the entry or highlights that it is missing.
+
+=item -s | --search
+
+Looks up the NNTP entries directly as listed in the named file.
+
+=item --database
+
+Specify the exact path to the cpanstats database if not ./cpanstats.db.
+
+=item --file
+
+Named file used when verifying a list of NNTP ids.
+
+=item --start --end
+
+Start and end NNTP ids.
+
+=item -h | --help
+
+Print the help screen
+
+=back
+
+=cut
+
+# -------------------------------------
+# Library Modules
+
+use lib qw(./lib ../lib);
+
+use DBI;
+use Email::Simple;
+use Net::NNTP;
+use Getopt::ArgvFile default=>1;
+use Getopt::Long;
+use IO::File;
+
+use CPAN::WWW::Testers::Generator::Database;
+
+# -------------------------------------
+# Variables
+
+my (%log,%options);
+my $PROGRESS = 0;
+
+use constant DATABASE => 'cpanstats.db';
+use constant NNTPSTART => 872391;
+
+# -------------------------------------
+# Program
+
+##### INITIALISE #####
+
+progress("init");
+
+init_options();
+
+$options{database} ||= DATABASE;
+
+my $dbi = CPAN::WWW::Testers::Generator::Database->new(database => $options{database}, AutoCommit => 1);
+print STDERR "Cannot connect to database [$options{database}]\n" unless($dbi);
+
+$options{start} ||= NNTPSTART;
+$options{end} ||= get_lastid();
+
+my $nntp;
+unless($options{localonly}) {
+ $nntp = Net::NNTP->new("nntp.perl.org") || die "Cannot connect to nntp.perl.org";
+ $nntp->group("perl.cpan.testers");
+}
+
+##### MAIN #####
+
+_clear_log();
+
+_log("FROM: $options{start} - $options{end}") if($options{missing} || $options{check});
+
+#load_log();
+
+progress("start");
+
+check_stats() if($options{check});
+missing_stats() if($options{missing});
+verify_stats() if($options{verify});
+search_stats() if($options{search});
+
+progress("finish");
+
+# -------------------------------------
+# Subroutines
+
+=item get_lastid
+
+Returns the last NNTP id recorded in the database.
+
+=cut
+
+sub get_lastid {
+ my @rows = $dbi->get_query("SELECT MAX(id) FROM cpanstats");
+ return $rows[0]->[0];
+}
+
+=item missing_stats
+
+Report on the database entries with missing field values.
+
+=cut
+
+# insert_report($id,$state,$date,$from,$dist,$version,$platform,$perl);
+
+sub missing_stats {
+ progress("start - missing stats");
+
+ $" = ",";
+ my $count = 0;
+ my $iterator = $dbi->get_query_iterator("SELECT * FROM cpanstats WHERE id >= $options{start} AND id <= $options{end}");
+ while(my $row = $iterator->()) {
+ next if($row->[1] =~ /^na|pass|fail|unknown|cpan$/);
+ _log("BADPARSE:") unless($count++);
+ _log("@$row");
+ }
+
+ $count = 0;
+ $iterator = $dbi->get_query_iterator("SELECT * FROM cpanstats WHERE state in ('na','pass','fail','unknown') AND id >= $options{start} AND id <= $options{end}");
+ while(my $row = $iterator->()) {
+ next if( defined $row->[2] &&
+ defined $row->[3] &&
+ defined $row->[4] &&
+ defined $row->[5] &&
+ defined $row->[6]);
+ _log("\nBADREPORTS:") unless($count++);
+ _log("@$row");
+ }
+
+ $count = 0;
+ $iterator = $dbi->get_query_iterator("SELECT * FROM cpanstats WHERE state in ('cpan') AND id >= $options{start} AND id <= $options{end}");
+ while(my $row = $iterator->()) {
+ next if( defined $row->[2] &&
+ defined $row->[4] &&
+ defined $row->[5]);
+ _log("\nBADUPLOADS:") unless($count++);
+ _log("@$row");
+ }
+}
+
+=item check_stats
+
+Report on the database entries which are either missing, or have reported bad
+processing.
+
+=cut
+
+sub check_stats {
+ progress("start - check stats");
+
+ my $count = $options{start};
+ my $iterator = $dbi->get_query_iterator("SELECT * FROM cpanstats WHERE id >= $options{start} AND id <= $options{end} ORDER BY id");
+ while(my $row = $iterator->()) {
+ #progress("$count - $row->[0]");
+
+ # missing entries (ignore human replies)
+ while($count < $row->[0]) {
+ my ($subj,$from) = get_subject($count);
+ _log("$count,$from,missing - $subj") unless($subj =~ /(?:Re:|Fw:|Ab:|mirror update)/i ||
+ $subj =~ /\.(?:readme|pl|cgi|pdf|html?|doc|txt|ppd|asc|yml|jpg|png|gif|rtf|css|pod|sig|diff)$/i);
+
+ # note that in the above two regexes we ignore conversation
+ # threads, mirror updates and a whole host of uploads that
+ # are nothing to do with a distribution upload. The caveats
+ # to this are the entries that are potentially bad uploads
+ # (bad archive naming or uploading a .pm file) and any
+ # reports that are in error.
+
+ $count++;
+ }
+
+ # badly parsed entries
+ if($row->[1] =~ /bad/) {
+ my ($subj,$from) = get_subject($row->[0]);
+ _log("$row->[0],$from,$row->[1] - $subj");
+# print join(",",@$row) . "\n";
+ }
+
+ # missing fields
+ elsif($row->[1] =~ /(na|pass|fail|unknown)/) {
+ unless( defined $row->[2] &&
+ defined $row->[3] &&
+ defined $row->[4] &&
+ defined $row->[5] &&
+ defined $row->[6]) {
+ my ($subj,$from) = get_subject($row->[0]);
+ _log("$row->[0],$from,$row->[1] - $subj");
+ }
+ }
+ $count++;
+ }
+}
+
+=item verify_stats
+
+Report on the given database entries highlighting those which are missing.
+
+=cut
+
+sub verify_stats {
+ progress("start - verify stats");
+
+ $" = ",";
+ my @list = get_list();
+ for my $id (@list) {
+ my @rows = $dbi->get_query("SELECT * FROM cpanstats WHERE id=$id");
+ if(@rows) {
+ my $row = $rows[0];
+ _log("@$row");
+ } else {
+ _log("$id,missing");
+ }
+ }
+}
+
+=item search_stats
+
+Report on the given database entries highlighting those which are missing.
+
+=cut
+
+sub search_stats {
+ progress("start - search stats");
+
+ $" = ",";
+ my @list = get_list();
+ for my $id (@list) {
+ my ($subj,$from) = get_subject($id);
+ _log("$id,$from,$subj");
+ }
+}
+
+=item get_subject
+
+Access the NNTP server to get the real subject recorded for the article,
+unless we can short cut the network by accessing the information from the
+project log file.
+
+=cut
+
+sub get_subject {
+ my $id = shift;
+
+ # can we short cut?
+ find_id($id) unless($log{$id});
+ return($log{$id}{subject},$log{$id}{from}) if($log{$id});
+
+ return "" if($options{localonly});
+
+ # talk NNTP
+ my $article = join "", @{$nntp->article($id) || []};
+ return "" unless($article);
+
+ # parse the resulting headers
+ my $mail = Email::Simple->new($article);
+ return($mail->header("Subject"),$mail->header("From"));
+}
+
+=item get_list
+
+Returns the list of NNTP ids from the named file.
+
+=cut
+
+sub get_list {
+ my @list;
+ my $file = $options{file} || die "--file not specified";
+ die "file [$file] not found" unless(-f $file);
+
+ my $fh = IO::File->new($file) or die "Cannot open file [$file]: $!";
+ while(<$fh>) {
+ chomp;
+ my ($num) = (m/^(\d+)/);
+ push @list, $num;
+ }
+ $fh->close;
+ return @list;
+}
+
+=item load_log
+
+Load log file, the output of cpanstats.pl.
+
+=cut
+
+sub load_log {
+ my $fh = IO::File->new($options{log},'r') or die "Cannot read log file [$options{log}]: $!\n";
+ while(<$fh>) {
+ next unless(/^ID \[(\d+)\] \[([^\]]+)\] (.*?)\s*$/);
+ next unless($1 >= $options{start});
+ $log{$1} = {from => $2, subject => $3};
+ }
+}
+
+# note read the whole file in case it has been reparsed
+
+sub find_id {
+ my $id = shift || return;
+
+ my $fh = IO::File->new($options{log},'r') or die "Cannot read log file [$options{log}]: $!\n";
+ while(<$fh>) {
+ next unless(/^ID \[$id\] \[([^\]]+)\] (.*?)\s*$/);
+ $log{$id} = {from => $1, subject => $2};
+ }
+ return;
+}
+
+sub _clear_log {
+ my $fh = IO::File->new($options{out},'w') or die "Cannot write to file [$options{out}]: $!\n";
+ print $fh '';
+ $fh->close;
+}
+
+sub _log {
+ my $msg = shift;
+
+ my $fh = IO::File->new($options{out},'a+') or die "Cannot write to file [$options{out}]: $!\n";
+ print $fh "$msg\n";
+ $fh->close;
+}
+
+=item progress
+
+Simple audit logging function.
+
+=cut
+
+my $lasttime = time;
+
+sub progress {
+ return unless($PROGRESS);
+
+ my $msg = shift;
+ my $time = time;
+ my @localtime = localtime($time);
+ my $secs = $time - $lasttime;
+ printf STDERR "%02d:%02d:%02d\t%03d\t%s\n", $localtime[2], $localtime[1], $localtime[0], $secs, $msg;
+ $lasttime = $time;
+}
+
+sub init_options {
+ GetOptions( \%options,
+ 'database=s',
+ 'localonly|l',
+ 'missing|m',
+ 'check|c',
+ 'verify|v',
+ 'search|s',
+ 'start=i',
+ 'end=i',
+ 'file=s',
+ 'log=s',
+ 'out=s',
+ 'help|h',
+ 'version|V'
+ );
+
+ _help(1) if($options{help});
+ _help(0) if($options{version});
+
+ unless($options{database} && -f $options{database}) {
+ print "No database specified\n\n";
+ _help(1);
+ }
+
+ unless($options{log} && -f $options{log}) {
+ print "No cpanstats.log file specified\n\n";
+ _help(1);
+ }
+
+ unless($options{out}) {
+ print "No results output file specified\n\n";
+ _help(1);
+ }
+}
+
+sub _help {
+ my $full = shift;
+
+ if($full) {
+ print <<HERE;
+
+Usage: $0 \\
+ [-database=<db>] [-c] [-m] [-v] [-s] \\
+ [--file=<file>] [--start=n] [--end=n] \\
+ [--log=<file>] [-out=<file>] [-h] [-V] \\
+ [--localonly]
+
+ --database=<db> use a named database
+ -m check for missing entries
+ -c check existing entries for bad parsing
+ -v provide a verification report
+ -s search stats providing id + subject
+ --file file of IDs to reference
+ --start start id for -m or -c
+ --end end id for -m or -c
+ --log log file for shortcut reference for -m and -c
+ --out results output file
+ -h this help screen
+ -V program version
+
+HERE
+
+ }
+
+ print "$0 v$VERSION\n";
+ exit(0);
+}
+
+__END__
+
+=back
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Barbie, <barbie@cpan.org>
+ for Miss Barbell Productions <http://www.missbarbell.co.uk>.
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright (C) 2005-2008 Barbie for Miss Barbell Productions.
+
+ This module is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=cut
+
11 examples/generate.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/bash
+
+BASE=/home/barbie/projects/cpanstats
+
+date
+mkdir -p $BASE/logs
+
+cd $BASE
+perl bin/cpanstats.pl >>logs/cpanstats.out 2>&1
+perl bin/readstats.pl -c -m >logs/readstats.out
+
550 lib/CPAN/WWW/Testers/Generator.pm
@@ -0,0 +1,550 @@
+package CPAN::WWW::Testers::Generator;
+
+use warnings;
+use strict;
+use vars qw($VERSION);
+
+$VERSION = '0.30';
+
+#----------------------------------------------------------------------------
+# Library Modules
+
+use DBI;
+use File::Basename;
+use File::Path;
+use IO::File;
+use Net::NNTP;
+
+use CPAN::WWW::Testers::Generator::Article;
+use CPAN::WWW::Testers::Generator::Database;
+
+use base qw(Class::Accessor::Fast);
+
+#----------------------------------------------------------------------------
+# The Application Programming Interface
+
+__PACKAGE__->mk_accessors(qw(articles database directory logfile));
+
+sub new {
+ my $class = shift;
+ my %hash = @_;
+
+ $hash{directory} ||= '.';
+
+ my $self = {};
+ bless $self, $class;
+
+ # continue when no article
+ $self->{ignore} = $hash{ignore} if($hash{ignore});
+
+ # do not store old articles
+ $self->{nostore} = $hash{nostore} if($hash{nostore});
+
+ # prime the logging
+ $self->logfile($hash{logfile}) if($hash{logfile});
+
+ # prime the databases
+ $self->directory($hash{directory});
+ $self->database("$hash{directory}/cpanstats.db");
+ $self->articles("$hash{directory}/articles.db");
+
+ return $self;
+}
+
+sub DESTROY {
+ my $self = shift;
+}
+
+#----------------------------------------------------------------------------
+# Public Methods
+
+sub _init {
+ my ($self,$nntp) = @_;
+
+ $self->{stats} ||= CPAN::WWW::Testers::Generator::Database->new(database => $self->database);
+ $self->{arts} ||= CPAN::WWW::Testers::Generator::Database->new(database => $self->articles);
+ return unless($nntp);
+
+ $self->{nntp} ||= $self->nntp_connect();
+}
+
+sub generate {
+ my $self = shift;
+
+ $self->_init(1);
+
+ my $start = $self->_get_lastid() +1;
+ my $end = $self->{nntp_last};
+ die "Cannot access NNTP server\n" unless($end); # better to bail out than fade away!
+
+ # starting from last retrieved article
+ for(my $id = $start; $id <= $end; $id++) {
+
+ $self->_log("ID [$id]");
+ my $article = join "", @{$self->{nntp}->article($id) || []};
+
+ # no article for that id!
+ unless($article) {
+ $self->_log(" ... no article\n");
+ if($self->{ignore}) {
+ next;
+ } else {
+ die "No article returned [$id]\n";
+ }
+ }
+
+ $self->insert_article($id,$article);
+ $self->parse_article($id,$article);
+ }
+
+ $self->cleanup if($self->{nostore});
+ $self->{stats}->do_commit;
+ $self->{arts}->do_commit;
+}
+
+
+sub rebuild {
+ my ($self,$start,$end) = @_;
+
+ $self->_init(0);
+
+ $start ||= 1;
+ $end ||= $self->_get_lastid();
+
+ $self->{stats}->do_query("DELETE FROM cpanstats WHERE id >= $start AND id <= $end");
+
+ my $iterator = $self->{arts}->get_query_iterator("SELECT * FROM articles WHERE id >= $start AND id <= $end ORDER BY id asc");
+ while(my $row = $iterator->()) {
+ my $id = $row->[0];
+ my $article = $row->[1];
+
+ $self->_log("ID [$id]");
+
+ # no article for that id!
+ unless($article) {
+ $self->_log(" ... no article\n");
+ if($self->{ignore}) {
+ next;
+ } else {
+ die "No article returned [$id]\n";
+ }
+ }
+
+ $self->parse_article($id,$article);
+ }
+
+ $self->{stats}->do_commit;
+ $self->{arts}->do_commit;
+}
+
+sub reparse {
+ my ($self,$options,@ids) = @_;
+ return unless(@ids);
+
+ my $flag = ($options && $options->{localonly}) ? 0 : 1;
+ $self->_init($flag);
+
+ my $last = $self->_get_lastid();
+
+ for my $id (@ids) {
+ next if($id < 1 || $id > $last);
+
+ my $article;
+ my @rows = $self->{arts}->get_query('SELECT * FROM articles WHERE id = ?',$id);
+ if(@rows) {
+ $article = $rows[0]->[1];
+
+ } elsif($options && $options->{localonly}) {
+ next;
+
+ } else {
+ $article = join "", @{$self->{nntp}->article($id) || []};
+ }
+
+ next unless($article);
+ $self->_log("ID [$id]");
+
+ $self->{stats}->do_query('DELETE FROM cpanstats WHERE id = ?',$id) unless($options && $options->{check});
+ $self->parse_article($id,$article,$options);
+ }
+
+ $self->{stats}->do_commit;
+}
+
+sub cleanup {
+ my $self = shift;
+ my $id = $self->_get_lastid();
+ return unless($id);
+
+ $self->{arts}->do_query('DELETE FROM articles WHERE id < ?',$id);
+ $self->{arts}->do_commit;
+}
+
+#----------------------------------------------------------------------------
+# Private Methods
+
+sub nntp_connect {
+ my $self = shift;
+
+ # connect to NNTP server
+ my $nntp = Net::NNTP->new("nntp.perl.org") or die "Cannot connect to nntp.perl.org";
+ ($self->{nntp_num}, $self->{nntp_first}, $self->{nntp_last}) = $nntp->group("perl.cpan.testers");
+
+ return $nntp;
+}
+
+sub parse_article {
+ my ($self,$id,$article,$options) = @_;
+ my $object = CPAN::WWW::Testers::Generator::Article->new($article);
+
+ unless($object) {
+ $self->_log(" ... bad parse\n");
+ return;
+ }
+
+ my $subject = $object->subject;
+ my $from = $object->from;
+ $self->_log(" [$from] $subject\n");
+ return if($subject =~ /Re:/i);
+
+ unless($subject =~ /(CPAN|FAIL|PASS|NA|UNKNOWN)\s+/i) {
+ $self->_log(" . [$id] ... bad subject\n");
+ return;
+ }
+
+ my $state = lc $1;
+ my ($post,$date,$dist,$version,$platform,$perl,$osname,$osvers) = ();
+
+ if($state eq 'cpan') {
+ if($object->parse_upload()) {
+ $dist = $object->distribution;
+ $version = $object->version;
+ $from = $object->author;
+ }
+
+ return unless($self->_valid_field($id, 'dist' => $dist) || ($options && $options->{exclude}{dist}));
+ return unless($self->_valid_field($id, 'version' => $version) || ($options && $options->{exclude}{version}));
+ return unless($self->_valid_field($id, 'author' => $from) || ($options && $options->{exclude}{from}));
+
+ } else {
+ if($object->parse_report()) {
+ $dist = $object->distribution;
+ $version = $object->version;
+ $from = $object->from;
+ $perl = $object->perl;
+ $platform = $object->archname;
+ $osname = $object->osname;
+ $osvers = $object->osvers;
+
+ $from =~ s/'/''/g; #'
+ }
+
+ return unless($self->_valid_field($id, 'dist' => $dist) || ($options && $options->{exclude}{dist}));
+ return unless($self->_valid_field($id, 'version' => $version) || ($options && $options->{exclude}{version}));
+ return unless($self->_valid_field($id, 'from' => $from) || ($options && $options->{exclude}{from}));
+ return unless($self->_valid_field($id, 'perl' => $perl) || ($options && $options->{exclude}{perl}));
+ return unless($self->_valid_field($id, 'platform' => $platform) || ($options && $options->{exclude}{platform}));
+ return unless($self->_valid_field($id, 'osname' => $osname) || ($options && $options->{exclude}{osname}));
+ return unless($self->_valid_field($id, 'osvers' => $osvers) || ($options && $options->{exclude}{osname}));
+ }
+
+ $post = $object->postdate;
+ $date = $object->date;
+ $self->insert_stats($id,$state,$post,$from,$dist,$version,$platform,$perl,$osname,$osvers,$date)
+ unless($options && $options->{check});
+}
+
+sub insert_stats {
+ my $self = shift;
+
+ my @fields = @_;
+ $fields[$_] ||= 0 for(0);
+ $fields[$_] ||= '' for(1,2,3,4,5,6,8,9,10);
+ $fields[$_] ||= '0' for(7);
+
+ my $INSERT = 'INSERT INTO cpanstats VALUES (?,?,?,?,?,?,?,?,?,?,?)';
+
+ $self->{stats}->do_query($INSERT,@fields);
+ if((++$self->{stat_count} % 50) == 0) {
+ $self->{stats}->do_commit;
+ }
+}
+
+sub insert_article {
+ my $self = shift;
+
+ my @fields = @_;
+ $fields[$_] ||= 0 for(0);
+ $fields[$_] ||= '' for(1);
+
+ my $INSERT = 'INSERT INTO articles VALUES (?,?)';
+
+ $self->{arts}->do_query($INSERT,@fields);
+ if((++$self->{arts_count} % 50) == 0) {
+ $self->{arts}->do_commit;
+ }
+}
+
+#----------------------------------------------------------------------------
+# Private Functions
+
+sub _valid_field {
+ my ($self,$id,$name,$value) = @_;
+ return 1 if(defined $value);
+ $self->_log(" . [$id] ... missing field: $name\n");
+ return 0;
+}
+
+sub _get_lastid {
+ my $self = shift;
+
+ my @rows = $self->{arts}->get_query("SELECT max(id) FROM articles");
+ return 0 unless(@rows);
+ return $rows[0]->[0] || 0;
+}
+
+sub _log {
+ my $self = shift;
+ my $log = $self->logfile() or return;
+ mkpath(dirname($log)) unless(-f $log);
+ my $fh = IO::File->new($log,'a+') or die "Cannot append to log file [$log]: $!\n";
+ print $fh @_;
+ $fh->close;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+CPAN::WWW::Testers::Generator - Download and summarize CPAN Testers data
+
+=head1 SYNOPSIS
+
+ % cpanstats
+ # ... wait patiently, very patiently
+ # ... then use cpanstats.db, an SQLite database
+
+=head1 DESCRIPTION
+
+This distribution was originally written by Leon Brocard to download and
+summarize CPAN Testers data. However, all of the original code has been
+rewritten to use the CPAN Testers Statistics database generation code. This
+now means that all the CPAN Testers sites including the Reports site, the
+Statistics site and the CPAN Dependencies site, can use the same database.
+
+This module downloads articles from the cpan-testers newsgroup, generating or
+updating an SQLite database containing all the most important information. You
+can then query this database, or use CPAN::WWW::Testers to present it over the
+web.
+
+A good example query for Acme-Colour would be:
+
+ SELECT version, status, count(*) FROM cpanstats WHERE
+ distribution = "Acme-Colour" group by version, state;
+
+To create a database from scratch can take several days, as there are now over
+2 million articles in the newgroup. As such updating from a known copy of the
+database is much more advisable. If you don't want to generate the database
+yourself, you can obtain the latest official copy (compressed with gzip) at
+http://devel.cpantesters.org/cpanstats.db.gz
+
+With over 2 million articles in the archive, if you do plan to run this
+software to generate the databases it is recommended you utilise a high-end
+processor machine. Even with a reasonable processor it can takes days!
+
+=head1 DATABASE SCHEMA
+
+The cpanstats database schema is very straightforward, one main table with
+several index tables to speed up searches. The main table is as below:
+
+ +--------------------------------+
+ | cpanstats |
+ +----------+---------------------+
+ | id | INTEGER PRIMARY KEY |
+ | state | TEXT |
+ | postdate | TEXT |
+ | tester | TEXT |
+ | dist | TEXT |
+ | version | TEXT |
+ | platform | TEXT |
+ | perl | TEXT |
+ | osname | TEXT |
+ | osvers | TEXT |
+ | date | TEXT |
+ +----------+---------------------+
+
+It should be noted that 'postdate' refers to the YYYYMM formatted date, whereas
+the 'date' field refers to the YYYYMMDDhhmm formatted date and time.
+
+The articles database schema is again very straightforward, and consists of one
+table, as below:
+
+ +--------------------------------+
+ | articles |
+ +----------+---------------------+
+ | id | INTEGER PRIMARY KEY |
+ | article | TEXT |
+ +----------+---------------------+
+
+=head1 INTERFACE
+
+=head2 The Constructor
+
+=over
+
+=item * new
+
+Instatiates the object CPAN::WWW::Testers::Generator. Accepts a hash containing
+values to prepare the object. These are described as:
+
+ my $obj = CPAN::WWW::Testers::Generator->new(
+ logfile => './here/logfile',
+ directory => './here'
+ );
+
+Where 'logfile' is the location to write log messages. Log messages are only
+written if a logfile entry is specified, and will always append to any existing
+file. The 'directory' value is where all databases will be created.
+
+=back
+
+=head2 Accessors
+
+=over
+
+=item * articles
+
+Accessor to set/get the database full path.
+
+=item * database
+
+Accessor to set/get the database full path.
+
+=item * directory
+
+Accessor to set/get the directory where the database is to be created.
+
+=item * logfile
+
+Accessor to set/get where the logging information is to be kept. Note that if
+this not set, no logging occurs.
+
+=back
+
+=head2 Public Methods
+
+=over
+
+=item * generate
+
+Starting from the last recorded article, retrieves all the more recent articles
+from the NNTP server, parsing each and recording the articles that either
+upload announcements or reports.
+
+=item * rebuild
+
+In the event that the cpanstats database needs regenerating, either in part or
+for the whole database, this method allow you to do so. You may supply
+parameters as to the 'start' and 'end' values (inclusive), where all records
+are assumed by default. Note that the 'nostore' option is ignored and no
+records are deleted from the articles database.
+
+=item * reparse
+
+Rather than a complete rebuild the option to selective reparse selected entries
+is useful if there are posts which have since been identified as valid and now
+have supporting parsing code within the codebase.
+
+In addition there is the option to exclude fields from parsing checks, where
+they may be corrupted, and can be later amended using the 'cpanstats-update'
+tool.
+
+=item * cleanup
+
+In the event that you do not wish to store all the articles permanently in the
+articles database, this method removes all but the most recent entry, which is
+kept to ensure that subsequent runs will start from the correct article. To
+enable this feature, specify 'nostore' within the has passed to new().
+
+=back
+
+=head2 Private Methods
+
+=over
+
+=item * nntp_connect
+
+Sets up the connection to the NNTP server.
+
+=item * parse_article
+
+Parses an article extracting the metadata required for the stats database.
+
+=item * insert_article
+
+Inserts an article into the articles database.
+
+=item * insert_stats
+
+Inserts the components of a parsed article into the statistics database.
+
+=back
+
+=head1 HISTORY
+
+The CPAN testers was conceived back in May 1998 by Graham Barr and Chris
+Nandor as a way to provide multi-platform testing for modules. Today there
+are over 2 million tester reports and more than 100 testers each month
+giving valuable feedback for users and authors alike.
+
+=head1 BECOME A TESTER
+
+Whether you have a common platform or a very unusual one, you can help by
+testing modules you install and submitting reports. There are plenty of
+module authors who could use test reports and helpful feedback on their
+modules and distributions.
+
+If you'd like to get involved, please take a look at the CPAN Testers Wiki,
+where you can learn how to install and configure one of the recommended
+smoke tools.
+
+For further help and advice, please subscribe to the the CPAN Testers
+discussion mailing list.
+
+ CPAN Testers Wiki
+ - http://wiki.cpantesters.org
+ CPAN Testers Discuss mailing list
+ - http://lists.cpan.org/showlist.cgi?name=cpan-testers-discuss
+
+=head1 BUGS, PATCHES & FIXES
+
+There are no known bugs at the time of this release. However, if you spot a
+bug or are experiencing difficulties, that is not explained within the POD
+documentation, please send bug reports and patches to the RT Queue (see below).
+
+Fixes are dependant upon their severity and my availablity. Should a fix not
+be forthcoming, please feel free to (politely) remind me.
+
+RT Queue -
+http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-WWW-Testers-Generator
+
+=head1 SEE ALSO
+
+L<CPAN::WWW::Testers>,
+L<CPAN::Testers::WWW::Statistics>
+
+F<http://www.cpantesters.org/>,
+F<http://stats.cpantesters.org/>,
+F<http://wiki.cpantesters.org/>
+
+=head1 AUTHOR
+
+ Original author: Leon Brocard <acme@astray.com> 200?-2008
+ Current maintainer: Barbie <barbie@cpan.org> 2008-present
+
+=head1 LICENSE
+
+This code is distributed under the same license as Perl.
365 lib/CPAN/WWW/Testers/Generator/Article.pm
@@ -0,0 +1,365 @@
+package CPAN::WWW::Testers::Generator::Article;
+
+use warnings;
+use strict;
+use vars qw($VERSION);
+
+$VERSION = '0.30';
+
+#----------------------------------------------------------------------------
+# Library Modules
+
+use MIME::Base64;
+use MIME::QuotedPrint;
+use CPAN::DistnameInfo;
+use Email::Simple;
+
+use base qw( Class::Accessor::Fast );
+
+#----------------------------------------------------------------------------
+# Variables
+
+my %month = (
+ Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
+ Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12,
+);
+
+#----------------------------------------------------------------------------
+# The Application Programming Interface
+
+__PACKAGE__->mk_accessors(qw( postdate date status from distribution version
+ perl osname osvers archname subject author));
+
+sub new {
+ my($class, $article) = @_;
+ my $self = {};
+ bless $self, $class;
+
+ $article = decode_qp($article) if($article =~ /=3D/);
+
+ my $mail = Email::Simple->new($article);
+ return unless $mail;
+ return if $mail->header("In-Reply-To");
+
+ my $from = $mail->header("From");
+ my $subject = $mail->header("Subject");
+ return unless $subject;
+ return if $subject =~ /::/; # it's supposed to be a distribution
+
+ $self->{mail} = $mail;
+ $self->{from} = $from;
+ $self->{subject} = $subject;
+
+ ($self->{postdate},$self->{date}) = _parse_date($mail);
+
+ return $self;
+}
+
+sub _parse_date {
+ my $mail = shift;
+ my ($date1,$date2) = _extract_date($mail->header("Date"));
+ my @received = $mail->header("Received");
+
+ for my $hdr (@received) {
+ next unless($hdr =~ /.*;\s+(.*)\s*$/);
+ my ($dt1,$dt2) = _extract_date($1);
+ if($dt2 > $date2 + 1200) {
+ $date1 = $dt1;
+ $date2 = $dt2;
+ }
+ }
+
+#print STDERR " ... X.[Date: ".($date||'')."]\n";
+ return($date1,$date2);
+}
+
+my %regexes = (
+ # with time
+ 1 => { re => qr/(?:\w+,)?\s+(\d+)\s+(\w+)\s+(\d+)\s+(\d+):(\d+)/, f => [qw(day month year hour min)] }, # Wed, 13 September 2004 06:29
+ 2 => { re => qr/(\d+)\s+(\w+)\s+(\d+)\s+(\d+):(\d+)/, f => [qw(day month year hour min)] }, # 13 September 2004 06:29
+ 3 => { re => qr/(\w+)?\s+(\d+),?\s+(\d+)\s+(\d+):(\d+)/, f => [qw(month day year hour min)] }, # September 22, 1999 06:29
+
+ # just the date
+ 4 => { re => qr/(?:\w+,)?\s+(\d+)\s+(\w+)\s+(\d+)/, f => [qw(day month year)] }, # Wed, 13 September 2004
+ 5 => { re => qr/(\d+)\s+(\w+)\s+(\d+)/, f => [qw(day month year)] }, # 13 September 2004
+ 6 => { re => qr/(\w+)?\s+(\d+),?\s+(\d+)/, f => [qw(month day year)] }, # September 22, 1999 06:29
+);
+
+sub _extract_date {
+ my $date = shift;
+ my (%fields,@fields,$index);
+
+#print STDERR "# ... 0.[Date: ".($date||'')."]\n";
+
+ for my $inx (sort {$a <=> $b} keys %regexes) {
+ (@fields) = ($date =~ $regexes{$inx}->{re});
+ if(@fields) {
+ $index = $inx;
+ last;
+ }
+ }
+
+ return('000000','000000000000') unless(@fields && $index);
+
+ @fields{@{$regexes{$index}->{f}}} = @fields;
+
+ $fields{month} = substr($fields{month},0,3);
+ return('000000','000000000000') unless($month{$fields{month}} && $fields{year} > 1998);
+
+ $fields{$_} ||= 0 for(qw(year month day hour min));
+
+#print STDERR "# ... 1.[$_][$fields{$_}]\n" for(qw(year month day hour min));
+ my $short = sprintf "%04d%02d", $fields{year}, $month{$fields{month}};