Permalink
Browse files

Support branches in the github parser.

A project can have multiple tracked branches, each of which gets its
own list of target channels.  Reorganize things to fit this.  Rename
some methods: fetch_feed becomes process_project, process_feed becomes
process_branch.

Rework the data structures into a per-project / per-branch / per-target
tree.  Document that structure a little better.  Fix try_link() to
update the tree at the appropriate points; make process_project()
process all branches, one at a time.
  • Loading branch information...
1 parent 9e2043c commit 5d3c76e8d53839c938b1649622c407e4ab5cfea4 @Infinoid committed Dec 3, 2009
Showing with 123 additions and 98 deletions.
  1. +114 −52 modules/local/githubparser.pm
  2. +0 −39 modules/local/rakudonglog.pm
  3. +3 −3 t/novemberlog.t
  4. +6 −4 t/rakudolog.t
@@ -5,6 +5,7 @@ use warnings;
use XML::Atom::Client;
use HTML::Entities;
+use modules::local::karmalog;
use base 'modules::local::karmalog';
=head1 NAME
@@ -18,6 +19,10 @@ is also knowledgeable enough about github's URL schemes to be able to recognise
repository URLs, extract the project name/owner/path and generate ATOM feed
URLs.
+This is a base class; there is one subclass per tracked project. For each
+subclass, it keeps track of which branches are being tracked and, for each
+branch, which channels to emit updates to.
+
=cut
@@ -27,38 +32,60 @@ our $feed_number = 1;
# This is a map of $self objects. Because botnix does not use full class
# instances and instead calls package::name->function() to call methods, the
-# actual OO storage for those modules ends up in here.
+# actual OO storage for those modules ends up in here. This hash maps us
+# back to the $self objects.
our %objects_by_package;
+# Each $self pointer in this hash is a hash tree. In pseudo-YAML, the layout
+# of $self looks like:
+# $self:
+# project: rakudo
+# modulename: rakudo # same thing but with invalid characters changed to "_"
+# branches:
+# master:
+# url: http://github.com/feeds/rakudo/commits/rakudo/master
+# targets:
+# -
+# - magnet
+# - #parrot
+# -
+# - freenode
+# - #perl6
+# ng:
+# url: http://github.com/feeds/rakudo/commits/rakudo/ng
+# targets:
+# -
+# - freenode
+# - #perl6
+
=head1 METHODS
-=head2 fetch_feed
+=head2 process_project
-This is a pseudomethod called as a timer callback. It fetches the feed, parses
-it into an XML::Atom::Feed object and passes that to process_feed().
+This is a pseudomethod called as a timer callback. It enumerates the branches,
+calling process_branch() for each.
This is the main entry point to this module. Botnix does not use full class
instances; instead it just calls by package name. This function maps from the
package name to a real $self object (stored in %objects_by_package).
=cut
-sub fetch_feed {
+sub process_project {
my $pkg = shift;
- my $self = $objects_by_package{$pkg};
- my $atom = XML::Atom::Client->new();
- for (@{$self->{urls}}) {
- my $feed = $atom->getFeed($_);
- $pkg->process_feed($feed);
+ my $self = $pkg->get_self();
+ foreach my $branch (sort keys %{$$self{branches}}) {
+ $self->process_branch($branch);
}
}
-=head2 process_feed
+=head2 process_branch
- $self->process_feed($feed);
+ $self->process_branch($branch);
+Fetches the ATOM feed for the given branch, unless a feed was passed in.
Enumerates the commits in the feed, emitting any events it hasn't seen before.
This subroutine manages a "seen" cache in $self, and will take care not to
announce any commit more than once.
@@ -69,9 +96,16 @@ events. So it just populates the seen-cache silently.
=cut
-sub process_feed {
- my ($pkg, $feed) = @_;
- my $self = $objects_by_package{$pkg};
+sub process_branch {
+ my ($self, $branch, $feed) = @_;
+
+ # allow the testsuite to call us in a slightly different way.
+ $self = $self->get_self() unless ref $self;
+ if(!defined($feed)) {
+ my $atom = XML::Atom::Client->new();
+ $feed = $atom->getFeed($$self{branches}{$branch}{url});
+ }
+
my @items = $feed->entries;
@items = sort { $a->updated cmp $b->updated } @items; # ascending order
my $newest = $items[-1];
@@ -85,7 +119,7 @@ sub process_feed {
# output new entries to channel
next if exists($$self{seen}{$rev});
$$self{seen}{$rev} = 1;
- $self->output_item($item, $link, $rev);
+ $self->output_item($item, $branch, $link, $rev);
} else {
$$self{seen}{$rev} = 1;
}
@@ -114,17 +148,25 @@ sub longest_common_prefix {
=head2 try_link
- modules::local::githubparser->try_link($url, ['network', '#channel'], $branches);
+ modules::local::githubparser->try_link(
+ $url,
+ ['network', '#channel'],
+ [qw(master ng)]
+ );
This is called by autofeed.pm. Given a github.com URL, try to determine the
project name and canonical path. Then configure a feed reader for it if one
doesn't already exist.
The array reference containing network and channel is optional. If not
specified, magnet/#parrot is assumed. If the feed already exists but didn't
-have the specified target, the existing feed is extended.
+have the specified target, the existing feed is extended. Similarly, if the
+feed already existed but didn't have the specified branch, the existing feed
+is extended.
-C<$branches> is an optional array reference containing the branches to be
+The array reference containing branch names is also optional. However,
+to prevent ambiguity, you must also specify the network/channel in this case.
+The branch list names the branches to be
monitored, and defaults to C<[qw(master)]>.
Currently supports 3 URL formats:
@@ -158,55 +200,70 @@ sub try_link {
my $parsername = $project . "log";
my $modulename = "modules::local::" . $parsername;
$modulename =~ s/-/_/g;
- if(exists($objects_by_package{$modulename})) {
- # extend existing feed if necessary
- my $self = $objects_by_package{$modulename};
+
+ # create project, if necessary
+ my $self = $objects_by_package{$modulename};
+ if(!defined($self)) {
+ $objects_by_package{$modulename} = $self = {
+ project => $project,
+ modulename => $modulename,
+ branches => {},
+ };
+
+ # create a dynamic subclass to get the timer callback back to us
+ eval "package $modulename; use base 'modules::local::githubparser';";
+ $objects_by_package{$modulename} = bless($self, $modulename);
+ main::create_timer($parsername."_fetch_feed_timer", $modulename,
+ "fetch_feed", 300 + $feed_number++);
+ main::lprint("github: created project $project ($modulename)");
+ }
+
+ # create branches, if necessary
+ foreach my $branchname (@$branches) {
+ my $branch = $$self{branches}{$branchname};
+ if(!defined($branch)) {
+ my $url = "http://github.com/feeds/$author/commits/$project/$branchname";
+ $$self{branches}{$branchname} = $branch = {
+ url => $url,
+ targets => [],
+ };
+ main::lprint("github: $project has branch $branchname with feed url $url");
+ }
+
+ # update target list, if necessary
my $already_have_target = 0;
- foreach my $this (@{$$self{targets}}) {
+ foreach my $this (@{$$branch{targets}}) {
$already_have_target++
if($$target[0] eq $$this[0] && $$target[1] eq $$this[1]);
}
- push(@{$$self{targets}}, $target) unless $already_have_target;
- return;
+ unless($already_have_target) {
+ push(@{$$branch{targets}}, $target);
+ main::lprint("github: $project/$branchname will output to ".join("/",@$target));
+ }
}
-
- # create new feed
- # url, feed_name, targets, objects_by_package
- my @rss_links =
- map "http://github.com/feeds/$author/commits/$project/$_",
- @$branches;
- my $self = {
- urls => \@@rss_links,
- feed_name => $project,
- modulename => $modulename,
- targets => [ $target ],
- };
- # create a dynamic subclass to get the timer callback back to us
- eval "package $modulename; use base 'modules::local::githubparser';";
- $objects_by_package{$modulename} = bless($self, $modulename);
- main::lprint("$parsername github ATOM parser autoloaded.");
- main::create_timer($parsername."_fetch_feed_timer", $modulename,
- "fetch_feed", 300 + $feed_number++);
}
=head2 output_item
- $self->output_item($item, $link, $revision);
+ $self->output_item($item, $branch, $link, $revision);
Takes an XML::Atom::Entry object, extracts the useful bits from it and calls
put() to emit the karma message.
The karma message is typically as follows:
-feedname: $revision | username++ | $commonprefix:
-feedname: One or more lines of commit log message
-feedname: review: http://link/to/github/diff/page
+feedname/branch: $revision | username++ | $commonprefix:
+feedname/branch: One or more lines of commit log message
+feedname/branch: review: http://link/to/github/diff/page
+
+The "/branch" suffix is only emitted if we track more than one branch for this
+repository.
=cut
sub output_item {
- my ($self, $item, $link, $rev) = @_;
+ my ($self, $item, $branch, $link, $rev) = @_;
my $prefix = 'unknown';
my $creator = $item->author;
if(defined($creator)) {
@@ -230,7 +287,7 @@ sub output_item {
push(@files, $1);
shift(@lines);
}
- return main::lprint($$self{feed_name}.": error parsing filenames from description")
+ return main::lprint($$self{project}.": error parsing filenames from description")
unless $lines[0] eq '';
shift(@lines);
pop(@lines) if $lines[-1] =~ /^git-svn-id: http:/;
@@ -249,17 +306,22 @@ sub output_item {
my @log_lines = split(/[\r\n]+/, $log);
$rev = substr($rev, 0, 7);
+ my $project = $$self{project};
+ if(scalar keys %{$$self{branches}} > 1) {
+ $project .= "/$branch";
+ }
+
$self->emit_karma_message(
- feed => $$self{feed_name},
+ feed => $project,
rev => $rev,
user => $creator,
log => \@log_lines,
link => $link,
prefix => $prefix,
- targets => $$self{targets},
+ targets => $$self{branches}{$branch}{targets},
);
- main::lprint($$self{feed_name}.": output_item: output rev $rev");
+ main::lprint($$self{project}.": output_item: output $project rev $rev");
}
@@ -1,39 +0,0 @@
-package modules::local::rakudolog;
-use strict;
-use warnings;
-use base 'modules::local::githubparser';
-
-
-=head1 NAME
-
- modules::local::rakudonglog
-
-=head1 DESCRIPTION
-
-This is a subclass of modules::local::githubparser. It adds a parser to emit
-rakudo commits on the 'ng' branch.
-
-Normally rakudo commits are configured automatically by
-modules::local::autofeed. However, that autoconfig sends the messages to
-magnet's #parrot, whereas we want rakudo commits to go to freenode's #perl6 as
-well.
-
-
-=head1 METHODS
-
-=head2 init
-
-This is a pseudo-method called by botnix when the module is first loaded. It
-hands the URL to the github parser module and tells it to emit to freenode's
-#perl6. (If a rakudo parser was already configured, it will consolidate the
-config and simply add the freenode channel to the list of targets.)
-
-=cut
-
-my $url = 'http://github.com/rakudo/rakudo';
-
-sub init {
- modules::local::githubparser->try_link($url, ['freenode', '#perl6'], 'ng');
-}
-
-1;
View
@@ -43,7 +43,7 @@ my $xml = $xml_header . '<updated>2009-05-01T09:32:55-07:00</updated>' . $xml_fo
my $feed = XML::Atom::Feed->new(\$xml);
my $rl = modules::local::novemberlog->get_self();
ok(!exists($$rl{lastrev}), "no lastrev by default");
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
my $output = [output()];
is(scalar @$output, 0, "nothing output the first time around");
is($$rl{not_first_time}, 1, "not_first_time was set");
@@ -73,7 +73,7 @@ Big refactor of Rakudo's enums, making them more compliant with S12, and buildin
__XML__
$xml = $xml_header . '<updated>2009-05-01T09:58:40-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 6, "6 lines of output");
is($$output[0]{net} , 'freenode', "line to freenode/#november-wiki");
@@ -112,7 +112,7 @@ Add some micro-benchmakrs.&lt;/pre&gt;</content>
__XML__
$xml = $xml_header . '<updated>2009-05-15T06:45:18-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 12, "12 lines of output");
is($$output[0]{net} , 'freenode', "line to freenode/#november-wiki");
View
@@ -46,7 +46,7 @@ my $xml = $xml_header . '<updated>2009-05-01T09:32:55-07:00</updated>' . $xml_fo
my $feed = XML::Atom::Feed->new(\$xml);
my $rl = modules::local::rakudolog->get_self();
ok(!exists($$rl{lastrev}), "no lastrev by default");
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
my $output = [output()];
is(scalar @$output, 0, "nothing output the first time around");
is($$rl{not_first_time}, 1, "not_first_time was set");
@@ -76,14 +76,16 @@ Big refactor of Rakudo's enums, making them more compliant with S12, and buildin
__XML__
$xml = $xml_header . '<updated>2009-05-01T09:58:40-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 6, "6 lines of output");
is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");
is($$output[0]{chan}, '#parrot' , "line to magnet/#parrot");
+like($$output[0]{text}, qr|rakudo/master: |, "master branch");
is($$output[1]{net} , 'freenode', "line to freenode/#perl6");
is($$output[1]{chan}, '#perl6' , "line to freenode/#perl6");
-BEGIN { $tests += 5 };
+like($$output[1]{text}, qr|rakudo/master: |, "master branch");
+BEGIN { $tests += 7 };
# update with multiple commits having the same timestamp
reset_output();
@@ -115,7 +117,7 @@ Add some micro-benchmakrs.&lt;/pre&gt;</content>
__XML__
$xml = $xml_header . '<updated>2009-05-15T06:45:18-07:00</updated>' . $xml_footer;
$feed = XML::Atom::Feed->new(\$xml);
-call_func('process_feed', $feed);
+call_func('process_branch', 'master', $feed);
$output = [output()];
is(scalar @$output, 12, "12 lines of output");
is($$output[0]{net} , 'magnet' , "line to magnet/#parrot");

0 comments on commit 5d3c76e

Please sign in to comment.